From 450c3e6d90a9c1a3c03c790a4c357274004f9174 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Thu, 30 Mar 2023 15:25:45 +0300 Subject: [PATCH 01/35] Update OpenCL headers in include/CL this is required for cl_khr_command_buffer --- include/CL/cl.h | 53 +- include/CL/cl.hpp | 12966 ------------------- include/CL/cl_d3d10.h | 38 +- include/CL/cl_d3d11.h | 40 +- include/CL/cl_dx9_media_sharing.h | 158 +- include/CL/cl_dx9_media_sharing_intel.h | 156 +- include/CL/cl_egl.h | 8 +- include/CL/cl_ext.h | 1816 ++- include/CL/cl_ext_intel.h | 716 +- include/CL/cl_gl.h | 47 +- include/CL/cl_gl_ext.h | 26 +- include/CL/cl_icd.h | 318 +- include/CL/cl_layer.h | 61 + include/CL/cl_platform.h | 164 +- include/CL/cl_va_api_media_sharing_intel.h | 75 +- include/CL/opencl.hpp | 342 +- 16 files changed, 2620 insertions(+), 14364 deletions(-) delete mode 100644 include/CL/cl.hpp create mode 100644 include/CL/cl_layer.h diff --git a/include/CL/cl.h b/include/CL/cl.h index 3a5aae486..6c700ab17 100644 --- a/include/CL/cl.h +++ b/include/CL/cl.h @@ -141,6 +141,10 @@ typedef struct _cl_image_desc { #pragma warning( push ) #pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 builds */ #endif +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc11-extensions" /* Prevents warning about nameless union being C11 extension*/ +#endif #if defined(_MSC_VER) && defined(__STDC__) /* Anonymous unions are not supported in /Za builds */ #else @@ -158,6 +162,9 @@ typedef struct _cl_image_desc { #if defined(_MSC_VER) && !defined(__STDC__) #pragma warning( pop ) #endif +#ifdef __clang__ +#pragma clang diagnostic pop +#endif #endif } cl_image_desc; @@ -1311,11 +1318,11 @@ clLinkProgram(cl_context context, #ifdef CL_VERSION_2_2 -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL clSetProgramReleaseCallback(cl_program program, void 
(CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), - void * user_data) CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED; + void * user_data) CL_API_SUFFIX__VERSION_2_2_DEPRECATED; extern CL_API_ENTRY cl_int CL_API_CALL clSetProgramSpecializationConstant(cl_program program, @@ -1857,11 +1864,11 @@ clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, clSetCommandQueueProperty(cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, - cl_command_queue_properties * old_properties) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; + cl_command_queue_properties * old_properties) CL_API_SUFFIX__VERSION_1_0_DEPRECATED; #endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ /* Deprecated OpenCL 1.1 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateImage2D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, @@ -1869,9 +1876,9 @@ clCreateImage2D(cl_context context, size_t image_height, size_t image_row_pitch, void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateImage3D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, @@ -1881,46 +1888,46 @@ clCreateImage3D(cl_context context, size_t image_row_pitch, size_t image_slice_pitch, void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueMarker(cl_command_queue command_queue, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + 
cl_event * event) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueWaitForEvents(cl_command_queue command_queue, cl_uint num_events, - const cl_event * event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + const cl_event * event_list) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL -clEnqueueBarrier(cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL -clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL -clGetExtensionFunctionAddress(const char * func_name) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; /* Deprecated OpenCL 2.0 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL 
clCreateSampler(cl_context context, cl_bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + cl_event * event) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; #ifdef __cplusplus } diff --git a/include/CL/cl.hpp b/include/CL/cl.hpp deleted file mode 100644 index 3e739e73a..000000000 --- a/include/CL/cl.hpp +++ /dev/null @@ -1,12966 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2015 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS - * KHRONOS STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS - * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT - * https://www.khronos.org/registry/ - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and - * OpenCL 1.2 (rev 15) - * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes - * - * Additions and fixes from: - * Brian Cole, March 3rd 2010 and April 2012 - * Matt Gruenke, April 2012. - * Bruce Merry, February 2013. - * Tom Deakin and Simon McIntosh-Smith, July 2013 - * - * \version 1.2.9 - * \date December 2015 - * - * Optional extension support - * - * cl - * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. - * - * - * The interface is contained with a single C++ header file \em cl.hpp and all - * definitions are contained within the namespace \em cl. There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. 
- * - * For detail documentation on the bindings see: - * - * The OpenCL C++ Wrapper API 1.2 (revision 09) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -// The latest version of the OpenCL C++ bindings can be found on GitHub: -// -> 
https://github.com/KhronosGroup/OpenCL-CLHPP -#pragma message("This version of the OpenCL Host API C++ bindings is deprecated, please use cl2.hpp instead.") - -#ifdef _WIN32 - -#include - -#if defined(USE_DX_INTEROP) -#include -#include -#endif -#endif // _WIN32 - -#if defined(_MSC_VER) -#include -#endif // _MSC_VER - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#else -#include -#endif // !__APPLE__ - -#if (_MSC_VER >= 1700) || (__cplusplus >= 201103L) -#define CL_HPP_RVALUE_REFERENCES_SUPPORTED -#define CL_HPP_CPP11_ATOMICS_SUPPORTED -#include -#endif - -#if (__cplusplus >= 201103L) -#define CL_HPP_NOEXCEPT noexcept -#else -#define CL_HPP_NOEXCEPT -#endif - - -// To avoid accidentally taking ownership of core OpenCL types -// such as cl_kernel constructors are made explicit -// under OpenCL 1.2 -#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS explicit -#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -// Define deprecated prefixes and suffixes to ensure compilation -// in case they are not pre-defined -#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include -#include -#include - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -#endif // #if defined(__CL_ENABLE_EXCEPTIONS) - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(__ANDROID__) || defined(linux) || defined(__APPLE__) || defined(__MACOSX) -#include -#endif // linux 
- -#include - -// Compiler specific weak linking -#ifndef CL_WEAK_ATTRIB_PREFIX -// C++17: use inline variables/functions -#if __cplusplus >= 201703L -#define CL_USE_INLINE -#endif - -#ifdef CL_USE_INLINE -#define CL_WEAK_ATTRIB_PREFIX inline -#define CL_WEAK_ATTRIB_SUFFIX -#elif _WIN32 -#define CL_WEAK_ATTRIB_PREFIX __declspec(selectany) -#define CL_WEAK_ATTRIB_SUFFIX -#else // GCC, CLANG, etc. -#define CL_WEAK_ATTRIB_PREFIX -#define CL_WEAK_ATTRIB_SUFFIX __attribute__((weak)) -#endif // CL_USE_INLINE - -#endif // CL_WEAK_ATTRIB_PREFIX - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl { - -class Memory; - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -#if defined(CL_VERSION_1_2) -#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddressForPlatform(platform, #name); \ - if(!pfn_##name) { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; -class Buffer; - -#if defined(__CL_ENABLE_EXCEPTIONS) -/*! \brief Exception class - * - * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! \brief Create a new CL error exception for a given error code - * and corresponding message. - * - * \param err error code value. - * - * \param errStr a descriptive string that must remain in scope until - * handling of the exception has concluded. If set, it - * will be returned by what(). 
- */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - - -namespace detail -{ -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - (void) errStr; // suppress unused variable warning - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS -} - - - -//! 
\cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#if defined(CL_VERSION_1_2) -#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) -#endif // #if defined(CL_VERSION_1_2) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __COPY_ERR __ERR_STR(cl::copy) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#if defined(CL_VERSION_1_2) -#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) -#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) -#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) -#endif // #if defined(CL_VERSION_1_2) -#define 
__CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#if defined(CL_VERSION_1_2) -#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) -#endif // #if defined(CL_VERSION_1_2) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#if defined(CL_VERSION_1_2) -#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) -#define __LINK_PROGRAM_ERR __ERR_STR(clLinkProgram) -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define 
__ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) -#endif // #if defined(CL_VERSION_1_2) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - - -#define __RETAIN_ERR __ERR_STR(Retain Object) -#define __RELEASE_ERR __ERR_STR(Release Object) -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) -#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) - -/** - * CL 1.2 version that uses device fission. 
- */ -#if defined(CL_VERSION_1_2) -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) -#else -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // #if defined(CL_VERSION_1_2) - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) -#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) -#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#endif // #if defined(CL_VERSION_1_1) - -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/** - * CL 1.2 marker and barrier commands - */ -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) -#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) -#endif // #if defined(CL_VERSION_1_2) - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - - * \note Deprecated. Please use std::string as default or - * re-define the string class to match the std::string - * interface by defining STRING_CLASS - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string -{ -private: - ::size_t size_; - char * str_; -public: - //! \brief Constructs an empty string, allocating no memory. - string(void) : size_(0), str_(NULL) - { - } - - /*! 
\brief Constructs a string populated from an arbitrary value of - * specified size. - * - * An extra '\0' is added, in case none was contained in str. - * - * \param str the initial value of the string instance. Note that '\0' - * characters receive no special treatment. If NULL, - * the string is left empty, with a size of 0. - * - * \param size the number of characters to copy from str. - */ - string(const char * str, ::size_t size) : - size_(size), - str_(NULL) - { - if( size > 0 ) { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } - } - - /*! \brief Constructs a string populated from a null-terminated value. - * - * \param str the null-terminated initial value of the string instance. - * If NULL, the string is left empty, with a size of 0. - */ - string(const char * str) : - size_(0), - str_(NULL) - { - if( str ) { - size_= ::strlen(str); - } - if( size_ > 0 ) { - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - } - } - - void resize( ::size_t n ) - { - if( size_ == n ) { - return; - } - if (n == 0) { - if( str_ ) { - delete [] str_; - } - str_ = NULL; - size_ = 0; - } - else { - char *newString = new char[n + 1]; - ::size_t copySize = n; - if( size_ < n ) { - copySize = size_; - } - size_ = n; - - if(str_) { - memcpy(newString, str_, (copySize + 1) * sizeof(char)); - } - if( copySize < size_ ) { - memset(newString + copySize, 0, size_ - copySize); - } - newString[size_] = '\0'; - - delete [] str_; - str_ = newString; - } - } - - const char& operator[] ( ::size_t pos ) const - { - return str_[pos]; - } - - char& operator[] ( ::size_t pos ) - { - return str_[pos]; - } - - /*! \brief Copies the value of another string to this one. - * - * \param rhs the string to copy. - * - * \returns a reference to the modified instance. 
- */ - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if( str_ != NULL ) { - delete [] str_; - str_ = NULL; - size_ = 0; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - str_ = NULL; - size_ = 0; - } - else { - str_ = new char[rhs.size_ + 1]; - size_ = rhs.size_; - - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; - } - - /*! \brief Constructs a string by copying the value of another instance. - * - * \param rhs the string to copy. - */ - string(const string& rhs) : - size_(0), - str_(NULL) - { - *this = rhs; - } - - //! \brief Destructor - frees memory used to hold the current value. - ~string() - { - delete[] str_; - str_ = NULL; - } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t size(void) const { return size_; } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t length(void) const { return size(); } - - /*! \brief Returns a pointer to the private copy held by this instance, - * or "" if empty/unset. - */ - const char * c_str(void) const { return (str_) ? str_ : "";} -} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -typedef cl::string STRING_CLASS; -#endif // #elif !defined(__USE_DEV_STRING) - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * - * \note Deprecated. Please use std::vector as default or - * re-define the vector class to match the std::vector - * interface by defining VECTOR_CLASS - - * \note Not recommended for use with custom objects as - * current implementation will construct N elements - * - * std::vector functionality. - * \brief Fixed sized vector compatible with std::vector. 
- * - * \note - * This differs from std::vector<> not just in memory allocation, - * but also in terms of when members are constructed, destroyed, - * and assigned instead of being copy constructed. - * - * \param T type of element contained in the vector. - * - * \param N maximum size of the vector. - */ -template -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector -{ -private: - T data_[N]; - unsigned int size_; - -public: - //! \brief Constructs an empty vector with no memory allocated. - vector() : - size_(static_cast(0)) - {} - - //! \brief Deallocates the vector's memory and destroys all of its elements. - ~vector() - { - clear(); - } - - //! \brief Returns the number of elements currently contained. - unsigned int size(void) const - { - return size_; - } - - /*! \brief Empties the vector of all elements. - * \note - * This does not deallocate memory but will invoke destructors - * on contained elements. - */ - void clear() - { - while(!empty()) { - pop_back(); - } - } - - /*! \brief Appends an element after the last valid element. - * Calling this on a vector that has reached capacity will throw an - * exception if exceptions are enabled. - */ - void push_back (const T& x) - { - if (size() < N) { - new (&data_[size_]) T(x); - size_++; - } else { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } - - /*! \brief Removes the last valid element from the vector. - * Calling this on an empty vector will throw an exception - * if exceptions are enabled. - */ - void pop_back(void) - { - if (size_ != 0) { - --size_; - data_[size_].~T(); - } else { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } - - /*! \brief Constructs with a value copied from another. - * - * \param vec the vector to copy. - */ - vector(const vector& vec) : - size_(vec.size_) - { - if (size_ != 0) { - assign(vec.begin(), vec.end()); - } - } - - /*! \brief Constructs with a specified number of initial elements. 
- * - * \param size number of initial elements. - * - * \param val value of initial elements. - */ - vector(unsigned int size, const T& val = T()) : - size_(0) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); - } - } - - /*! \brief Overwrites the current content with that copied from another - * instance. - * - * \param rhs vector to copy. - * - * \returns a reference to this. - */ - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ != 0) { - assign(rhs.begin(), rhs.end()); - } else { - clear(); - } - - return *this; - } - - /*! \brief Tests equality against another instance. - * - * \param vec the vector against which to compare. - */ - bool operator==(vector &vec) - { - if (size() != vec.size()) { - return false; - } - - for( unsigned int i = 0; i < size(); ++i ) { - if( operator[](i) != vec[i] ) { - return false; - } - } - return true; - } - - //! \brief Conversion operator to T*. - operator T* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const T* () const { return data_; } - - //! \brief Tests whether this instance has any elements. - bool empty (void) const - { - return size_==0; - } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int max_size (void) const - { - return N; - } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int capacity () const - { - return N; - } - - //! \brief Resizes the vector to the given size - void resize(unsigned int newSize, T fill = T()) - { - if (newSize > N) - { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - else - { - while (size_ < newSize) - { - new (&data_[size_]) T(fill); - size_++; - } - while (size_ > newSize) - { - --size_; - data_[size_].~T(); - } - } - } - - /*! \brief Returns a reference to a given element. - * - * \param index which element to access. 
* - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - T& operator[](int index) - { - return data_[index]; - } - - /*! \brief Returns a const reference to a given element. - * - * \param index which element to access. - * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - const T& operator[](int index) const - { - return data_[index]; - } - - /*! \brief Assigns elements of the vector based on a source iterator range. - * - * \param start Beginning iterator of source range - * \param end Enditerator of source range - * - * \note - * Will throw an exception if exceptions are enabled and size exceeded. - */ - template - void assign(I start, I end) - { - clear(); - while(start != end) { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Const iterator class for vectors - */ - class iterator - { - private: - const vector *vec_; - int index_; - - /** - * Internal iterator constructor to capture reference - * to the vector it iterates over rather than taking - * the vector by copy. 
- */ - iterator (const vector &vec, int index) : - vec_(&vec) - { - if( !vec.empty() ) { - index_ = index; - } else { - index_ = -1; - } - } - - public: - iterator(void) : - index_(-1), - vec_(NULL) - { - } - - iterator(const iterator& rhs) : - vec_(rhs.vec_), - index_(rhs.index_) - { - } - - ~iterator(void) {} - - static iterator begin(const cl::vector &vec) - { - iterator i(vec, 0); - - return i; - } - - static iterator end(const cl::vector &vec) - { - iterator i(vec, vec.size()); - - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - iterator& operator++() - { - ++index_; - return *this; - } - - iterator operator++(int) - { - iterator retVal(*this); - ++index_; - return retVal; - } - - iterator& operator--() - { - --index_; - return *this; - } - - iterator operator--(int) - { - iterator retVal(*this); - --index_; - return retVal; - } - - const T& operator *() const - { - return (*vec_)[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator begin(void) const - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - iterator end(void) const - { - return iterator::end(*this); - } - - T& front(void) - { - return data_[0]; - } - - T& back(void) - { - return data_[size_]; - } - - const T& front(void) const - { - return data_[0]; - } - - const T& back(void) const - { - return data_[size_-1]; - } -} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) - - - - - -namespace detail { -#define __DEFAULT_NOT_INITIALIZED 1 -#define __DEFAULT_BEING_INITIALIZED 2 -#define __DEFAULT_INITIALIZED 4 - - /* - * Compare and exchange primitives are needed for handling of defaults - */ - -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - inline int compare_exchange(std::atomic * dest, int exchange, int comparand) -#else // 
!CL_HPP_CPP11_ATOMICS_SUPPORTED - inline int compare_exchange(volatile int * dest, int exchange, int comparand) -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - { -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - std::atomic_compare_exchange_strong(dest, &comparand, exchange); - return comparand; -#elif _MSC_VER - return (int)(_InterlockedCompareExchange( - (volatile long*)dest, - (long)exchange, - (long)comparand)); -#else // !_MSC_VER && !CL_HPP_CPP11_ATOMICS_SUPPORTED - return (__sync_val_compare_and_swap( - dest, - comparand, - exchange)); -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - } - - inline void fence() { -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - std::atomic_thread_fence(std::memory_order_seq_cst); -#elif _MSC_VER // !CL_HPP_CPP11_ATOMICS_SUPPORTED - _ReadWriteBarrier(); -#else // !_MSC_VER && !CL_HPP_CPP11_ATOMICS_SUPPORTED - __sync_synchronize(); -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - } -} // namespace detail - - -/*! \brief class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, whose - * size is known statically. - */ -template -class size_t -{ -private: - ::size_t data_[N]; - -public: - //! \brief Initialize size_t to all 0s - size_t() - { - for( int i = 0; i < N; ++i ) { - data_[i] = 0; - } - } - - ::size_t& operator[](int index) - { - return data_[index]; - } - - const ::size_t& operator[](int index) const - { - return data_[index]; - } - - //! \brief Conversion operator to T*. - operator ::size_t* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const ::size_t* () const { return data_; } -}; - -namespace detail { - -// Generic getInfoHelper. The final parameter is used to guide overload -// resolution: the actual parameter passed is an int, which makes this -// a worse conversion sequence than a specialization that declares the -// parameter as an int. 
-template -inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) -{ - return f(name, sizeof(T), param, NULL); -} - -// Specialized getInfoHelper for VECTOR_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; -} - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int, typename T::cl_type = 0) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - typename T::cl_type * value = (typename T::cl_type *) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - ::size_t elements = required / sizeof(typename T::cl_type); - param->assign(&value[0], &value[elements]); - for (::size_t i = 0; i < elements; i++) - { - if (value[i] != NULL) - { - err = (*param)[i].retain(); - if (err != CL_SUCCESS) { - return err; - } - } - } - return CL_SUCCESS; -} - -// Specialized for getInfo -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int) -{ - cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for STRING_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, 
STRING_CLASS* param, long) -{ -#if defined(__NO_STD_VECTOR) || defined(__NO_STD_STRING) - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*)alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; -#else - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - if (required > 0) { - // std::string has a constant data member - // a char vector does not - VECTOR_CLASS value(required); - err = f(name, required, value.data(), NULL); - if (err != CL_SUCCESS) { - return err; - } - if (param) { - param->assign(value.begin(), value.end() - 1u); - } - } - else if (param) { - param->assign(""); - } -#endif - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for cl::size_t params -template -inline cl_int getInfoHelper(Func f, cl_uint name, size_t* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - ::size_t* value = (::size_t*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - for(int i = 0; i < N; ++i) { - (*param)[i] = value[i]; - } - - return CL_SUCCESS; -} - -template struct ReferenceHandler; - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. 
- */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) -{ - typename T::cl_type value; - cl_int err = f(name, sizeof(value), &value, NULL); - if (err != CL_SUCCESS) { - return err; - } - *param = value; - if (value != NULL) - { - err = param->retain(); - if (err != CL_SUCCESS) { - return err; - } - } - return CL_SUCCESS; -} - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - 
F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, 
CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_int) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, 
CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ 
- F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - - -#if defined(CL_VERSION_1_2) -#define __PARAM_NAME_INFO_1_2(F) \ - F(cl_image_info, CL_IMAGE_ARRAY_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ - F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ - F(cl_image_info, CL_IMAGE_NUM_SAMPLES, cl_uint) \ - \ - F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ - F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ - \ - F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ - \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_LINKER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PRINTF_BUFFER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, cl_bool) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ - F(cl_device_info, 
CL_DEVICE_REFERENCE_COUNT, cl_uint) -#endif // #if defined(CL_VERSION_1_2) - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits {}; - -#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 -#if defined(CL_VERSION_1_2) -__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#ifdef CL_PLATFORM_ICD_SUFFIX_KHR -__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) -#endif - -#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) -#endif - -#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>) -#endif -#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD 
-__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) -#endif - -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) -#endif -#ifdef CL_DEVICE_WARP_SIZE_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) -#endif -#ifdef CL_DEVICE_GPU_OVERLAP_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) -#endif -#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) -#endif -#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) -#endif - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return getInfoHelper(f, name, param, 0); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint 
param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return getInfoHelper(f0, name, param, 0); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return getInfoHelper(f0, name, param, 0); -} - -template -struct ReferenceHandler -{ }; - -#if defined(CL_VERSION_1_2) -/** - * OpenCL 1.2 devices do have retain/release. - */ -template <> -struct ReferenceHandler -{ - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int retain(cl_device_id device) - { return ::clRetainDevice(device); } - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int release(cl_device_id device) - { return ::clReleaseDevice(device); } -}; -#else // #if defined(CL_VERSION_1_2) -/** - * OpenCL 1.1 devices do not have retain/release. - */ -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_SUCCESS; } - // cl_device_id does not have release(). 
- static cl_int release(cl_device_id) - { return CL_SUCCESS; } -}; -#endif // #if defined(CL_VERSION_1_2) - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_SUCCESS; } - // cl_platform_id does not have release(). - static cl_int release(cl_platform_id) - { return CL_SUCCESS; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - - -// Extracts version number with major in the upper 16 bits, minor in the lower 16 -static cl_uint getVersion(const char 
*versionInfo) -{ - int highVersion = 0; - int lowVersion = 0; - int index = 7; - while(versionInfo[index] != '.' ) { - highVersion *= 10; - highVersion += versionInfo[index]-'0'; - ++index; - } - ++index; - while(versionInfo[index] != ' ' && versionInfo[index] != '\0') { - lowVersion *= 10; - lowVersion += versionInfo[index]-'0'; - ++index; - } - return (highVersion << 16) | lowVersion; -} - -static cl_uint getPlatformVersion(cl_platform_id platform) -{ - ::size_t size = 0; - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); - char *versionInfo = (char *) alloca(size); - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); - return getVersion(versionInfo); -} - -static cl_uint getDevicePlatformVersion(cl_device_id device) -{ - cl_platform_id platform; - clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); - return getPlatformVersion(platform); -} - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -static cl_uint getContextPlatformVersion(cl_context context) -{ - // The platform cannot be queried directly, so we first have to grab a - // device and obtain its context - ::size_t size = 0; - clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); - if (size == 0) - return 0; - cl_device_id *devices = (cl_device_id *) alloca(size); - clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); - return getDevicePlatformVersion(devices[0]); -} -#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - Wrapper(const cl_type &obj) : object_(obj) { } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - 
Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT - { - object_ = rhs.object_; - rhs.object_ = NULL; - } -#endif - - Wrapper& operator = (const Wrapper& rhs) - { - if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper& operator = (Wrapper&& rhs) - { - if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - rhs.object_ = NULL; - } - return *this; - } -#endif - - Wrapper& operator = (const cl_type &rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs; - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -template <> -class Wrapper -{ -public: - typedef cl_device_id cl_type; - -protected: - cl_type object_; - bool referenceCountable_; - - static bool isReferenceCountable(cl_device_id device) - { - bool retVal = false; - if (device != NULL) { - int version = getDevicePlatformVersion(device); - if(version > ((1 << 16) + 1)) { - retVal = true; - } - } - return retVal; - } - -public: - Wrapper() : object_(NULL), referenceCountable_(false) - { - } - - Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) - { - referenceCountable_ = isReferenceCountable(obj); - } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - referenceCountable_ = isReferenceCountable(object_); - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - 
-#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT - { - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - rhs.object_ = NULL; - rhs.referenceCountable_ = false; - } -#endif - - Wrapper& operator = (const Wrapper& rhs) - { - if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper& operator = (Wrapper&& rhs) - { - if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - rhs.object_ = NULL; - rhs.referenceCountable_ = false; - } - return *this; - } -#endif - - Wrapper& operator = (const cl_type &rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs; - referenceCountable_ = isReferenceCountable(object_); - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - template - friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS*, int, typename U::cl_type); - - cl_int retain() const - { - if( referenceCountable_ ) { - return ReferenceHandler::retain(object_); - } - else { - return CL_SUCCESS; - } - } - - cl_int release() const - { - if( referenceCountable_ ) { - return ReferenceHandler::release(object_); - } - else { - return CL_SUCCESS; - } - } -}; - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief Adds constructors and member functions for cl_image_format. - * - * \see cl_image_format - */ -struct ImageFormat : public cl_image_format -{ - //! 
\brief Default constructor - performs no initialization. - ImageFormat(){} - - //! \brief Initializing constructor. - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - //! \brief Assignment operator. - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \brief Class interface for cl_device_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_device_id - */ -class Device : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Device() : detail::Wrapper() { } - - /*! \brief Constructor from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - __CL_EXPLICIT_CONSTRUCTORS Device(const cl_device_id &device) : detail::Wrapper(device) { } - - /*! \brief Returns the first device on the default context. - * - * \see Context::getDefault() - */ - static Device getDefault(cl_int * err = NULL); - - /*! \brief Assignment operator from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device& operator = (const cl_device_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Device(const Device& dev) : detail::Wrapper(dev) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Device& operator = (const Device &dev) - { - detail::Wrapper::operator=(dev); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. 
- */ - Device(Device&& dev) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(dev)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Device& operator = (Device &&dev) - { - detail::Wrapper::operator=(std::move(dev)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetDeviceInfo(). - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - //! \brief Wrapper for clGetDeviceInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /** - * CL 1.2 version - */ -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clCreateSubDevicesEXT(). - cl_int createSubDevices( - const cl_device_partition_property * properties, - VECTOR_CLASS* devices) - { - cl_uint n = 0; - cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = clCreateSubDevices(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif // #if defined(CL_VERSION_1_2) - -/** - * CL 1.1 version that uses device fission. 
- */ -#if defined(CL_VERSION_1_1) -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif // #if defined(USE_CL_DEVICE_FISSION) -#endif // #if defined(CL_VERSION_1_1) -}; - -/*! \brief Class interface for cl_platform_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_platform_id - */ -class Platform : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Platform() : detail::Wrapper() { } - - /*! \brief Constructor from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - __CL_EXPLICIT_CONSTRUCTORS Platform(const cl_platform_id &platform) : detail::Wrapper(platform) { } - - /*! \brief Assignment operator from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. 
- */ - Platform& operator = (const cl_platform_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetPlatformInfo(). - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - //! \brief Wrapper for clGetPlatformInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of devices for this platform. - * - * Wraps clGetDeviceIDs(). - */ - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS* devices) const - { - cl_uint n = 0; - if( devices == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. 
- * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. - */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - if( devices == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - /*! \brief Gets a list of available platforms. - * - * Wraps clGetPlatformIDs(). 
- */ - static cl_int get( - VECTOR_CLASS* platforms) - { - cl_uint n = 0; - - if( platforms == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform. - * - * Wraps clGetPlatformIDs(), returning the first result. - */ - static cl_int get( - Platform * platform) - { - cl_uint n = 0; - - if( platform == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - *platform = ids[0]; - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform, returning it by value. - * - * Wraps clGetPlatformIDs(), returning the first result. 
- */ - static Platform get( - cl_int * errResult = NULL) - { - Platform platform; - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - if (errResult != NULL) { - *errResult = err; - } - return Platform(); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - - if (err != CL_SUCCESS) { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - if (errResult != NULL) { - *errResult = err; - } - return Platform(); - } - - - return Platform(ids[0]); - } - - static Platform getDefault( - cl_int *errResult = NULL ) - { - return get(errResult); - } - - -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clUnloadCompiler(). - cl_int - unloadCompiler() - { - return ::clUnloadPlatformCompiler(object_); - } -#endif // #if defined(CL_VERSION_1_2) -}; // class Platform - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -/** - * Unload the OpenCL compiler. - * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int -UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} -#endif // #if defined(CL_VERSION_1_1) - -/*! \brief Class interface for cl_context. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_context as the original. For details, see - * clRetainContext() and clReleaseContext(). 
- * - * \see cl_context - */ -class Context - : public detail::Wrapper -{ -private: - -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - static std::atomic default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static volatile int default_initialized_; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static Context default_; - static volatile cl_int default_error_; -public: - /*! \brief Constructs a context including a list of specified devices. - * - * Wraps clCreateContext(). - */ - Context( - const VECTOR_CLASS& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateContext( - properties, (cl_uint) numDevices, - deviceIDs, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - const Device& device, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - - cl_device_id deviceID = device(); - - object_ = ::clCreateContext( - properties, 1, - &deviceID, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a context including all or a subset of devices of a specified type. - * - * Wraps clCreateContextFromType(). 
- */ - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - -#if !defined(__APPLE__) && !defined(__MACOS) - cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; - - if (properties == NULL) { - // Get a valid platform ID as we cannot send in a blank one - VECTOR_CLASS platforms; - error = Platform::get(&platforms); - if (error != CL_SUCCESS) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - return; - } - - // Check the platforms we found for a device of our specified type - cl_context_properties platform_id = 0; - for (unsigned int i = 0; i < platforms.size(); i++) { - - VECTOR_CLASS devices; - -#if defined(__CL_ENABLE_EXCEPTIONS) - try { -#endif - - error = platforms[i].getDevices(type, &devices); - -#if defined(__CL_ENABLE_EXCEPTIONS) - } catch (Error &) {} - // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type - // We do error checking next anyway, and can throw there if needed -#endif - - // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND - if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - if (devices.size() > 0) { - platform_id = (cl_context_properties)platforms[i](); - break; - } - } - - if (platform_id == 0) { - detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = CL_DEVICE_NOT_FOUND; - } - return; - } - - prop[1] = platform_id; - properties = &prop[0]; - } -#endif - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! 
\brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Context(const Context& ctx) : detail::Wrapper(ctx) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Context& operator = (const Context &ctx) - { - detail::Wrapper::operator=(ctx); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Context(Context&& ctx) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(ctx)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Context& operator = (Context &&ctx) - { - detail::Wrapper::operator=(std::move(ctx)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. - * - * \note All calls to this function return the same cl_context as the first. - */ - static Context getDefault(cl_int * err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) { - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) { - // Assume writes will propagate eventually... - while(default_initialized_ != __DEFAULT_INITIALIZED) { - detail::fence(); - } - - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - cl_int error; - default_ = Context( - CL_DEVICE_TYPE_DEFAULT, - NULL, - NULL, - NULL, - &error); - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... - default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - - if (err != NULL) { - *err = default_error_; - } - return default_; - - } - - //! \brief Default constructor - initializes to NULL. 
- Context() : detail::Wrapper() { } - - /*! \brief Constructor from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_context - * into the new Context object. - */ - __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper(context) { } - - /*! \brief Assignment operator from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseContext() on the value previously held by this instance. - */ - Context& operator = (const cl_context& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetContextInfo(). - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - //! \brief Wrapper for clGetContextInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of supported image formats. - * - * Wraps clGetSupportedImageFormats(). 
- */ - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS* formats) const - { - cl_uint numEntries; - - if (!formats) { - return CL_SUCCESS; - } - - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - if (numEntries > 0) { - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*)value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - } - else { - formats->clear(); - } - return CL_SUCCESS; - } -}; - -inline Device Device::getDefault(cl_int * err) -{ - cl_int error; - Device device; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - device = context.getInfo()[0]; - if (err != NULL) { - *err = CL_SUCCESS; - } - } - - return device; -} - -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -CL_WEAK_ATTRIB_PREFIX std::atomic CL_WEAK_ATTRIB_SUFFIX Context::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -CL_WEAK_ATTRIB_PREFIX volatile int CL_WEAK_ATTRIB_SUFFIX Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - -CL_WEAK_ATTRIB_PREFIX Context CL_WEAK_ATTRIB_SUFFIX Context::default_; -CL_WEAK_ATTRIB_PREFIX volatile cl_int CL_WEAK_ATTRIB_SUFFIX Context::default_error_ = CL_SUCCESS; - -/*! \brief Class interface for cl_event. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_event as the original. For details, see - * clRetainEvent() and clReleaseEvent(). 
- * - * \see cl_event - */ -class Event : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Event() : detail::Wrapper() { } - - /*! \brief Constructor from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_event - * into the new Event object. - */ - __CL_EXPLICIT_CONSTRUCTORS Event(const cl_event& event) : detail::Wrapper(event) { } - - /*! \brief Assignment operator from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseEvent() on the value previously held by this instance. - */ - Event& operator = (const cl_event& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetEventInfo(). - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - //! \brief Wrapper for clGetEventInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - //! \brief Wrapper for clGetEventProfilingInfo(). - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! 
\brief Blocks the calling thread until this event completes. - * - * Wraps clWaitForEvents(). - */ - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - /*! \brief Registers a user callback function for a specific command execution status. - * - * Wraps clSetEventCallback(). - */ - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - /*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ - static cl_int - waitForEvents(const VECTOR_CLASS& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), - __WAIT_FOR_EVENTS_ERR); - } -}; - -#if defined(CL_VERSION_1_1) -/*! \brief Class interface for user events (a subset of cl_event's). - * - * See Event for details about copy semantics, etc. - */ -class UserEvent : public Event -{ -public: - /*! \brief Constructs a user event on a given context. - * - * Wraps clCreateUserEvent(). - */ - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - UserEvent() : Event() { } - - /*! \brief Sets the execution status of a user event object. - * - * Wraps clSetUserEventStatus(). - */ - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -/*! \brief Blocks the calling thread until every event specified is complete. 
- * - * Wraps clWaitForEvents(). - */ -inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \brief Class interface for cl_mem. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_mem as the original. For details, see - * clRetainMemObject() and clReleaseMemObject(). - * - * \see cl_mem - */ -class Memory : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Memory() : detail::Wrapper() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_mem - * into the new Memory object. - */ - __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper(memory) { } - - /*! \brief Assignment operator from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseMemObject() on the value previously held by this instance. - */ - Memory& operator = (const cl_mem& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Memory(const Memory& mem) : detail::Wrapper(mem) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Memory& operator = (const Memory &mem) - { - detail::Wrapper::operator=(mem); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Memory(Memory&& mem) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(mem)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. 
- */ - Memory& operator = (Memory &&mem) - { - detail::Wrapper::operator=(std::move(mem)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetMemObjectInfo(). - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - //! \brief Wrapper for clGetMemObjectInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - /*! \brief Registers a callback function to be called when the memory object - * is no longer needed. - * - * Wraps clSetMemObjectDestructorCallback(). - * - * Repeated calls to this function, for a given cl_mem value, will append - * to the list of functions called (in reverse order) when memory object's - * resources are freed and the memory object is deleted. - * - * \note - * The registered callbacks are associated with the underlying cl_mem - * value - not the Memory class instance. 
- */ - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif - -}; - -// Pre-declare copy functions -class Buffer; -template< typename IteratorType > -cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); - - -/*! \brief Class interface for Buffer Memory Objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Buffer : public Memory -{ -public: - - /*! \brief Constructs a Buffer in a specified context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - */ - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a Buffer in the default context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. 
- * - * \see Context::getDefault() - */ - Buffer( - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(err); - - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer( - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr = false, - cl_int* err = NULL) - { - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); - - Context context = Context::getDefault(err); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if( !useHostPtr ) { - error = cl::copy(startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators using a specified context. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int* err = NULL); - - /*! 
- * \brief Construct a Buffer from a host container via iterators using a specified queue. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int* err = NULL); - - //! \brief Default constructor - initializes to NULL. - Buffer() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Buffer& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Buffer(const Buffer& buf) : Memory(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Buffer& operator = (const Buffer &buf) - { - Memory::operator=(buf); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Buffer(Buffer&& buf) CL_HPP_NOEXCEPT : Memory(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Buffer& operator = (Buffer &&buf) - { - Memory::operator=(std::move(buf)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - -#if defined(CL_VERSION_1_1) - /*! \brief Creates a new buffer object from this. - * - * Wraps clCreateSubBuffer(). 
- */ - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif -}; - -#if defined (USE_DX_INTEROP) -/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. - * - * This is provided to facilitate interoperability with Direct3D. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferD3D10 : public Buffer -{ -public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - - /*! \brief Constructs a BufferD3D10, in a specified context, from a - * given ID3D10Buffer. - * - * Wraps clCreateFromD3D10BufferKHR(). - */ - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - -#if defined(CL_VERSION_1_2) - vector props = context.getInfo(); - cl_platform platform = -1; - for( int i = 0; i < props.size(); ++i ) { - if( props[i] == CL_CONTEXT_PLATFORM ) { - platform = props[i+1]; - } - } - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); -#endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); -#endif - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferD3D10() : Buffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. 
- * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10(const BufferD3D10& buf) : Buffer(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10& operator = (const BufferD3D10 &buf) - { - Buffer::operator=(buf); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10(BufferD3D10&& buf) CL_HPP_NOEXCEPT : Buffer(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10& operator = (BufferD3D10 &&buf) - { - Buffer::operator=(std::move(buf)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif - -/*! \brief Class interface for GL Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferGL : public Buffer -{ -public: - /*! \brief Constructs a BufferGL in a specified context, from a given - * GL buffer. - * - * Wraps clCreateFromGLBuffer(). - */ - BufferGL( - const Context& context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferGL() : Buffer() { } - - /*! 
\brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferGL(const BufferGL& buf) : Buffer(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferGL& operator = (const BufferGL &buf) - { - Buffer::operator=(buf); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferGL(BufferGL&& buf) CL_HPP_NOEXCEPT : Buffer(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferGL& operator = (BufferGL &&buf) - { - Buffer::operator=(std::move(buf)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief C++ base class for Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image : public Memory -{ -protected: - //! \brief Default constructor - initializes to NULL. - Image() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. 
- * - * See Memory for further details. - */ - Image& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image(const Image& img) : Memory(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image& operator = (const Image &img) - { - Memory::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image(Image&& img) CL_HPP_NOEXCEPT : Memory(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image& operator = (Image &&img) - { - Memory::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - -public: - //! \brief Wrapper for clGetImageInfo(). - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - //! \brief Wrapper for clGetImageInfo() that returns by value. - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -#if defined(CL_VERSION_1_2) -/*! \brief Class interface for 1D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image1D : public Image -{ -public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). 
- */ - Image1D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D, - width, - 0, 0, 0, 0, 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image1D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image1D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1D(const Image1D& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1D& operator = (const Image1D &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1D(Image1D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1D& operator = (Image1D &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - -/*! \class Image1DBuffer - * \brief Image interface for 1D buffer images. 
- */ -class Image1DBuffer : public Image -{ -public: - Image1DBuffer( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - const Buffer &buffer, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_BUFFER, - width, - 0, 0, 0, 0, 0, 0, 0, - buffer() - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - NULL, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image1DBuffer() { } - - __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { } - - Image1DBuffer& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer(const Image1DBuffer& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer& operator = (const Image1DBuffer &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer(Image1DBuffer&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer& operator = (Image1DBuffer &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - -/*! \class Image1DArray - * \brief Image interface for arrays of 1D images. 
- */ -class Image1DArray : public Image -{ -public: - Image1DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t rowPitch, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_ARRAY, - width, - 0, 0, // height, depth (unused) - arraySize, - rowPitch, - 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image1DArray() { } - - __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { } - - Image1DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DArray(const Image1DArray& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DArray& operator = (const Image1DArray &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DArray(Image1DArray&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DArray& operator = (Image1DArray &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if defined(CL_VERSION_1_2) - - -/*! \brief Class interface for 2D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image2D : public Image -{ -public: - /*! \brief Constructs a 1D Image in a specified context. 
- * - * Wraps clCreateImage(). - */ - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - bool useCreateImage; - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif defined(CL_VERSION_1_2) - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if defined(CL_VERSION_1_2) - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - if (!useCreateImage) - { - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - } - - //! \brief Default constructor - initializes to NULL. - Image2D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. 
- * Required for MSVC. - */ - Image2D(const Image2D& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2D& operator = (const Image2D &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2D(Image2D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2D& operator = (Image2D &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - - -#if !defined(CL_VERSION_1_2) -/*! \brief Class interface for GL 2D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D -{ -public: - /*! \brief Constructs an Image2DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture2D(). - */ - Image2DGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); - if (err != NULL) { - *err = error; - } - - } - - //! \brief Default constructor - initializes to NULL. - Image2DGL() : Image2D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { } - - /*! 
\brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2DGL& operator = (const cl_mem& rhs) - { - Image2D::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DGL(const Image2DGL& img) : Image2D(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DGL& operator = (const Image2DGL &img) - { - Image2D::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DGL(Image2DGL&& img) CL_HPP_NOEXCEPT : Image2D(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DGL& operator = (Image2DGL &&img) - { - Image2D::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if !defined(CL_VERSION_1_2) - -#if defined(CL_VERSION_1_2) -/*! \class Image2DArray - * \brief Image interface for arrays of 2D images. 
- */ -class Image2DArray : public Image -{ -public: - Image2DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t height, - ::size_t rowPitch, - ::size_t slicePitch, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D_ARRAY, - width, - height, - 0, // depth (unused) - arraySize, - rowPitch, - slicePitch, - 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DArray() { } - - __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { } - - Image2DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DArray(const Image2DArray& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DArray& operator = (const Image2DArray &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DArray(Image2DArray&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DArray& operator = (Image2DArray &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for 3D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3D : public Image -{ -public: - /*! 
\brief Constructs a 3D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - bool useCreateImage; - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif defined(CL_VERSION_1_2) - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if defined(CL_VERSION_1_2) - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE3D, - width, - height, - depth, - 0, // array size (unused) - row_pitch, - slice_pitch, - 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - if (!useCreateImage) - { - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - } - - //! \brief Default constructor - initializes to NULL. - Image3D() : Image() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. 
- */ - Image3D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3D(const Image3D& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3D& operator = (const Image3D &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3D(Image3D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3D& operator = (Image3D &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - -#if !defined(CL_VERSION_1_2) -/*! \brief Class interface for GL 3D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3DGL : public Image3D -{ -public: - /*! \brief Constructs an Image3DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture3D(). - */ - Image3DGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image3DGL() : Image3D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. 
- */ - __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3DGL& operator = (const cl_mem& rhs) - { - Image3D::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3DGL(const Image3DGL& img) : Image3D(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3DGL& operator = (const Image3DGL &img) - { - Image3D::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3DGL(Image3DGL&& img) CL_HPP_NOEXCEPT : Image3D(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3DGL& operator = (Image3DGL &&img) - { - Image3D::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if !defined(CL_VERSION_1_2) - -#if defined(CL_VERSION_1_2) -/*! \class ImageGL - * \brief general image interface for GL interop. - * We abstract the 2D and 3D GL images into a single instance here - * that wraps all GL sourced images on the grounds that setup information - * was performed by OpenCL anyway. 
- */ -class ImageGL : public Image -{ -public: - ImageGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); - if (err != NULL) { - *err = error; - } - } - - ImageGL() : Image() { } - - __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { } - - ImageGL& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - ImageGL(const ImageGL& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - ImageGL& operator = (const ImageGL &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - ImageGL(ImageGL&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - ImageGL& operator = (ImageGL &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for GL Render Buffer Memory Objects. -* -* This is provided to facilitate interoperability with OpenGL. -* -* See Memory for details about copy semantics, etc. -* -* \see Memory -*/ -class BufferRenderGL : -#if defined(CL_VERSION_1_2) - public ImageGL -#else // #if defined(CL_VERSION_1_2) - public Image2DGL -#endif //#if defined(CL_VERSION_1_2) -{ -public: - /*! \brief Constructs a BufferRenderGL in a specified context, from a given - * GL Renderbuffer. 
- * - * Wraps clCreateFromGLRenderbuffer(). - */ - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. -#if defined(CL_VERSION_1_2) - BufferRenderGL() : ImageGL() {}; -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL() : Image2DGL() {}; -#endif //#if defined(CL_VERSION_1_2) - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ -#if defined(CL_VERSION_1_2) - __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : ImageGL(buffer) { } -#else // #if defined(CL_VERSION_1_2) - __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Image2DGL(buffer) { } -#endif //#if defined(CL_VERSION_1_2) - - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL& operator = (const cl_mem& rhs) - { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(rhs); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(rhs); -#endif //#if defined(CL_VERSION_1_2) - - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ -#if defined(CL_VERSION_1_2) - BufferRenderGL(const BufferRenderGL& buf) : ImageGL(buf) {} -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL(const BufferRenderGL& buf) : Image2DGL(buf) {} -#endif //#if defined(CL_VERSION_1_2) - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. 
- */ - BufferRenderGL& operator = (const BufferRenderGL &rhs) - { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(rhs); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(rhs); -#endif //#if defined(CL_VERSION_1_2) - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ -#if defined(CL_VERSION_1_2) - BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT : ImageGL(std::move(buf)) {} -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT : Image2DGL(std::move(buf)) {} -#endif //#if defined(CL_VERSION_1_2) - - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL& operator = (BufferRenderGL &&buf) - { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(std::move(buf)); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(std::move(buf)); -#endif //#if defined(CL_VERSION_1_2) - - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_, type, gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief Class interface for cl_sampler. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_sampler as the original. For details, see - * clRetainSampler() and clReleaseSampler(). - * - * \see cl_sampler - */ -class Sampler : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Sampler() { } - - /*! \brief Constructs a Sampler in a specified context. - * - * Wraps clCreateSampler(). 
- */ - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructor from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_sampler - * into the new Sampler object. - */ - __CL_EXPLICIT_CONSTRUCTORS Sampler(const cl_sampler& sampler) : detail::Wrapper(sampler) { } - - /*! \brief Assignment operator from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseSampler() on the value previously held by this instance. - */ - Sampler& operator = (const cl_sampler& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Sampler(const Sampler& sam) : detail::Wrapper(sam) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Sampler& operator = (const Sampler &sam) - { - detail::Wrapper::operator=(sam); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Sampler(Sampler&& sam) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(sam)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Sampler& operator = (Sampler &&sam) - { - detail::Wrapper::operator=(std::move(sam)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetSamplerInfo(). 
- template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - //! \brief Wrapper for clGetSamplerInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -class Program; -class CommandQueue; -class Kernel; - -//! \brief Class interface for specifying NDRange values. -class NDRange -{ -private: - size_t<3> sizes_; - cl_uint dimensions_; - -public: - //! \brief Default constructor - resulting range has zero dimensions. - NDRange() - : dimensions_(0) - { } - - //! \brief Constructs one-dimensional range. - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_[0] = size0; - } - - //! \brief Constructs two-dimensional range. - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_[0] = size0; - sizes_[1] = size1; - } - - //! \brief Constructs three-dimensional range. - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_[0] = size0; - sizes_[1] = size1; - sizes_[2] = size2; - } - - /*! \brief Conversion operator to const ::size_t *. - * - * \returns a pointer to the size of the first dimension. - */ - operator const ::size_t*() const { - return (const ::size_t*) sizes_; - } - - //! \brief Queries the number of dimensions in the range. - ::size_t dimensions() const { return dimensions_; } -}; - -//! \brief A zero-dimensional range. -static const NDRange NullRange; - -//! 
\brief Local address wrapper for use with Kernel::setArg -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T&) { return sizeof(T); } - static const T* ptr(const T& value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } - static const void* ptr(const LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -/*! __local - * \brief Helper function for generating LocalSpaceArg objects. - * Deprecated. Replaced with Local. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg -__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -/*! Local - * \brief Helper function for generating LocalSpaceArg objects. - */ -inline LocalSpaceArg -Local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -//class KernelFunctor; - -/*! \brief Class interface for cl_kernel. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_kernel as the original. For details, see - * clRetainKernel() and clReleaseKernel(). - * - * \see cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - //! \brief Default constructor - initializes to NULL. - Kernel() { } - - /*! \brief Constructor from cl_kernel - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_kernel - * into the new Kernel object. - */ - __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper(kernel) { } - - /*! \brief Assignment operator from cl_kernel - takes ownership. 
- * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseKernel() on the value previously held by this instance. - */ - Kernel& operator = (const cl_kernel& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Kernel& operator = (const Kernel &kernel) - { - detail::Wrapper::operator=(kernel); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Kernel(Kernel&& kernel) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(kernel)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Kernel& operator = (Kernel &&kernel) - { - detail::Wrapper::operator=(std::move(kernel)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_2) - template - cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), - __GET_KERNEL_ARG_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getArgInfo(cl_uint argIndex, cl_int* err = NULL) const - { - 
typename detail::param_traits< - detail::cl_kernel_arg_info, name>::param_type param; - cl_int result = getArgInfo(argIndex, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -#endif // #if defined(CL_VERSION_1_2) - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, const T &value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, const void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } -}; - -/*! \class Program - * \brief Program interface that implements cl_program. 
- */ -class Program : public detail::Wrapper -{ -public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - - Program( - const STRING_CLASS& source, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - const char * strings = source.c_str(); - const ::size_t length = source.size(); - - Context context = Context::getDefault(err); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - - error = ::clBuildProgram( - object_, - 0, - NULL, - "", - NULL, - NULL); - - detail::errHandler(error, __BUILD_PROGRAM_ERR); - } - - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const STRING_CLASS& source, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - const char * strings = source.c_str(); - const ::size_t length = source.size(); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - - error = ::clBuildProgram( - object_, - 0, - NULL, - "", - NULL, - NULL); - - detail::errHandler(error, __BUILD_PROGRAM_ERR); - } - - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - - for (::size_t i = 0; i < n; ++i) { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - /** - * Construct a program 
object from a list of devices and a per-device list of binaries. - * \param context A valid OpenCL context in which to construct the program. - * \param devices A vector of OpenCL device objects for which the program will be created. - * \param binaries A vector of pairs of a pointer to a binary object and its length. - * \param binaryStatus An optional vector that on completion will be resized to - * match the size of binaries and filled with values to specify if each binary - * was successfully loaded. - * Set to CL_SUCCESS if the binary was successfully loaded. - * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. - * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. - * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: - * CL_INVALID_CONTEXT if context is not a valid context. - * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; - * or if any entry in binaries is NULL or has length 0. - * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. - * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- */ - Program( - const Context& context, - const VECTOR_CLASS& devices, - const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t numDevices = devices.size(); - - // Catch size mismatch early and return - if(binaries.size() != numDevices) { - error = CL_INVALID_VALUE; - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - return; - } - - ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**)); - - for (::size_t i = 0; i < numDevices; ++i) { - images[i] = (const unsigned char*)binaries[i].first; - lengths[i] = binaries[(int)i].second; - } - - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - if(binaryStatus) { - binaryStatus->resize(numDevices); - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - deviceIDs, - lengths, images, (binaryStatus != NULL && numDevices > 0) - ? &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - -#if defined(CL_VERSION_1_2) - /** - * Create program using builtin kernels. 
- * \param kernelNames Semi-colon separated list of builtin kernel names - */ - Program( - const Context& context, - const VECTOR_CLASS& devices, - const STRING_CLASS& kernelNames, - cl_int* err = NULL) - { - cl_int error; - - - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateProgramWithBuiltInKernels( - context(), - (cl_uint) devices.size(), - deviceIDs, - kernelNames.c_str(), - &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) - - Program() { } - - __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper(program) { } - - Program& operator = (const cl_program& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Program(const Program& program) : detail::Wrapper(program) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Program& operator = (const Program &program) - { - detail::Wrapper::operator=(program); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Program(Program&& program) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(program)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. 
- */ - Program& operator = (Program &&program) - { - detail::Wrapper::operator=(std::move(program)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - cl_int build( - const VECTOR_CLASS& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - deviceIDs, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - cl_int build( - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - object_, - 0, - NULL, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - -#if defined(CL_VERSION_1_2) - cl_int compile( - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clCompileProgram( - object_, - 0, - NULL, - options, - 0, - NULL, - NULL, - notifyFptr, - data), - __COMPILE_PROGRAM_ERR); - } -#endif - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return 
detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -#if defined(CL_VERSION_1_2) -inline Program linkProgram( - Program input1, - Program input2, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL, - cl_int* err = NULL) -{ - cl_int error_local = CL_SUCCESS; - - cl_program programs[2] = { input1(), input2() }; - - Context ctx = input1.getInfo(&error_local); - if(error_local!=CL_SUCCESS) { - detail::errHandler(error_local, __LINK_PROGRAM_ERR); - } - - cl_program prog = ::clLinkProgram( - ctx(), - 0, - NULL, - options, - 2, - programs, - notifyFptr, - data, - &error_local); - - detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); - if (err != NULL) { - *err = error_local; - } - - return Program(prog); -} - -inline Program linkProgram( - VECTOR_CLASS inputPrograms, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL, - cl_int* err = NULL) -{ - cl_int error_local = CL_SUCCESS; 
- - cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program)); - - if (programs != NULL) { - for (unsigned int i = 0; i < inputPrograms.size(); i++) { - programs[i] = inputPrograms[i](); - } - } - - Context ctx; - if(inputPrograms.size() > 0) { - ctx = inputPrograms[0].getInfo(&error_local); - if(error_local!=CL_SUCCESS) { - detail::errHandler(error_local, __LINK_PROGRAM_ERR); - } - } - cl_program prog = ::clLinkProgram( - ctx(), - 0, - NULL, - options, - (cl_uint)inputPrograms.size(), - programs, - notifyFptr, - data, - &error_local); - - detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); - if (err != NULL) { - *err = error_local; - } - - return Program(prog); -} -#endif - -template<> -inline VECTOR_CLASS cl::Program::getInfo(cl_int* err) const -{ - VECTOR_CLASS< ::size_t> sizes = getInfo(); - VECTOR_CLASS binaries; - for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) - { - char *ptr = NULL; - if (*s != 0) - ptr = new char[*s]; - binaries.push_back(ptr); - } - - cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); - if (err != NULL) { - *err = result; - } - return binaries; -} - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. 
- */ -class CommandQueue : public detail::Wrapper -{ -private: -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - static std::atomic default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static volatile int default_initialized_; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static CommandQueue default_; - static volatile cl_int default_error_; -public: - CommandQueue( - cl_command_queue_properties properties, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - } - /*! - * \brief Constructs a CommandQueue for an implementation defined device in the given context - */ - explicit CommandQueue( - const Context& context, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - VECTOR_CLASS devices; - error = context.getInfo(CL_CONTEXT_DEVICES, &devices); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) - { - if (err != NULL) { - *err = error; - } - return; - } - - object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (err != NULL) { - *err = error; - } - - } - - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. 
- */ - CommandQueue(const CommandQueue& queue) : detail::Wrapper(queue) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - CommandQueue& operator = (const CommandQueue &queue) - { - detail::Wrapper::operator=(queue); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - CommandQueue(CommandQueue&& queue) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(queue)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - CommandQueue& operator = (CommandQueue &&queue) - { - detail::Wrapper::operator=(std::move(queue)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - static CommandQueue getDefault(cl_int * err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) { - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) { - // Assume writes will propagate eventually... - while(default_initialized_ != __DEFAULT_INITIALIZED) { - detail::fence(); - } - - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - - default_ = CommandQueue(context, device, 0, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... 
- default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - - if (err != NULL) { - *err = default_error_; - } - return default_; - - } - - CommandQueue() { } - - __CL_EXPLICIT_CONSTRUCTORS CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue& operator = (const cl_command_queue& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_WRITE_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQEUE_COPY_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#if defined(CL_VERSION_1_1) - - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_READ_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - const void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQEUE_COPY_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif //if defined(CL_VERSION_1_1) - -#if defined(CL_VERSION_1_2) - /** - * Enqueue a command to fill a buffer object with a pattern - * of a given size. The pattern is specified a as vector. - * \tparam PatternType The datatype of the pattern field. - * The pattern type must be an accepted OpenCL data type. - */ - template - cl_int enqueueFillBuffer( - const Buffer& buffer, - PatternType pattern, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillBuffer( - object_, - buffer(), - static_cast(&pattern), - sizeof(PatternType), - offset, - size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_READ_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA floating-point color value if - * the image channel data type is not an unnormalized signed or - * unsigned data type. 
- */ - cl_int enqueueFillImage( - const Image& image, - cl_float4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA signed integer color value if - * the image channel data type is an unnormalized signed integer - * type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_int4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA unsigned integer color value if - * the image channel data type is an unnormalized unsigned integer - * type. 
- */ - cl_int enqueueFillImage( - const Image& image, - cl_uint4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_event tmp; - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - if (event != NULL && error == CL_SUCCESS) - *event = tmp; - - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_event tmp; - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - if (event != NULL && error == CL_SUCCESS) - *event = tmp; - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? 
(cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueues a marker command which waits for either a list of events to complete, - * or all previously enqueued commands to complete. - * - * Enqueues a marker command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command returns an event which can be waited on, - * i.e. this event can be waited on to insure that all events either in the event_wait_list - * or all previously enqueued commands, queued before this command to command_queue, - * have completed. - */ - cl_int enqueueMarkerWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarkerWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MARKER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * A synchronization point that enqueues a barrier operation. - * - * Enqueues a barrier command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command blocks command execution, that is, any - * following commands enqueued after it do not execute until it completes. This command - * returns an event which can be waited on, i.e. 
this event can be waited on to insure that - * all events either in the event_wait_list or all previously enqueued commands, queued - * before this command to command_queue, have completed. - */ - cl_int enqueueBarrierWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueBarrierWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_BARRIER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command to indicate with which device a set of memory objects - * should be associated. - */ - cl_int enqueueMigrateMemObjects( - const VECTOR_CLASS &memObjects, - cl_mem_migration_flags flags, - const VECTOR_CLASS* events = NULL, - Event* event = NULL - ) const - { - cl_event tmp; - - cl_mem* localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); - for( int i = 0; i < (int)memObjects.size(); ++i ) { - localMemObjects[i] = memObjects[i](); - } - - - cl_int err = detail::errHandler( - ::clEnqueueMigrateMemObjects( - object_, - (cl_uint)memObjects.size(), - static_cast(localMemObjects), - flags, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local = NullRange, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NDRANGE_KERNEL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueTask( - const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_TASK_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueNativeKernel( - void (CL_CALLBACK *userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? 
(cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, - (mem_locs != NULL && mem_locs->size() > 0) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NATIVE_KERNEL); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarker( - object_, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MARKER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - events.size() > 0 ? (const cl_event*) &events.front() : NULL), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } -#endif // #if defined(CL_VERSION_1_1) - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? 
(const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined (USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); -#endif -#if 
defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); -#endif - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_2) -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_1) - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } -#endif // #if defined(CL_VERSION_1_1) - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -CL_WEAK_ATTRIB_PREFIX std::atomic CL_WEAK_ATTRIB_SUFFIX CommandQueue::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -CL_WEAK_ATTRIB_PREFIX volatile int CL_WEAK_ATTRIB_SUFFIX CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - -CL_WEAK_ATTRIB_PREFIX CommandQueue CL_WEAK_ATTRIB_SUFFIX CommandQueue::default_; -CL_WEAK_ATTRIB_PREFIX volatile cl_int CL_WEAK_ATTRIB_SUFFIX CommandQueue::default_error_ = CL_SUCCESS; - -template< typename IteratorType > -Buffer::Buffer( - const Context &context, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, 
__CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if( !useHostPtr ) { - CommandQueue queue(context, 0, &error); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } -} - -template< typename IteratorType > -Buffer::Buffer( - const CommandQueue &queue, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if (readOnly) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if (useHostPtr) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); - - Context context = queue.getInfo(); - - if (useHostPtr) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } - else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if (!useHostPtr) { - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } -} - -inline cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - 
Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - void * result = ::clEnqueueMapBuffer( - queue(), buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; -} - -inline cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (error != CL_SUCCESS) { - return error; - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - queue(), memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? 
&tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; -} - -inline cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, startIterator, endIterator, buffer); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, buffer, startIterator, endIterator); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses specified queue. 
- */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator-startIterator; - ::size_t byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } -#if defined(_MSC_VER) - std::copy( - startIterator, - endIterator, - stdext::checked_array_iterator( - pointer, length)); -#else - std::copy(startIterator, endIterator, pointer); -#endif - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses specified queue. 
- */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator-startIterator; - ::size_t byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } - std::copy(pointer, pointer + length, startIterator); - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -#if defined(CL_VERSION_1_1) -inline cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - const void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int 
error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferRect( - src, - dst, - src_origin, - dst_origin, - region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - events, - event); -} -#endif - -inline cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - 
events, - event); -} - -inline cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImage( - src, - dst, - src_origin, - dst_origin, - region, - events, - event); -} - -inline cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImageToBuffer( - src, - dst, - src_origin, - region, - dst_offset, - events, - event); -} - -inline cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferToImage( - src, - dst, - src_offset, - dst_origin, - region, - events, - event); -} - - -inline cl_int flush(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.flush(); -} - -inline cl_int finish(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - - return queue.finish(); -} - -// Kernel Functor support -// New interface as of September 2011 -// Requires the C++11 std::tr1::function (note do not support TR1) -// Visual Studio 2010 and GCC 4.2 - -struct EnqueueArgs -{ - 
CommandQueue queue_; - const NDRange offset_; - const NDRange global_; - const NDRange local_; - VECTOR_CLASS events_; - - EnqueueArgs(NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - - } - - EnqueueArgs(NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(Event e, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - 
global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } -}; - -namespace detail { - -class NullType {}; - -template -struct SetArg -{ - static void set (Kernel kernel, T0 arg) - { - kernel.setArg(index, arg); - } -}; - -template -struct SetArg -{ - static void set (Kernel, NullType) - { - } -}; - -template < - typename T0, typename T1, typename T2, typename T3, - typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, - typename T12, typename T13, typename T14, typename T15, - typename T16, typename T17, typename T18, typename T19, - typename T20, typename T21, typename T22, typename T23, - typename T24, typename T25, 
typename T26, typename T27, - typename T28, typename T29, typename T30, typename T31 - -> -class KernelFunctorGlobal -{ -private: - Kernel kernel_; - -public: - KernelFunctorGlobal( - Kernel kernel) : - kernel_(kernel) - {} - - KernelFunctorGlobal( - const Program& program, - const STRING_CLASS name, - cl_int * err = NULL) : - kernel_(program, name.c_str(), err) - {} - - Event operator() ( - const EnqueueArgs& args, - T0 t0, - T1 t1 = NullType(), - T2 t2 = NullType(), - T3 t3 = NullType(), - T4 t4 = NullType(), - T5 t5 = NullType(), - T6 t6 = NullType(), - T7 t7 = NullType(), - T8 t8 = NullType(), - T9 t9 = NullType(), - T10 t10 = NullType(), - T11 t11 = NullType(), - T12 t12 = NullType(), - T13 t13 = NullType(), - T14 t14 = NullType(), - T15 t15 = NullType(), - T16 t16 = NullType(), - T17 t17 = NullType(), - T18 t18 = NullType(), - T19 t19 = NullType(), - T20 t20 = NullType(), - T21 t21 = NullType(), - T22 t22 = NullType(), - T23 t23 = NullType(), - T24 t24 = NullType(), - T25 t25 = NullType(), - T26 t26 = NullType(), - T27 t27 = NullType(), - T28 t28 = NullType(), - T29 t29 = NullType(), - T30 t30 = NullType(), - T31 t31 = NullType() - - ) - { - Event event; - SetArg<0, T0>::set(kernel_, t0); - SetArg<1, T1>::set(kernel_, t1); - SetArg<2, T2>::set(kernel_, t2); - SetArg<3, T3>::set(kernel_, t3); - SetArg<4, T4>::set(kernel_, t4); - SetArg<5, T5>::set(kernel_, t5); - SetArg<6, T6>::set(kernel_, t6); - SetArg<7, T7>::set(kernel_, t7); - SetArg<8, T8>::set(kernel_, t8); - SetArg<9, T9>::set(kernel_, t9); - SetArg<10, T10>::set(kernel_, t10); - SetArg<11, T11>::set(kernel_, t11); - SetArg<12, T12>::set(kernel_, t12); - SetArg<13, T13>::set(kernel_, t13); - SetArg<14, T14>::set(kernel_, t14); - SetArg<15, T15>::set(kernel_, t15); - SetArg<16, T16>::set(kernel_, t16); - SetArg<17, T17>::set(kernel_, t17); - SetArg<18, T18>::set(kernel_, t18); - SetArg<19, T19>::set(kernel_, t19); - SetArg<20, T20>::set(kernel_, t20); - SetArg<21, T21>::set(kernel_, t21); - SetArg<22, 
T22>::set(kernel_, t22); - SetArg<23, T23>::set(kernel_, t23); - SetArg<24, T24>::set(kernel_, t24); - SetArg<25, T25>::set(kernel_, t25); - SetArg<26, T26>::set(kernel_, t26); - SetArg<27, T27>::set(kernel_, t27); - SetArg<28, T28>::set(kernel_, t28); - SetArg<29, T29>::set(kernel_, t29); - SetArg<30, T30>::set(kernel_, t30); - SetArg<31, T31>::set(kernel_, t31); - - - args.queue_.enqueueNDRangeKernel( - kernel_, - args.offset_, - args.global_, - args.local_, - &args.events_, - &event); - - return event; - } - -}; - -//------------------------------------------------------------------------------------------------------ - - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30, - typename T31> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! 
\brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30, - T31 arg31) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30, - arg31); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, 
- T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - 
typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> FunctorType; - - 
FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27> 
-struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType 
&functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - 
T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && 
defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef 
detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - 
typename T18, - typename T19, - typename T20, - typename T21, - typename T22> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) - // Fail variadic expansion for dev11 - static_assert(0, 
"Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, 
- NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, 
- NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18> -struct functionImplementation_ -< T0, - T1, - 
T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. 
If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) - 
// Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - 
functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - 
NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - 
NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - 
NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - 
NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, 
- NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - 
NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - 
FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) 
&& defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count 
for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! 
\brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3); - } - - -}; - -template< - typename T0, - typename T1, - typename T2> -struct functionImplementation_ -< T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2); - } - - -}; - -template< - typename T0, - typename T1> -struct functionImplementation_ -< T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0, - T1); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1) - { - return functor_( - enqueueArgs, - arg0, - arg1); - } - - -}; - -template< - typename T0> -struct functionImplementation_ -< T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. 
- typedef Event type_( - const EnqueueArgs&, - T0); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0) - { - return functor_( - enqueueArgs, - arg0); - } - - -}; - - - - - -} // namespace detail - -//---------------------------------------------------------------------------------------------- - -template < - typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, - typename T3 = detail::NullType, typename T4 = detail::NullType, - typename T5 = detail::NullType, typename T6 = detail::NullType, - typename T7 = detail::NullType, typename T8 = detail::NullType, - typename T9 = detail::NullType, typename T10 = detail::NullType, - typename T11 = detail::NullType, typename T12 = detail::NullType, - typename T13 = detail::NullType, typename T14 = detail::NullType, - typename T15 = detail::NullType, typename T16 = detail::NullType, - typename T17 = detail::NullType, typename T18 = detail::NullType, - typename T19 = detail::NullType, typename T20 = detail::NullType, - typename T21 = detail::NullType, typename T22 = detail::NullType, - typename T23 = detail::NullType, typename T24 = detail::NullType, - typename T25 = detail::NullType, typename T26 = detail::NullType, - typename T27 = detail::NullType, typename T28 = detail::NullType, - typename T29 = detail::NullType, typename T30 = detail::NullType, - typename T31 = detail::NullType - -> -struct make_kernel : - public detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - - > -{ -public: - typedef detail::KernelFunctorGlobal< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - - > FunctorType; - - make_kernel( - const Program& program, - const STRING_CLASS name, - cl_int * err = NULL) : - detail::functionImplementation_< - T0, T1, 
T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - - >( - FunctorType(program, name, err)) - {} - - make_kernel( - const Kernel kernel) : - detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - - >( - FunctorType(kernel)) - {} -}; - - -//---------------------------------------------------------------------------------------------------------------------- - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_ARG_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_ERR -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR -#undef __SET_PRINTF_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef 
__ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __CL_EXPLICIT_CONSTRUCTORS - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __CL_FUNCTION_TYPE - -// Extensions -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_VERSION_1_1) -#undef __INIT_CL_EXT_FCN_PTR -#endif // #if defined(CL_VERSION_1_1) -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -#undef __DEFAULT_NOT_INITIALIZED -#undef __DEFAULT_BEING_INITIALIZED -#undef __DEFAULT_INITIALIZED - -#undef CL_HPP_RVALUE_REFERENCES_SUPPORTED -#undef CL_HPP_NOEXCEPT - -} // namespace cl - -#endif // CL_HPP_ diff --git a/include/CL/cl_d3d10.h b/include/CL/cl_d3d10.h index 2b80d90cb..0d9950bed 100644 --- a/include/CL/cl_d3d10.h +++ b/include/CL/cl_d3d10.h @@ -21,6 +21,7 @@ #if _MSC_VER >=1500 #pragma warning( push ) #pragma warning( disable : 4201 ) +#pragma warning( disable : 5105 ) #endif #endif #include @@ -75,7 +76,7 @@ typedef cl_uint cl_d3d10_device_set_khr; /******************************************************************************/ -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( +typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void * d3d_object, @@ -84,27 +85,27 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL 
*clGetDeviceIDsFromD3D10KHR_fn)( cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -112,7 +113,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -120,6 +121,31 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; +/*************************************************************** +* cl_intel_sharing_format_query_d3d10 +***************************************************************/ +#define cl_intel_sharing_format_query_d3d10 1 + +/* when cl_khr_d3d10_sharing is supported */ 
+ +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedD3D10TextureFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + DXGI_FORMAT* d3d10_formats, + cl_uint* num_texture_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedD3D10TextureFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + DXGI_FORMAT* d3d10_formats, + cl_uint* num_texture_formats) ; + #ifdef __cplusplus } #endif diff --git a/include/CL/cl_d3d11.h b/include/CL/cl_d3d11.h index 10023dde0..9393e5c84 100644 --- a/include/CL/cl_d3d11.h +++ b/include/CL/cl_d3d11.h @@ -21,6 +21,7 @@ #if _MSC_VER >=1500 #pragma warning( push ) #pragma warning( disable : 4201 ) +#pragma warning( disable : 5105 ) #endif #endif #include @@ -75,7 +76,7 @@ typedef cl_uint cl_d3d11_device_set_khr; /******************************************************************************/ -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( +typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void * d3d_object, @@ -84,27 +85,27 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( +typedef cl_mem (CL_API_CALL 
*clCreateFromD3D11Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -112,7 +113,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -120,6 +121,33 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; +/*************************************************************** +* cl_intel_sharing_format_query_d3d11 +***************************************************************/ +#define cl_intel_sharing_format_query_d3d11 1 + +/* when cl_khr_d3d11_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedD3D11TextureFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + DXGI_FORMAT* d3d11_formats, + cl_uint* num_texture_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedD3D11TextureFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + DXGI_FORMAT* d3d11_formats, + cl_uint* num_texture_formats) ; + #ifdef __cplusplus } #endif diff --git a/include/CL/cl_dx9_media_sharing.h b/include/CL/cl_dx9_media_sharing.h index 048937005..fd03bbdc2 100644 --- a/include/CL/cl_dx9_media_sharing.h +++ 
b/include/CL/cl_dx9_media_sharing.h @@ -32,7 +32,19 @@ typedef cl_uint cl_dx9_media_adapter_type_khr; typedef cl_uint cl_dx9_media_adapter_set_khr; #if defined(_WIN32) +#if defined(_MSC_VER) +#if _MSC_VER >=1500 +#pragma warning( push ) +#pragma warning( disable : 4201 ) +#pragma warning( disable : 5105 ) +#endif +#endif #include +#if defined(_MSC_VER) +#if _MSC_VER >=1500 +#pragma warning( pop ) +#endif +#endif typedef struct _cl_dx9_surface_info_khr { IDirect3DSurface9 *resource; @@ -76,7 +88,7 @@ typedef struct _cl_dx9_surface_info_khr /******************************************************************************/ -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( +typedef cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr * media_adapter_type, @@ -86,7 +98,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_f cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, @@ -94,7 +106,7 @@ typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( cl_uint plane, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -102,7 +114,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( +typedef cl_int (CL_API_CALL 
*clEnqueueReleaseDX9MediaSurfacesKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -110,6 +122,144 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; +/*************************************** +* cl_intel_dx9_media_sharing extension * +****************************************/ + +#define cl_intel_dx9_media_sharing 1 + +typedef cl_uint cl_dx9_device_source_intel; +typedef cl_uint cl_dx9_device_set_intel; + +/* error codes */ +#define CL_INVALID_DX9_DEVICE_INTEL -1010 +#define CL_INVALID_DX9_RESOURCE_INTEL -1011 +#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012 +#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013 + +/* cl_dx9_device_source_intel */ +#define CL_D3D9_DEVICE_INTEL 0x4022 +#define CL_D3D9EX_DEVICE_INTEL 0x4070 +#define CL_DXVA_DEVICE_INTEL 0x4071 + +/* cl_dx9_device_set_intel */ +#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024 +#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025 + +/* cl_context_info */ +#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026 +#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072 +#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073 + +/* cl_mem_info */ +#define CL_MEM_DX9_RESOURCE_INTEL 0x4027 +#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074 + +/* cl_image_info */ +#define CL_IMAGE_DX9_PLANE_INTEL 0x4075 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A +#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B +/******************************************************************************/ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDsFromDX9INTEL( + cl_platform_id platform, + cl_dx9_device_source_intel dx9_device_source, + void* dx9_object, + cl_dx9_device_set_intel dx9_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)( + cl_platform_id 
platform, + cl_dx9_device_source_intel dx9_device_source, + void* dx9_object, + cl_dx9_device_set_intel dx9_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromDX9MediaSurfaceINTEL( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +/*************************************************************** +* cl_intel_sharing_format_query_dx9 +***************************************************************/ +#define cl_intel_sharing_format_query_dx9 1 
+ +/* when cl_khr_dx9_media_sharing or cl_intel_dx9_media_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedDX9MediaSurfaceFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + D3DFORMAT* dx9_formats, + cl_uint* num_surface_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedDX9MediaSurfaceFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + D3DFORMAT* dx9_formats, + cl_uint* num_surface_formats) ; + #ifdef __cplusplus } #endif diff --git a/include/CL/cl_dx9_media_sharing_intel.h b/include/CL/cl_dx9_media_sharing_intel.h index 4525a175e..f6518d7f6 100644 --- a/include/CL/cl_dx9_media_sharing_intel.h +++ b/include/CL/cl_dx9_media_sharing_intel.h @@ -13,158 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ -/*****************************************************************************\ - -Copyright (c) 2013-2019 Intel Corporation All Rights Reserved. - -THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE -MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -File Name: cl_dx9_media_sharing_intel.h - -Abstract: - -Notes: - -\*****************************************************************************/ - -#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H -#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H - -#include -#include -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/*************************************** -* cl_intel_dx9_media_sharing extension * -****************************************/ - -#define cl_intel_dx9_media_sharing 1 - -typedef cl_uint cl_dx9_device_source_intel; -typedef cl_uint cl_dx9_device_set_intel; - -/* error codes */ -#define CL_INVALID_DX9_DEVICE_INTEL -1010 -#define CL_INVALID_DX9_RESOURCE_INTEL -1011 -#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012 -#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013 - -/* cl_dx9_device_source_intel */ -#define CL_D3D9_DEVICE_INTEL 0x4022 -#define CL_D3D9EX_DEVICE_INTEL 0x4070 -#define CL_DXVA_DEVICE_INTEL 0x4071 - -/* cl_dx9_device_set_intel */ -#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024 -#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025 - -/* cl_context_info */ -#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026 -#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072 -#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073 - -/* cl_mem_info */ -#define CL_MEM_DX9_RESOURCE_INTEL 0x4027 -#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074 - -/* cl_image_info */ -#define CL_IMAGE_DX9_PLANE_INTEL 0x4075 - -/* cl_command_type */ -#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A -#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B -/******************************************************************************/ - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetDeviceIDsFromDX9INTEL( - cl_platform_id platform, - cl_dx9_device_source_intel dx9_device_source, - void* dx9_object, - cl_dx9_device_set_intel dx9_device_set, - cl_uint num_entries, - cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int 
(CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)( - cl_platform_id platform, - cl_dx9_device_source_intel dx9_device_source, - void* dx9_object, - cl_dx9_device_set_intel dx9_device_set, - cl_uint num_entries, - cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromDX9MediaSurfaceINTEL( - cl_context context, - cl_mem_flags flags, - IDirect3DSurface9* resource, - HANDLE sharedHandle, - UINT plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)( - cl_context context, - cl_mem_flags flags, - IDirect3DSurface9* resource, - HANDLE sharedHandle, - UINT plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueAcquireDX9ObjectsINTEL( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReleaseDX9ObjectsINTEL( - cl_command_queue command_queue, - cl_uint num_objects, - cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */ +#include +#pragma 
message("The Intel DX9 media sharing extensions have been moved into cl_dx9_media_sharing.h. Please include cl_dx9_media_sharing.h directly.") diff --git a/include/CL/cl_egl.h b/include/CL/cl_egl.h index c8bde80e1..357a37c02 100644 --- a/include/CL/cl_egl.h +++ b/include/CL/cl_egl.h @@ -56,7 +56,7 @@ clCreateFromEGLImageKHR(cl_context context, const cl_egl_image_properties_khr * properties, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( cl_context context, CLeglDisplayKHR egldisplay, CLeglImageKHR eglimage, @@ -73,7 +73,7 @@ clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -90,7 +90,7 @@ clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -107,7 +107,7 @@ clCreateEventFromEGLSyncKHR(cl_context context, CLeglDisplayKHR display, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( +typedef cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h index b57acfc87..b6ea825b5 100644 --- a/include/CL/cl_ext.h +++ b/include/CL/cl_ext.h @@ -26,6 +26,494 @@ extern "C" { #include 
+/*************************************************************** +* cl_khr_command_buffer +***************************************************************/ +#define cl_khr_command_buffer 1 +#define CL_KHR_COMMAND_BUFFER_EXTENSION_NAME \ + "cl_khr_command_buffer" + +typedef cl_bitfield cl_device_command_buffer_capabilities_khr; +typedef struct _cl_command_buffer_khr* cl_command_buffer_khr; +typedef cl_uint cl_sync_point_khr; +typedef cl_uint cl_command_buffer_info_khr; +typedef cl_uint cl_command_buffer_state_khr; +typedef cl_properties cl_command_buffer_properties_khr; +typedef cl_bitfield cl_command_buffer_flags_khr; +typedef cl_properties cl_ndrange_kernel_command_properties_khr; +typedef struct _cl_mutable_command_khr* cl_mutable_command_khr; + +/* cl_device_info */ +#define CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR 0x12A9 +#define CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR 0x12AA + +/* cl_device_command_buffer_capabilities_khr - bitfield */ +#define CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR (1 << 0) +#define CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR (1 << 1) +#define CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR (1 << 2) +#define CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR (1 << 3) + +/* cl_command_buffer_properties_khr */ +#define CL_COMMAND_BUFFER_FLAGS_KHR 0x1293 + +/* cl_command_buffer_flags_khr */ +#define CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR (1 << 0) + +/* Error codes */ +#define CL_INVALID_COMMAND_BUFFER_KHR -1138 +#define CL_INVALID_SYNC_POINT_WAIT_LIST_KHR -1139 +#define CL_INCOMPATIBLE_COMMAND_QUEUE_KHR -1140 + +/* cl_command_buffer_info_khr */ +#define CL_COMMAND_BUFFER_QUEUES_KHR 0x1294 +#define CL_COMMAND_BUFFER_NUM_QUEUES_KHR 0x1295 +#define CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR 0x1296 +#define CL_COMMAND_BUFFER_STATE_KHR 0x1297 +#define CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR 0x1298 + +/* cl_command_buffer_state_khr */ +#define CL_COMMAND_BUFFER_STATE_RECORDING_KHR 0 +#define 
CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR 1 +#define CL_COMMAND_BUFFER_STATE_PENDING_KHR 2 +#define CL_COMMAND_BUFFER_STATE_INVALID_KHR 3 + +/* cl_command_type */ +#define CL_COMMAND_COMMAND_BUFFER_KHR 0x12A8 + + +typedef cl_command_buffer_khr (CL_API_CALL * +clCreateCommandBufferKHR_fn)( + cl_uint num_queues, + const cl_command_queue* queues, + const cl_command_buffer_properties_khr* properties, + cl_int* errcode_ret) ; + +typedef cl_int (CL_API_CALL * +clFinalizeCommandBufferKHR_fn)( + cl_command_buffer_khr command_buffer) ; + +typedef cl_int (CL_API_CALL * +clRetainCommandBufferKHR_fn)( + cl_command_buffer_khr command_buffer) ; + +typedef cl_int (CL_API_CALL * +clReleaseCommandBufferKHR_fn)( + cl_command_buffer_khr command_buffer) ; + +typedef cl_int (CL_API_CALL * +clEnqueueCommandBufferKHR_fn)( + cl_uint num_queues, + cl_command_queue* queues, + cl_command_buffer_khr command_buffer, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +typedef cl_int (CL_API_CALL * +clCommandBarrierWithWaitListKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyBufferKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyBufferRectKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + size_t src_row_pitch, + size_t 
src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyBufferToImageKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyImageKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyImageToBufferKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* region, + size_t dst_offset, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandFillBufferKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const void* pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandFillImageKHR_fn)( + cl_command_buffer_khr command_buffer, + 
cl_command_queue command_queue, + cl_mem image, + const void* fill_color, + const size_t* origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandNDRangeKernelKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + const cl_ndrange_kernel_command_properties_khr* properties, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clGetCommandBufferInfoKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_buffer_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_command_buffer_khr CL_API_CALL +clCreateCommandBufferKHR( + cl_uint num_queues, + const cl_command_queue* queues, + const cl_command_buffer_properties_khr* properties, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinalizeCommandBufferKHR( + cl_command_buffer_khr command_buffer) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandBufferKHR( + cl_command_buffer_khr command_buffer) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandBufferKHR( + cl_command_buffer_khr command_buffer) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCommandBufferKHR( + cl_uint num_queues, + cl_command_queue* queues, + cl_command_buffer_khr command_buffer, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandBarrierWithWaitListKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue 
command_queue, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyBufferKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyBufferRectKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyBufferToImageKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyImageKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY 
cl_int CL_API_CALL +clCommandCopyImageToBufferKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* region, + size_t dst_offset, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandFillBufferKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const void* pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandFillImageKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const void* fill_color, + const size_t* origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandNDRangeKernelKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + const cl_ndrange_kernel_command_properties_khr* properties, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandBufferInfoKHR( + cl_command_buffer_khr command_buffer, + cl_command_buffer_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#endif /* CL_NO_PROTOTYPES */ + 
+/*************************************************************** +* cl_khr_command_buffer_mutable_dispatch +***************************************************************/ +#define cl_khr_command_buffer_mutable_dispatch 1 +#define CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME \ + "cl_khr_command_buffer_mutable_dispatch" + +typedef cl_uint cl_command_buffer_structure_type_khr; +typedef cl_bitfield cl_mutable_dispatch_fields_khr; +typedef cl_uint cl_mutable_command_info_khr; +typedef struct _cl_mutable_dispatch_arg_khr { + cl_uint arg_index; + size_t arg_size; + const void* arg_value; +} cl_mutable_dispatch_arg_khr; +typedef struct _cl_mutable_dispatch_exec_info_khr { + cl_uint param_name; + size_t param_value_size; + const void* param_value; +} cl_mutable_dispatch_exec_info_khr; +typedef struct _cl_mutable_dispatch_config_khr { + cl_command_buffer_structure_type_khr type; + const void* next; + cl_mutable_command_khr command; + cl_uint num_args; + cl_uint num_svm_args; + cl_uint num_exec_infos; + cl_uint work_dim; + const cl_mutable_dispatch_arg_khr* arg_list; + const cl_mutable_dispatch_arg_khr* arg_svm_list; + const cl_mutable_dispatch_exec_info_khr* exec_info_list; + const size_t* global_work_offset; + const size_t* global_work_size; + const size_t* local_work_size; +} cl_mutable_dispatch_config_khr; +typedef struct _cl_mutable_base_config_khr { + cl_command_buffer_structure_type_khr type; + const void* next; + cl_uint num_mutable_dispatch; + const cl_mutable_dispatch_config_khr* mutable_dispatch_list; +} cl_mutable_base_config_khr; + +/* cl_command_buffer_flags_khr - bitfield */ +#define CL_COMMAND_BUFFER_MUTABLE_KHR (1 << 1) + +/* Error codes */ +#define CL_INVALID_MUTABLE_COMMAND_KHR -1141 + +/* cl_device_info */ +#define CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 + +/* cl_ndrange_kernel_command_properties_khr */ +#define CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 + +/* cl_mutable_dispatch_fields_khr - bitfield */ +#define 
CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR (1 << 0) +#define CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR (1 << 1) +#define CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR (1 << 2) +#define CL_MUTABLE_DISPATCH_ARGUMENTS_KHR (1 << 3) +#define CL_MUTABLE_DISPATCH_EXEC_INFO_KHR (1 << 4) + +/* cl_mutable_command_info_khr */ +#define CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR 0x12A0 +#define CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR 0x12A1 +#define CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR 0x12AD +#define CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR 0x12A2 +#define CL_MUTABLE_DISPATCH_KERNEL_KHR 0x12A3 +#define CL_MUTABLE_DISPATCH_DIMENSIONS_KHR 0x12A4 +#define CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR 0x12A5 +#define CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR 0x12A6 +#define CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR 0x12A7 + +/* cl_command_buffer_structure_type_khr */ +#define CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR 0 +#define CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR 1 + + +typedef cl_int (CL_API_CALL * +clUpdateMutableCommandsKHR_fn)( + cl_command_buffer_khr command_buffer, + const cl_mutable_base_config_khr* mutable_config) ; + +typedef cl_int (CL_API_CALL * +clGetMutableCommandInfoKHR_fn)( + cl_mutable_command_khr command, + cl_mutable_command_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clUpdateMutableCommandsKHR( + cl_command_buffer_khr command_buffer, + const cl_mutable_base_config_khr* mutable_config) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMutableCommandInfoKHR( + cl_mutable_command_khr command, + cl_mutable_command_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#endif /* CL_NO_PROTOTYPES */ + /* cl_khr_fp64 extension - no extension #define since it has no functions */ /* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */ @@ -54,9 +542,9 @@ extern "C" { * before using. 
*/ #define cl_APPLE_SetMemObjectDestructor 1 -cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj, +extern CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorAPPLE( cl_mem memobj, void (* pfn_notify)(cl_mem memobj, void * user_data), - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + void * user_data) CL_API_SUFFIX__VERSION_1_0; /* Context Logging Functions @@ -68,22 +556,22 @@ cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj, * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger */ #define cl_APPLE_ContextLoggingFunctions 1 -extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr, +extern CL_API_ENTRY void CL_API_CALL clLogMessagesToSystemLogAPPLE( const char * errstr, const void * private_info, size_t cb, - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + void * user_data) CL_API_SUFFIX__VERSION_1_0; /* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ -extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr, +extern CL_API_ENTRY void CL_API_CALL clLogMessagesToStdoutAPPLE( const char * errstr, const void * private_info, size_t cb, - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + void * user_data) CL_API_SUFFIX__VERSION_1_0; /* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ -extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr, +extern CL_API_ENTRY void CL_API_CALL clLogMessagesToStderrAPPLE( const char * errstr, const void * private_info, size_t cb, - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + void * user_data) CL_API_SUFFIX__VERSION_1_0; /************************ @@ -102,7 +590,7 @@ clIcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms); -typedef CL_API_ENTRY cl_int +typedef cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms); @@ -129,11 +617,11 @@ 
clCreateProgramWithILKHR(cl_context context, size_t length, cl_int * errcode_ret); -typedef CL_API_ENTRY cl_program +typedef cl_program (CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context, const void * il, size_t length, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; /* Extension: cl_khr_image2d_from_buffer * @@ -176,10 +664,10 @@ typedef CL_API_ENTRY cl_program #define cl_khr_terminate_context 1 extern CL_API_ENTRY cl_int CL_API_CALL -clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; +clTerminateContextKHR(cl_context context) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; +typedef cl_int +(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_API_SUFFIX__VERSION_1_2; /* @@ -204,13 +692,13 @@ extern CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueueWithPropertiesKHR(cl_context context, cl_device_id device, const cl_queue_properties_khr* properties, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_command_queue +typedef cl_command_queue (CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context, cl_device_id device, const cl_queue_properties_khr* properties, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; /****************************************** @@ -226,6 +714,11 @@ typedef CL_API_ENTRY cl_command_queue #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 +/* extension to cl_nv_device_attribute_query */ +#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007 +#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 +#define CL_DEVICE_PCI_DOMAIN_ID_NV 0x400A /********************************* * cl_amd_device_attribute_query * @@ -268,16 +761,16 @@ 
typedef CL_API_ENTRY cl_command_queue #define cl_ext_device_fission 1 extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; +clReleaseDeviceEXT(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; +typedef cl_int +(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL -clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; +clRetainDeviceEXT(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; +typedef cl_int +(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; typedef cl_ulong cl_device_partition_property_ext; extern CL_API_ENTRY cl_int CL_API_CALL @@ -285,14 +778,14 @@ clCreateSubDevicesEXT(cl_device_id in_device, const cl_device_partition_property_ext * properties, cl_uint num_entries, cl_device_id * out_devices, - cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_int +typedef cl_int (CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device, const cl_device_partition_property_ext * properties, cl_uint num_entries, cl_device_id * out_devices, - cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_1; /* cl_device_partition_property_ext */ #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 @@ -346,7 +839,7 @@ clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event); -typedef CL_API_ENTRY cl_int +typedef cl_int (CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem * mem_objects, @@ -490,7 +983,7 @@ 
clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue, @@ -498,7 +991,7 @@ clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; /****************************************** * cl_img_generate_mipmap extension * @@ -523,7 +1016,7 @@ clEnqueueGenerateMipmapIMG(cl_command_queue command_queue, const size_t *mip_region, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event *event) CL_API_SUFFIX__VERSION_1_2; /****************************************** * cl_img_mem_properties extension * @@ -564,9 +1057,9 @@ clGetKernelSubGroupInfoKHR(cl_kernel in_kernel, const void * input_value, size_t param_value_size, void * param_value, - size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0_DEPRECATED; -typedef CL_API_ENTRY cl_int +typedef cl_int (CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel, cl_device_id in_device, cl_kernel_sub_group_info param_name, @@ -574,7 +1067,7 @@ typedef CL_API_ENTRY cl_int const void * input_value, size_t param_value_size, void * param_value, - size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0_DEPRECATED; /********************************* @@ -694,6 +1187,361 @@ typedef struct _cl_name_version_khr #define CL_DEVICE_NODE_MASK_KHR 0x106E +/*************************************************************** +* 
cl_khr_pci_bus_info +***************************************************************/ +#define cl_khr_pci_bus_info 1 + +typedef struct _cl_device_pci_bus_info_khr { + cl_uint pci_domain; + cl_uint pci_bus; + cl_uint pci_device; + cl_uint pci_function; +} cl_device_pci_bus_info_khr; + +/* cl_device_info */ +#define CL_DEVICE_PCI_BUS_INFO_KHR 0x410F + + +/*************************************************************** +* cl_khr_suggested_local_work_size +***************************************************************/ +#define cl_khr_suggested_local_work_size 1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSuggestedLocalWorkSizeKHR( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_int (CL_API_CALL * +clGetKernelSuggestedLocalWorkSizeKHR_fn)( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0; + + +/*************************************************************** +* cl_khr_integer_dot_product +***************************************************************/ +#define cl_khr_integer_dot_product 1 + +typedef cl_bitfield cl_device_integer_dot_product_capabilities_khr; + +/* cl_device_integer_dot_product_capabilities_khr */ +#define CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR (1 << 0) +#define CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR (1 << 1) + +typedef struct _cl_device_integer_dot_product_acceleration_properties_khr { + cl_bool signed_accelerated; + cl_bool unsigned_accelerated; + cl_bool mixed_signedness_accelerated; + cl_bool accumulating_saturating_signed_accelerated; + cl_bool accumulating_saturating_unsigned_accelerated; + cl_bool accumulating_saturating_mixed_signedness_accelerated; +} 
cl_device_integer_dot_product_acceleration_properties_khr; + +/* cl_device_info */ +#define CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR 0x1073 +#define CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR 0x1074 +#define CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR 0x1075 + + +/*************************************************************** +* cl_khr_external_memory +***************************************************************/ +#define cl_khr_external_memory 1 + +typedef cl_uint cl_external_memory_handle_type_khr; + +/* cl_platform_info */ +#define CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x2044 + +/* cl_device_info */ +#define CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x204F + +/* cl_mem_properties */ +#define CL_DEVICE_HANDLE_LIST_KHR 0x2051 +#define CL_DEVICE_HANDLE_LIST_END_KHR 0 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR 0x2047 +#define CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR 0x2048 + + +typedef cl_int (CL_API_CALL * +clEnqueueAcquireExternalMemObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_int (CL_API_CALL * +clEnqueueReleaseExternalMemObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireExternalMemObjectsKHR( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseExternalMemObjectsKHR( + cl_command_queue command_queue, + cl_uint 
num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +/*************************************************************** +* cl_khr_external_memory_dma_buf +***************************************************************/ +#define cl_khr_external_memory_dma_buf 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR 0x2067 + +/*************************************************************** +* cl_khr_external_memory_dx +***************************************************************/ +#define cl_khr_external_memory_dx 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR 0x2063 +#define CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR 0x2064 +#define CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR 0x2065 +#define CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR 0x2066 + +/*************************************************************** +* cl_khr_external_memory_opaque_fd +***************************************************************/ +#define cl_khr_external_memory_opaque_fd 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR 0x2060 + +/*************************************************************** +* cl_khr_external_memory_win32 +***************************************************************/ +#define cl_khr_external_memory_win32 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR 0x2061 +#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2062 + +/*************************************************************** +* cl_khr_external_semaphore +***************************************************************/ +#define cl_khr_external_semaphore 1 + +typedef struct _cl_semaphore_khr * cl_semaphore_khr; +typedef cl_uint cl_external_semaphore_handle_type_khr; + +/* cl_platform_info */ +#define 
CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x2037 +#define CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x2038 + +/* cl_device_info */ +#define CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x204D +#define CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x204E + +/* cl_semaphore_properties_khr */ +#define CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x203F +#define CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR 0 + + +typedef cl_int (CL_API_CALL * +clGetSemaphoreHandleForTypeKHR_fn)( + cl_semaphore_khr sema_object, + cl_device_id device, + cl_external_semaphore_handle_type_khr handle_type, + size_t handle_size, + void* handle_ptr, + size_t* handle_size_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSemaphoreHandleForTypeKHR( + cl_semaphore_khr sema_object, + cl_device_id device, + cl_external_semaphore_handle_type_khr handle_type, + size_t handle_size, + void* handle_ptr, + size_t* handle_size_ret) CL_API_SUFFIX__VERSION_1_2; + +/*************************************************************** +* cl_khr_external_semaphore_dx_fence +***************************************************************/ +#define cl_khr_external_semaphore_dx_fence 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR 0x2059 + +/*************************************************************** +* cl_khr_external_semaphore_opaque_fd +***************************************************************/ +#define cl_khr_external_semaphore_opaque_fd 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR 0x2055 + +/*************************************************************** +* cl_khr_external_semaphore_sync_fd +***************************************************************/ +#define cl_khr_external_semaphore_sync_fd 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_SYNC_FD_KHR 0x2058 + +/*************************************************************** +* 
cl_khr_external_semaphore_win32 +***************************************************************/ +#define cl_khr_external_semaphore_win32 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR 0x2056 +#define CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2057 + +/*************************************************************** +* cl_khr_semaphore +***************************************************************/ +#define cl_khr_semaphore 1 + +/* type cl_semaphore_khr */ +typedef cl_properties cl_semaphore_properties_khr; +typedef cl_uint cl_semaphore_info_khr; +typedef cl_uint cl_semaphore_type_khr; +typedef cl_ulong cl_semaphore_payload_khr; + +/* cl_semaphore_type */ +#define CL_SEMAPHORE_TYPE_BINARY_KHR 1 + +/* cl_platform_info */ +#define CL_PLATFORM_SEMAPHORE_TYPES_KHR 0x2036 + +/* cl_device_info */ +#define CL_DEVICE_SEMAPHORE_TYPES_KHR 0x204C + +/* cl_semaphore_info_khr */ +#define CL_SEMAPHORE_CONTEXT_KHR 0x2039 +#define CL_SEMAPHORE_REFERENCE_COUNT_KHR 0x203A +#define CL_SEMAPHORE_PROPERTIES_KHR 0x203B +#define CL_SEMAPHORE_PAYLOAD_KHR 0x203C + +/* cl_semaphore_info_khr or cl_semaphore_properties_khr */ +#define CL_SEMAPHORE_TYPE_KHR 0x203D +/* enum CL_DEVICE_HANDLE_LIST_KHR */ +/* enum CL_DEVICE_HANDLE_LIST_END_KHR */ + +/* cl_command_type */ +#define CL_COMMAND_SEMAPHORE_WAIT_KHR 0x2042 +#define CL_COMMAND_SEMAPHORE_SIGNAL_KHR 0x2043 + +/* Error codes */ +#define CL_INVALID_SEMAPHORE_KHR -1142 + + +typedef cl_semaphore_khr (CL_API_CALL * +clCreateSemaphoreWithPropertiesKHR_fn)( + cl_context context, + const cl_semaphore_properties_khr* sema_props, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clEnqueueWaitSemaphoresKHR_fn)( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) 
CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clEnqueueSignalSemaphoresKHR_fn)( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clGetSemaphoreInfoKHR_fn)( + cl_semaphore_khr sema_object, + cl_semaphore_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clReleaseSemaphoreKHR_fn)( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clRetainSemaphoreKHR_fn)( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_semaphore_khr CL_API_CALL +clCreateSemaphoreWithPropertiesKHR( + cl_context context, + const cl_semaphore_properties_khr* sema_props, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWaitSemaphoresKHR( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSignalSemaphoresKHR( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSemaphoreInfoKHR( + cl_semaphore_khr sema_object, + cl_semaphore_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +extern 
CL_API_ENTRY cl_int CL_API_CALL +clReleaseSemaphoreKHR( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSemaphoreKHR( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + /********************************** * cl_arm_import_memory extension * **********************************/ @@ -719,6 +1567,12 @@ typedef intptr_t cl_import_properties_arm; /* Data consistency with host property */ #define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3 +/* Index of plane in a multiplanar hardware buffer */ +#define CL_IMPORT_ANDROID_HARDWARE_BUFFER_PLANE_INDEX_ARM 0x41EF + +/* Index of layer in a multilayer hardware buffer */ +#define CL_IMPORT_ANDROID_HARDWARE_BUFFER_LAYER_INDEX_ARM 0x41F0 + /* Import memory size value to indicate a size for the whole buffer */ #define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX @@ -744,7 +1598,7 @@ clImportMemoryARM( cl_context context, const cl_import_properties_arm *properties, void *memory, size_t size, - cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; /****************************************** @@ -787,11 +1641,11 @@ extern CL_API_ENTRY void * CL_API_CALL clSVMAllocARM(cl_context context, cl_svm_mem_flags_arm flags, size_t size, - cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2; + cl_uint alignment) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY void CL_API_CALL clSVMFreeARM(cl_context context, - void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2; + void * svm_pointer) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFreeARM(cl_command_queue command_queue, @@ -804,7 +1658,7 @@ clEnqueueSVMFreeARM(cl_command_queue command_queue, void * user_data, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpyARM(cl_command_queue 
command_queue, @@ -814,7 +1668,7 @@ clEnqueueSVMMemcpyARM(cl_command_queue command_queue, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFillARM(cl_command_queue command_queue, @@ -824,7 +1678,7 @@ clEnqueueSVMMemFillARM(cl_command_queue command_queue, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMapARM(cl_command_queue command_queue, @@ -834,25 +1688,25 @@ clEnqueueSVMMapARM(cl_command_queue command_queue, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMUnmapARM(cl_command_queue command_queue, void * svm_ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerARM(cl_kernel kernel, cl_uint arg_index, - const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2; + const void * arg_value) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelExecInfoARM(cl_kernel kernel, cl_kernel_exec_info_arm param_name, size_t param_value_size, - const void * param_value) CL_EXT_SUFFIX__VERSION_1_2; + const void * param_value) CL_API_SUFFIX__VERSION_1_2; /******************************** * cl_arm_get_core_id extension * @@ -885,6 +1739,8 @@ clSetKernelExecInfoARM(cl_kernel kernel, #define cl_arm_scheduling_controls 1 +typedef cl_bitfield cl_device_scheduling_controls_capabilities_arm; + /* cl_device_info */ #define 
CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM 0x41E4 @@ -892,15 +1748,889 @@ clSetKernelExecInfoARM(cl_kernel kernel, #define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_ARM (1 << 1) #define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_MODIFIER_ARM (1 << 2) #define CL_DEVICE_SCHEDULING_DEFERRED_FLUSH_ARM (1 << 3) +#define CL_DEVICE_SCHEDULING_REGISTER_ALLOCATION_ARM (1 << 4) +#define CL_DEVICE_SCHEDULING_WARP_THROTTLING_ARM (1 << 5) +#define CL_DEVICE_SCHEDULING_COMPUTE_UNIT_BATCH_QUEUE_SIZE_ARM (1 << 6) + +#define CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM 0x41EB +#define CL_DEVICE_MAX_WARP_COUNT_ARM 0x41EA /* cl_kernel_info */ +#define CL_KERNEL_MAX_WARP_COUNT_ARM 0x41E9 + +/* cl_kernel_exec_info */ #define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM 0x41E5 #define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM 0x41E6 +#define CL_KERNEL_EXEC_INFO_WARP_COUNT_LIMIT_ARM 0x41E8 +#define CL_KERNEL_EXEC_INFO_COMPUTE_UNIT_MAX_QUEUED_BATCHES_ARM 0x41F1 /* cl_queue_properties */ #define CL_QUEUE_KERNEL_BATCHING_ARM 0x41E7 #define CL_QUEUE_DEFERRED_FLUSH_ARM 0x41EC +/************************************** +* cl_arm_controlled_kernel_termination +***************************************/ + +#define cl_arm_controlled_kernel_termination 1 + +/* Error code to indicate kernel terminated with failure */ +#define CL_COMMAND_TERMINATED_ITSELF_WITH_FAILURE_ARM -1108 + +/* cl_device_info */ +#define CL_DEVICE_CONTROLLED_TERMINATION_CAPABILITIES_ARM 0x41EE + +/* Bit fields for controlled termination feature query */ +typedef cl_bitfield cl_device_controlled_termination_capabilities_arm; + +#define CL_DEVICE_CONTROLLED_TERMINATION_SUCCESS_ARM (1 << 0) +#define CL_DEVICE_CONTROLLED_TERMINATION_FAILURE_ARM (1 << 1) +#define CL_DEVICE_CONTROLLED_TERMINATION_QUERY_ARM (1 << 2) + +/* cl_event_info */ +#define CL_EVENT_COMMAND_TERMINATION_REASON_ARM 0x41ED + +/* Values returned for event termination reason query */ +typedef cl_uint cl_command_termination_reason_arm; + +#define 
CL_COMMAND_TERMINATION_COMPLETION_ARM 0 +#define CL_COMMAND_TERMINATION_CONTROLLED_SUCCESS_ARM 1 +#define CL_COMMAND_TERMINATION_CONTROLLED_FAILURE_ARM 2 +#define CL_COMMAND_TERMINATION_ERROR_ARM 3 + +/************************************* +* cl_arm_protected_memory_allocation * +*************************************/ + +#define cl_arm_protected_memory_allocation 1 + +#define CL_MEM_PROTECTED_ALLOC_ARM (1ULL << 36) + +/****************************************** +* cl_intel_exec_by_local_thread extension * +******************************************/ + +#define cl_intel_exec_by_local_thread 1 + +#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31) + +/*************************************************************** +* cl_intel_device_attribute_query +***************************************************************/ + +#define cl_intel_device_attribute_query 1 + +typedef cl_bitfield cl_device_feature_capabilities_intel; + +/* cl_device_feature_capabilities_intel */ +#define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0) +#define CL_DEVICE_FEATURE_FLAG_DPAS_INTEL (1 << 1) + +/* cl_device_info */ +#define CL_DEVICE_IP_VERSION_INTEL 0x4250 +#define CL_DEVICE_ID_INTEL 0x4251 +#define CL_DEVICE_NUM_SLICES_INTEL 0x4252 +#define CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL 0x4253 +#define CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL 0x4254 +#define CL_DEVICE_NUM_THREADS_PER_EU_INTEL 0x4255 +#define CL_DEVICE_FEATURE_CAPABILITIES_INTEL 0x4256 + +/*********************************************** +* cl_intel_device_partition_by_names extension * +************************************************/ + +#define cl_intel_device_partition_by_names 1 + +#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 +#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1 + +/************************************************ +* cl_intel_accelerator extension * +* cl_intel_motion_estimation extension * +* cl_intel_advanced_motion_estimation extension * +*************************************************/ + 
+#define cl_intel_accelerator 1 +#define cl_intel_motion_estimation 1 +#define cl_intel_advanced_motion_estimation 1 + +typedef struct _cl_accelerator_intel* cl_accelerator_intel; +typedef cl_uint cl_accelerator_type_intel; +typedef cl_uint cl_accelerator_info_intel; + +typedef struct _cl_motion_estimation_desc_intel { + cl_uint mb_block_type; + cl_uint subpixel_mode; + cl_uint sad_adjust_mode; + cl_uint search_path_type; +} cl_motion_estimation_desc_intel; + +/* error codes */ +#define CL_INVALID_ACCELERATOR_INTEL -1094 +#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 +#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096 +#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 + +/* cl_accelerator_type_intel */ +#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 + +/* cl_accelerator_info_intel */ +#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 +#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091 +#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092 +#define CL_ACCELERATOR_TYPE_INTEL 0x4093 + +/* cl_motion_estimation_desc_intel flags */ +#define CL_ME_MB_TYPE_16x16_INTEL 0x0 +#define CL_ME_MB_TYPE_8x8_INTEL 0x1 +#define CL_ME_MB_TYPE_4x4_INTEL 0x2 + +#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 + +#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 +#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 + +#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 +#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 +#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 + +#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0 +#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1 +#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2 +#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4 + +#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1 +#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2 +#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3 + +#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16 +#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21 +#define
CL_ME_BIDIR_WEIGHT_HALF_INTEL 32 +#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43 +#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48 + +#define CL_ME_COST_PENALTY_NONE_INTEL 0x0 +#define CL_ME_COST_PENALTY_LOW_INTEL 0x1 +#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2 +#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3 + +#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0 +#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1 +#define CL_ME_COST_PRECISION_PEL_INTEL 0x2 +#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3 + +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 +#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 + +#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 +#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 + +#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 +#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 +#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 + +/* cl_device_info */ +#define CL_DEVICE_ME_VERSION_INTEL 0x407E + +#define CL_ME_VERSION_LEGACY_INTEL 0x0 +#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1 +#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2 + +extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL +clCreateAcceleratorINTEL( + cl_context context, + cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)( + cl_context context, + cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + 
+extern CL_API_ENTRY cl_int CL_API_CALL +clGetAcceleratorInfoINTEL( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainAcceleratorINTEL( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseAcceleratorINTEL( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +/****************************************** +* cl_intel_simultaneous_sharing extension * +*******************************************/ + +#define cl_intel_simultaneous_sharing 1 + +#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 +#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 + +/*********************************** +* cl_intel_egl_image_yuv extension * +************************************/ + +#define cl_intel_egl_image_yuv 1 + +#define CL_EGL_YUV_PLANE_INTEL 0x4107 + +/******************************** +* cl_intel_packed_yuv extension * +*********************************/ + +#define cl_intel_packed_yuv 1 + +#define CL_YUYV_INTEL 0x4076 +#define CL_UYVY_INTEL 0x4077 +#define CL_YVYU_INTEL 0x4078 +#define CL_VYUY_INTEL 0x4079 + +/******************************************** +* cl_intel_required_subgroup_size extension * +*********************************************/ + +#define cl_intel_required_subgroup_size 1 + +#define 
CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 +#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109 +#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A + +/**************************************** +* cl_intel_driver_diagnostics extension * +*****************************************/ + +#define cl_intel_driver_diagnostics 1 + +typedef cl_uint cl_diagnostics_verbose_level; + +#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106 + +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 ) + +/******************************** +* cl_intel_planar_yuv extension * +*********************************/ + +#define CL_NV12_INTEL 0x410E + +#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 ) +#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 ) + +#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E +#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F + +/******************************************************* +* cl_intel_device_side_avc_motion_estimation extension * +********************************************************/ + +#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B +#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C +#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D + +#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */ +#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. 
*/ + +#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0 +#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1 +#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2 +#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3 + +#define CL_AVC_ME_MINOR_8x8_INTEL 0x0 +#define CL_AVC_ME_MINOR_8x4_INTEL 0x1 +#define CL_AVC_ME_MINOR_4x8_INTEL 0x2 +#define CL_AVC_ME_MINOR_4x4_INTEL 0x3 + +#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0 +#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 +#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 + +#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 +#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E +#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D +#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B +#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 +#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F +#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F +#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F + +#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 +#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 +#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 +#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 +#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 +#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 +#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 +#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 +#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 +#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9 +#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2 +#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa + +#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 +#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 + +#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 + +#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 +#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 +#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 +#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 + +#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 
+#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 +#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 +#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B +#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 + +#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 +#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 +#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 +#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 + +#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 +#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 + +#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 ) + +#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 +#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 + +#define CL_AVC_ME_INTRA_16x16_INTEL 0x0 +#define CL_AVC_ME_INTRA_8x8_INTEL 0x1 +#define CL_AVC_ME_INTRA_4x4_INTEL 0x2 + +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 + +#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 +#define 
CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 + +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 + +#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1 +#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2 +#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3 + +#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 +#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 +#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 + +#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 +#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 + +/******************************************* +* cl_intel_unified_shared_memory extension * +********************************************/ +#define cl_intel_unified_shared_memory 1 + +typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel; +typedef cl_properties cl_mem_properties_intel; +typedef cl_bitfield cl_mem_alloc_flags_intel; +typedef cl_uint cl_mem_info_intel; +typedef cl_uint cl_unified_shared_memory_type_intel; +typedef cl_uint cl_mem_advice_intel; + +/* cl_device_info */ +#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190 +#define 
CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191 +#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192 +#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193 +#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194 + +/* cl_device_unified_shared_memory_capabilities_intel - bitfield */ +#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0) +#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1) +#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2) +#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3) + +/* cl_mem_properties_intel */ +#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195 + +/* cl_mem_alloc_flags_intel - bitfield */ +#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0) +#define CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL (1 << 1) +#define CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL (1 << 2) + +/* cl_mem_alloc_info_intel */ +#define CL_MEM_ALLOC_TYPE_INTEL 0x419A +#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B +#define CL_MEM_ALLOC_SIZE_INTEL 0x419C +#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D + +/* cl_unified_shared_memory_type_intel */ +#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196 +#define CL_MEM_TYPE_HOST_INTEL 0x4197 +#define CL_MEM_TYPE_DEVICE_INTEL 0x4198 +#define CL_MEM_TYPE_SHARED_INTEL 0x4199 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200 +#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201 +#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202 +#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203 + +/* cl_command_type */ +#define CL_COMMAND_MEMFILL_INTEL 0x4204 +#define CL_COMMAND_MEMCPY_INTEL 0x4205 +#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206 +#define CL_COMMAND_MEMADVISE_INTEL 0x4207 + + +typedef void* (CL_API_CALL * +clHostMemAllocINTEL_fn)( + cl_context context, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +typedef void* (CL_API_CALL * +clDeviceMemAllocINTEL_fn)( + 
cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +typedef void* (CL_API_CALL * +clSharedMemAllocINTEL_fn)( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +typedef cl_int (CL_API_CALL * +clMemFreeINTEL_fn)( + cl_context context, + void* ptr) ; + +typedef cl_int (CL_API_CALL * +clMemBlockingFreeINTEL_fn)( + cl_context context, + void* ptr) ; + +typedef cl_int (CL_API_CALL * +clGetMemAllocInfoINTEL_fn)( + cl_context context, + const void* ptr, + cl_mem_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +typedef cl_int (CL_API_CALL * +clSetKernelArgMemPointerINTEL_fn)( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value) ; + +typedef cl_int (CL_API_CALL * +clEnqueueMemFillINTEL_fn)( + cl_command_queue command_queue, + void* dst_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +typedef cl_int (CL_API_CALL * +clEnqueueMemcpyINTEL_fn)( + cl_command_queue command_queue, + cl_bool blocking, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +typedef cl_int (CL_API_CALL * +clEnqueueMemAdviseINTEL_fn)( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_advice_intel advice, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY void* CL_API_CALL +clHostMemAllocINTEL( + cl_context context, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY void* CL_API_CALL +clDeviceMemAllocINTEL( + cl_context context, + 
cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY void* CL_API_CALL +clSharedMemAllocINTEL( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clMemFreeINTEL( + cl_context context, + void* ptr) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clMemBlockingFreeINTEL( + cl_context context, + void* ptr) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemAllocInfoINTEL( + cl_context context, + const void* ptr, + cl_mem_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgMemPointerINTEL( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemFillINTEL( + cl_command_queue command_queue, + void* dst_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemcpyINTEL( + cl_command_queue command_queue, + cl_bool blocking, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemAdviseINTEL( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_advice_intel advice, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#endif /* CL_NO_PROTOTYPES */ + +#if defined(CL_VERSION_1_2) +/* Requires OpenCL 1.2 for cl_mem_migration_flags: */ + +typedef cl_int (CL_API_CALL * +clEnqueueMigrateMemINTEL_fn)( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_migration_flags flags, + cl_uint 
num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemINTEL( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#endif /* CL_NO_PROTOTYPES */ + +#endif /* defined(CL_VERSION_1_2) */ + +/* deprecated, use clEnqueueMemFillINTEL instead */ + +typedef cl_int (CL_API_CALL * +clEnqueueMemsetINTEL_fn)( + cl_command_queue command_queue, + void* dst_ptr, + cl_int value, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemsetINTEL( + cl_command_queue command_queue, + void* dst_ptr, + cl_int value, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#endif /* CL_NO_PROTOTYPES */ + +/*************************************************************** +* cl_intel_mem_alloc_buffer_location +***************************************************************/ +#define cl_intel_mem_alloc_buffer_location 1 +#define CL_INTEL_MEM_ALLOC_BUFFER_LOCATION_EXTENSION_NAME \ + "cl_intel_mem_alloc_buffer_location" + +/* cl_mem_properties_intel */ +#define CL_MEM_ALLOC_BUFFER_LOCATION_INTEL 0x419E + +/* cl_mem_alloc_info_intel */ +/* enum CL_MEM_ALLOC_BUFFER_LOCATION_INTEL */ + +/*************************************************** +* cl_intel_create_buffer_with_properties extension * +****************************************************/ + +#define cl_intel_create_buffer_with_properties 1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBufferWithPropertiesINTEL( + cl_context context, + const cl_mem_properties_intel* properties, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem 
(CL_API_CALL * +clCreateBufferWithPropertiesINTEL_fn)( + cl_context context, + const cl_mem_properties_intel* properties, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +/****************************************** +* cl_intel_mem_channel_property extension * +*******************************************/ + +#define CL_MEM_CHANNEL_INTEL 0x4213 + +/********************************* +* cl_intel_mem_force_host_memory * +**********************************/ + +#define cl_intel_mem_force_host_memory 1 + +/* cl_mem_flags */ +#define CL_MEM_FORCE_HOST_MEMORY_INTEL (1 << 20) + +/*************************************************************** +* cl_intel_command_queue_families +***************************************************************/ +#define cl_intel_command_queue_families 1 + +typedef cl_bitfield cl_command_queue_capabilities_intel; + +#define CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL 64 + +typedef struct _cl_queue_family_properties_intel { + cl_command_queue_properties properties; + cl_command_queue_capabilities_intel capabilities; + cl_uint count; + char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL]; +} cl_queue_family_properties_intel; + +/* cl_device_info */ +#define CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL 0x418B + +/* cl_queue_properties */ +#define CL_QUEUE_FAMILY_INTEL 0x418C +#define CL_QUEUE_INDEX_INTEL 0x418D + +/* cl_command_queue_capabilities_intel */ +#define CL_QUEUE_DEFAULT_CAPABILITIES_INTEL 0 +#define CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL (1 << 0) +#define CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL (1 << 1) +#define CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL (1 << 2) +#define CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL (1 << 3) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL (1 << 8) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL (1 << 9) +#define CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL (1 << 10) +#define CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL 
(1 << 11) +#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL (1 << 12) +#define CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL (1 << 13) +#define CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL (1 << 14) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL (1 << 15) +#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL (1 << 16) +#define CL_QUEUE_CAPABILITY_MARKER_INTEL (1 << 24) +#define CL_QUEUE_CAPABILITY_BARRIER_INTEL (1 << 25) +#define CL_QUEUE_CAPABILITY_KERNEL_INTEL (1 << 26) + +/*************************************************************** +* cl_intel_queue_no_sync_operations +***************************************************************/ + +#define cl_intel_queue_no_sync_operations 1 + +/* addition to cl_command_queue_properties */ +#define CL_QUEUE_NO_SYNC_OPERATIONS_INTEL (1 << 29) + +/*************************************************************** +* cl_intel_sharing_format_query +***************************************************************/ +#define cl_intel_sharing_format_query 1 + +/*************************************************************** +* cl_ext_image_requirements_info +***************************************************************/ + +#ifdef CL_VERSION_3_0 + +#define cl_ext_image_requirements_info 1 + +typedef cl_uint cl_image_requirements_info_ext; + +#define CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT 0x1290 +#define CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT 0x1292 +#define CL_IMAGE_REQUIREMENTS_SIZE_EXT 0x12B2 +#define CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT 0x12B3 +#define CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT 0x12B4 +#define CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT 0x12B5 +#define CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT 0x12B6 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageRequirementsInfoEXT( + cl_context context, + const cl_mem_properties* properties, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc* image_desc, + cl_image_requirements_info_ext param_name, + size_t param_value_size, + void* 
param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_int (CL_API_CALL * +clGetImageRequirementsInfoEXT_fn)( + cl_context context, + const cl_mem_properties* properties, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc* image_desc, + cl_image_requirements_info_ext param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_3_0; + +#endif + +/*************************************************************** +* cl_ext_image_from_buffer +***************************************************************/ + +#ifdef CL_VERSION_3_0 + +#define cl_ext_image_from_buffer 1 + +#define CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT 0x1291 + +#endif + #ifdef __cplusplus } #endif diff --git a/include/CL/cl_ext_intel.h b/include/CL/cl_ext_intel.h index aab82284c..a7ae87a34 100644 --- a/include/CL/cl_ext_intel.h +++ b/include/CL/cl_ext_intel.h @@ -14,718 +14,6 @@ * limitations under the License. * ******************************************************************************/ -/*****************************************************************************\ -Copyright (c) 2013-2020 Intel Corporation All Rights Reserved. - -THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE -MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -File Name: cl_ext_intel.h - -Abstract: - -Notes: - -\*****************************************************************************/ - -#ifndef __CL_EXT_INTEL_H -#define __CL_EXT_INTEL_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/*************************************** -* cl_intel_thread_local_exec extension * -****************************************/ - -#define cl_intel_thread_local_exec 1 - -#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31) - -/*********************************************** -* cl_intel_device_partition_by_names extension * -************************************************/ - -#define cl_intel_device_partition_by_names 1 - -#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 -#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1 - -/************************************************ -* cl_intel_accelerator extension * -* cl_intel_motion_estimation extension * -* cl_intel_advanced_motion_estimation extension * -*************************************************/ - -#define cl_intel_accelerator 1 -#define cl_intel_motion_estimation 1 -#define cl_intel_advanced_motion_estimation 1 - -typedef struct _cl_accelerator_intel* cl_accelerator_intel; -typedef cl_uint cl_accelerator_type_intel; -typedef cl_uint cl_accelerator_info_intel; - -typedef struct _cl_motion_estimation_desc_intel { - cl_uint mb_block_type; - cl_uint subpixel_mode; - cl_uint sad_adjust_mode; - cl_uint search_path_type; -} cl_motion_estimation_desc_intel; - -/* error codes */ -#define CL_INVALID_ACCELERATOR_INTEL -1094 -#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 -#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096 -#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 - -/* cl_accelerator_type_intel */ -#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 - -/* cl_accelerator_info_intel */ -#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 -#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091 -#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092 
-#define CL_ACCELERATOR_TYPE_INTEL 0x4093 - -/* cl_motion_detect_desc_intel flags */ -#define CL_ME_MB_TYPE_16x16_INTEL 0x0 -#define CL_ME_MB_TYPE_8x8_INTEL 0x1 -#define CL_ME_MB_TYPE_4x4_INTEL 0x2 - -#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 -#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 -#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 - -#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 -#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 - -#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 -#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 -#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 - -#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0 -#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1 -#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2 -#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4 - -#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1 -#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2 -#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3 - -#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16 -#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21 -#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32 -#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43 -#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48 - -#define CL_ME_COST_PENALTY_NONE_INTEL 0x0 -#define CL_ME_COST_PENALTY_LOW_INTEL 0x1 -#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2 -#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3 - -#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0 -#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1 -#define CL_ME_COST_PRECISION_PEL_INTEL 0x2 -#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3 - -#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 -#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 -#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 - -#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 -#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 -#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 -#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 -#define 
CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 -#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 - -#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 -#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 -#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 - -/* cl_device_info */ -#define CL_DEVICE_ME_VERSION_INTEL 0x407E - -#define CL_ME_VERSION_LEGACY_INTEL 0x0 -#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1 -#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2 - -extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL -clCreateAcceleratorINTEL( - cl_context context, - cl_accelerator_type_intel accelerator_type, - size_t descriptor_size, - const void* descriptor, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)( - cl_context context, - cl_accelerator_type_intel accelerator_type, - size_t descriptor_size, - const void* descriptor, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetAcceleratorInfoINTEL( - cl_accelerator_intel accelerator, - cl_accelerator_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)( - cl_accelerator_intel accelerator, - cl_accelerator_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainAcceleratorINTEL( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseAcceleratorINTEL( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int 
(CL_API_CALL *clReleaseAcceleratorINTEL_fn)( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -/****************************************** -* cl_intel_simultaneous_sharing extension * -*******************************************/ - -#define cl_intel_simultaneous_sharing 1 - -#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 -#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 - -/*********************************** -* cl_intel_egl_image_yuv extension * -************************************/ - -#define cl_intel_egl_image_yuv 1 - -#define CL_EGL_YUV_PLANE_INTEL 0x4107 - -/******************************** -* cl_intel_packed_yuv extension * -*********************************/ - -#define cl_intel_packed_yuv 1 - -#define CL_YUYV_INTEL 0x4076 -#define CL_UYVY_INTEL 0x4077 -#define CL_YVYU_INTEL 0x4078 -#define CL_VYUY_INTEL 0x4079 - -/******************************************** -* cl_intel_required_subgroup_size extension * -*********************************************/ - -#define cl_intel_required_subgroup_size 1 - -#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 -#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109 -#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A - -/**************************************** -* cl_intel_driver_diagnostics extension * -*****************************************/ - -#define cl_intel_driver_diagnostics 1 - -typedef cl_uint cl_diagnostics_verbose_level; - -#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106 - -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff ) -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 ) -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 ) -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 ) - -/******************************** -* cl_intel_planar_yuv extension * -*********************************/ - -#define CL_NV12_INTEL 0x410E - -#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 ) -#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 ) - -#define 
CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E -#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F - -/******************************************************* -* cl_intel_device_side_avc_motion_estimation extension * -********************************************************/ - -#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B -#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C -#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D - -#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */ -#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */ - -#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0 -#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1 -#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2 -#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3 - -#define CL_AVC_ME_MINOR_8x8_INTEL 0x0 -#define CL_AVC_ME_MINOR_8x4_INTEL 0x1 -#define CL_AVC_ME_MINOR_4x8_INTEL 0x2 -#define CL_AVC_ME_MINOR_4x4_INTEL 0x3 - -#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0 -#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 -#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 - -#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 -#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E -#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D -#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B -#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 -#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F -#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F -#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F - -#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 -#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 -#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 -#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 -#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 -#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 -#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 -#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 -#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 -#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9 -#define 
CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2 -#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa - -#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 -#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 - -#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 -#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 -#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 - -#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 -#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 -#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 -#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 - -#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 -#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 -#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 -#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B -#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 - -#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 -#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 -#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 -#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 - -#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 -#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 - -#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 ) -#define 
CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 ) - -#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 -#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 - -#define CL_AVC_ME_INTRA_16x16_INTEL 0x0 -#define CL_AVC_ME_INTRA_8x8_INTEL 0x1 -#define CL_AVC_ME_INTRA_4x4_INTEL 0x2 - -#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 -#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 -#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 - -#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 -#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 -#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 -#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 - -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 - -#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1 -#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2 -#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3 - -#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 -#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 -#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 - -#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 -#define 
CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 - -/******************************************* -* cl_intel_unified_shared_memory extension * -********************************************/ - -/* These APIs are in sync with Revision Q of the cl_intel_unified_shared_memory spec! */ - -#define cl_intel_unified_shared_memory 1 - -/* cl_device_info */ -#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190 -#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191 -#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192 -#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193 -#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194 - -typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel; - -/* cl_device_unified_shared_memory_capabilities_intel - bitfield */ -#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0) -#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1) -#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2) -#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3) - -typedef cl_properties cl_mem_properties_intel; - -/* cl_mem_properties_intel */ -#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195 - -typedef cl_bitfield cl_mem_alloc_flags_intel; - -/* cl_mem_alloc_flags_intel - bitfield */ -#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0) - -typedef cl_uint cl_mem_info_intel; - -/* cl_mem_alloc_info_intel */ -#define CL_MEM_ALLOC_TYPE_INTEL 0x419A -#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B -#define CL_MEM_ALLOC_SIZE_INTEL 0x419C -#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D -/* Enum values 0x419E-0x419F are reserved for future queries. 
*/ - -typedef cl_uint cl_unified_shared_memory_type_intel; - -/* cl_unified_shared_memory_type_intel */ -#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196 -#define CL_MEM_TYPE_HOST_INTEL 0x4197 -#define CL_MEM_TYPE_DEVICE_INTEL 0x4198 -#define CL_MEM_TYPE_SHARED_INTEL 0x4199 - -typedef cl_uint cl_mem_advice_intel; - -/* cl_mem_advice_intel */ -/* Enum values 0x4208-0x420F are reserved for future memory advices. */ - -/* cl_kernel_exec_info */ -#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200 -#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201 -#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202 -#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203 - -/* cl_command_type */ -#define CL_COMMAND_MEMFILL_INTEL 0x4204 -#define CL_COMMAND_MEMCPY_INTEL 0x4205 -#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206 -#define CL_COMMAND_MEMADVISE_INTEL 0x4207 - -extern CL_API_ENTRY void* CL_API_CALL -clHostMemAllocINTEL( - cl_context context, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -typedef CL_API_ENTRY void* (CL_API_CALL * -clHostMemAllocINTEL_fn)( - cl_context context, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -extern CL_API_ENTRY void* CL_API_CALL -clDeviceMemAllocINTEL( - cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -typedef CL_API_ENTRY void* (CL_API_CALL * -clDeviceMemAllocINTEL_fn)( - cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -extern CL_API_ENTRY void* CL_API_CALL -clSharedMemAllocINTEL( - cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -typedef CL_API_ENTRY void* (CL_API_CALL * -clSharedMemAllocINTEL_fn)( - 
cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL -clMemFreeINTEL( - cl_context context, - void* ptr); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clMemFreeINTEL_fn)( - cl_context context, - void* ptr); - -extern CL_API_ENTRY cl_int CL_API_CALL -clMemBlockingFreeINTEL( - cl_context context, - void* ptr); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clMemBlockingFreeINTEL_fn)( - cl_context context, - void* ptr); - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetMemAllocInfoINTEL( - cl_context context, - const void* ptr, - cl_mem_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clGetMemAllocInfoINTEL_fn)( - cl_context context, - const void* ptr, - cl_mem_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetKernelArgMemPointerINTEL( - cl_kernel kernel, - cl_uint arg_index, - const void* arg_value); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clSetKernelArgMemPointerINTEL_fn)( - cl_kernel kernel, - cl_uint arg_index, - const void* arg_value); - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemsetINTEL( /* Deprecated */ - cl_command_queue command_queue, - void* dst_ptr, - cl_int value, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemsetINTEL_fn)( /* Deprecated */ - cl_command_queue command_queue, - void* dst_ptr, - cl_int value, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemFillINTEL( - cl_command_queue command_queue, - void* dst_ptr, - const void* pattern, - size_t pattern_size, - size_t size, - 
cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemFillINTEL_fn)( - cl_command_queue command_queue, - void* dst_ptr, - const void* pattern, - size_t pattern_size, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemcpyINTEL( - cl_command_queue command_queue, - cl_bool blocking, - void* dst_ptr, - const void* src_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemcpyINTEL_fn)( - cl_command_queue command_queue, - cl_bool blocking, - void* dst_ptr, - const void* src_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -#ifdef CL_VERSION_1_2 - -/* Because these APIs use cl_mem_migration_flags, they require - OpenCL 1.2: */ - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMigrateMemINTEL( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMigrateMemINTEL_fn)( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemAdviseINTEL( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_advice_intel advice, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemAdviseINTEL_fn)( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_advice_intel advice, - cl_uint 
num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -/*************************************************** -* cl_intel_create_buffer_with_properties extension * -****************************************************/ - -#define cl_intel_create_buffer_with_properties 1 - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateBufferWithPropertiesINTEL( - cl_context context, - const cl_mem_properties_intel* properties, - cl_mem_flags flags, - size_t size, - void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL * -clCreateBufferWithPropertiesINTEL_fn)( - cl_context context, - const cl_mem_properties_intel* properties, - cl_mem_flags flags, - size_t size, - void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; - -/****************************************** -* cl_intel_mem_channel_property extension * -*******************************************/ - -#define CL_MEM_CHANNEL_INTEL 0x4213 - -/********************************* -* cl_intel_mem_force_host_memory * -**********************************/ - -#define cl_intel_mem_force_host_memory 1 - -/* cl_mem_flags */ -#define CL_MEM_FORCE_HOST_MEMORY_INTEL (1 << 20) - -#ifdef __cplusplus -} -#endif - -#endif /* __CL_EXT_INTEL_H */ +#include +#pragma message("The Intel extensions have been moved into cl_ext.h. Please include cl_ext.h directly.") diff --git a/include/CL/cl_gl.h b/include/CL/cl_gl.h index b587f02a9..327746508 100644 --- a/include/CL/cl_gl.h +++ b/include/CL/cl_gl.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. + * Copyright (c) 2008-2021 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -102,21 +102,21 @@ clEnqueueReleaseGLObjects(cl_command_queue command_queue, /* Deprecated OpenCL 1.1 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateFromGLTexture2D(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateFromGLTexture3D(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; /* cl_khr_gl_sharing extension */ @@ -145,13 +145,48 @@ clGetGLContextInfoKHR(const cl_context_properties * properties, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( +typedef cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( const cl_context_properties * properties, cl_gl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret); +/* + * cl_khr_gl_event extension + */ +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromGLsyncKHR(cl_context context, + cl_GLsync sync, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +/*************************************************************** +* cl_intel_sharing_format_query_gl +***************************************************************/ +#define cl_intel_sharing_format_query_gl 1 + +/* when cl_khr_gl_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedGLTextureFormatsINTEL( + cl_context 
context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_GLenum* gl_formats, + cl_uint* num_texture_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedGLTextureFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_GLenum* gl_formats, + cl_uint* num_texture_formats) ; + #ifdef __cplusplus } #endif diff --git a/include/CL/cl_gl_ext.h b/include/CL/cl_gl_ext.h index 52107b111..8ec818167 100644 --- a/include/CL/cl_gl_ext.h +++ b/include/CL/cl_gl_ext.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. + * Copyright (c) 2008-2021 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,27 +14,5 @@ * limitations under the License. ******************************************************************************/ -#ifndef __OPENCL_CL_GL_EXT_H -#define __OPENCL_CL_GL_EXT_H - -#ifdef __cplusplus -extern "C" { -#endif - #include - -/* - * cl_khr_gl_event extension - */ -#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D - -extern CL_API_ENTRY cl_event CL_API_CALL -clCreateEventFromGLsyncKHR(cl_context context, - cl_GLsync sync, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_GL_EXT_H */ +#pragma message("All OpenGL-related extensions have been moved into cl_gl.h. 
Please include cl_gl.h directly.") diff --git a/include/CL/cl_icd.h b/include/CL/cl_icd.h index 8ff8b94f9..360b87030 100644 --- a/include/CL/cl_icd.h +++ b/include/CL/cl_icd.h @@ -41,35 +41,35 @@ extern "C" { /* API function pointer definitions */ // Platform APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( +typedef cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Device APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( +typedef cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( cl_device_id in_device, const cl_device_partition_property *partition_properties, cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDevice)( +typedef cl_int(CL_API_CALL *cl_api_clRetainDevice)( cl_device_id device) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDevice)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseDevice)( cl_device_id device) CL_API_SUFFIX__VERSION_1_2; #else @@ -81,36 +81,36 @@ typedef void 
*cl_api_clReleaseDevice; #endif // Context APIs -typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContext)( +typedef cl_context(CL_API_CALL *cl_api_clCreateContext)( const cl_context_properties *properties, cl_uint num_devices, const cl_device_id *devices, void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( +typedef cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( const cl_context_properties *properties, cl_device_type device_type, void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainContext)( +typedef cl_int(CL_API_CALL *cl_api_clRetainContext)( cl_context context) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseContext)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseContext)( cl_context context) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetContextInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetContextInfo)( cl_context context, cl_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Command Queue APIs -typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( +typedef cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 -typedef CL_API_ENTRY +typedef cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueueWithProperties)( cl_context /* context */, cl_device_id /* device */, const cl_queue_properties * /* properties */, @@ -122,25 +122,25 @@ typedef void *cl_api_clCreateCommandQueueWithProperties; #endif 
-typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( +typedef cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Memory Object APIs -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateImage)( cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; @@ -153,17 +153,17 @@ typedef void *cl_api_clCreateImage; #ifdef CL_VERSION_3_0 -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBufferWithProperties)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateBufferWithProperties)( cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImageWithProperties)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateImageWithProperties)( cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; 
-typedef CL_API_ENTRY cl_int(CL_API_CALL* cl_api_clSetContextDestructorCallback)( +typedef cl_int(CL_API_CALL* cl_api_clSetContextDestructorCallback)( cl_context context, void(CL_CALLBACK* pfn_notify)(cl_context context, void* user_data), void* user_data) CL_API_SUFFIX__VERSION_3_0; @@ -176,43 +176,43 @@ typedef void *cl_api_clSetContextDestructorCallback; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainMemObject)( +typedef cl_int(CL_API_CALL *cl_api_clRetainMemObject)( cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( +typedef cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format *image_formats, cl_uint *num_image_formats) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetImageInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetImageInfo)( cl_mem image, cl_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreatePipe)( +typedef cl_mem(CL_API_CALL *cl_api_clCreatePipe)( cl_context /* context */, cl_mem_flags /* flags */, cl_uint /* pipe_packet_size */, cl_uint /* pipe_max_packets */, const cl_pipe_properties * /* properties */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( +typedef 
cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( cl_mem /* pipe */, cl_pipe_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clSVMAlloc)( +typedef void *(CL_API_CALL *cl_api_clSVMAlloc)( cl_context /* context */, cl_svm_mem_flags /* flags */, size_t /* size */, unsigned int /* alignment */)CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY void(CL_API_CALL *cl_api_clSVMFree)( +typedef void(CL_API_CALL *cl_api_clSVMFree)( cl_context /* context */, void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; @@ -226,24 +226,24 @@ typedef void *cl_api_clSVMFree; #endif // Sampler APIs -typedef CL_API_ENTRY cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( +typedef cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( cl_context context, cl_bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainSampler)( +typedef cl_int(CL_API_CALL *cl_api_clRetainSampler)( cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseSampler)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseSampler)( cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 -typedef CL_API_ENTRY +typedef cl_sampler(CL_API_CALL *cl_api_clCreateSamplerWithProperties)( cl_context /* context */, const cl_sampler_properties * /* sampler_properties */, @@ -256,18 +256,18 @@ typedef void *cl_api_clCreateSamplerWithProperties; #endif // Program Object APIs -typedef CL_API_ENTRY cl_program(CL_API_CALL 
*cl_api_clCreateProgramWithSource)( +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithSource)( cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithBuiltInKernels)( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const char *kernel_names, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; @@ -278,13 +278,13 @@ typedef void *cl_api_clCreateProgramWithBuiltInKernels; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainProgram)( +typedef cl_int(CL_API_CALL *cl_api_clRetainProgram)( cl_program program) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseProgram)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseProgram)( cl_program program) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clBuildProgram)( +typedef cl_int(CL_API_CALL *cl_api_clBuildProgram)( cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), @@ -292,14 +292,14 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clBuildProgram)( #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCompileProgram)( +typedef cl_int(CL_API_CALL *cl_api_clCompileProgram)( cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, void(CL_CALLBACK 
*pfn_notify)(cl_program program, void *user_data), void *user_data) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clLinkProgram)( +typedef cl_program(CL_API_CALL *cl_api_clLinkProgram)( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_programs, const cl_program *input_programs, @@ -315,12 +315,12 @@ typedef void *cl_api_clLinkProgram; #ifdef CL_VERSION_2_2 -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clSetProgramSpecializationConstant)( cl_program program, cl_uint spec_id, size_t spec_size, const void *spec_value) CL_API_SUFFIX__VERSION_2_2; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( +typedef cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( cl_program program, void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), void *user_data) CL_API_SUFFIX__VERSION_2_2; @@ -334,7 +334,7 @@ typedef void *cl_api_clSetProgramReleaseCallback; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( +typedef cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; #else @@ -343,41 +343,41 @@ typedef void *cl_api_clUnloadPlatformCompiler; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Kernel Object APIs -typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( +typedef 
cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( cl_program program, const char *kernel_name, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( +typedef cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( cl_program program, cl_uint num_kernels, cl_kernel *kernels, cl_uint *num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainKernel)( +typedef cl_int(CL_API_CALL *cl_api_clRetainKernel)( cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseKernel)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseKernel)( cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArg)( +typedef cl_int(CL_API_CALL *cl_api_clSetKernelArg)( cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; @@ -388,28 +388,28 @@ typedef void *cl_api_clGetKernelArgInfo; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 -typedef CL_API_ENTRY cl_int(CL_API_CALL 
*cl_api_clSetKernelArgSVMPointer)( +typedef cl_int(CL_API_CALL *cl_api_clSetKernelArgSVMPointer)( cl_kernel /* kernel */, cl_uint /* arg_index */, const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( +typedef cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */, const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( +typedef cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( cl_kernel /* in_kernel */, cl_device_id /*in_device*/, cl_kernel_sub_group_info /* param_name */, size_t /*input_value_size*/, const void * /*input_value*/, size_t /*param_value_size*/, void * /*param_value*/, - size_t * /*param_value_size_ret*/) CL_EXT_SUFFIX__VERSION_2_0; + size_t * /*param_value_size_ret*/) CL_API_SUFFIX__VERSION_2_0; #else @@ -420,33 +420,33 @@ typedef void *cl_api_clGetKernelSubGroupInfoKHR; #endif // Event Object APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clWaitForEvents)( +typedef cl_int(CL_API_CALL *cl_api_clWaitForEvents)( cl_uint num_events, const cl_event *event_list) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetEventInfo)( cl_event event, cl_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) +typedef cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) +typedef cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) CL_API_SUFFIX__VERSION_1_0; // Profiling APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( +typedef 
cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Flush and Finish APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFlush)( +typedef cl_int(CL_API_CALL *cl_api_clFlush)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFinish)( +typedef cl_int(CL_API_CALL *cl_api_clFinish)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; // Enqueued Commands APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t cb, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -454,7 +454,7 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( #ifdef CL_VERSION_1_1 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, const size_t *buffer_origin, const size_t *host_origin, const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, @@ -468,7 +468,7 @@ typedef void *cl_api_clEnqueueReadBufferRect; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t cb, const void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -476,7 +476,7 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( #ifdef CL_VERSION_1_1 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( cl_command_queue 
command_queue, cl_mem buffer, cl_bool blocking_read, const size_t *buffer_origin, const size_t *host_origin, const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, @@ -492,7 +492,7 @@ typedef void *cl_api_clEnqueueWriteBufferRect; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -504,7 +504,7 @@ typedef void *cl_api_clEnqueueFillBuffer; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -512,7 +512,7 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( #ifdef CL_VERSION_1_1 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, const size_t *src_origin, const size_t *dst_origin, const size_t *region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, @@ -526,14 +526,14 @@ typedef void *cl_api_clEnqueueCopyBufferRect; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( +typedef cl_int(CL_API_CALL 
*cl_api_clEnqueueWriteImage)( cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, const size_t *origin, const size_t *region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, @@ -542,7 +542,7 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( cl_command_queue command_queue, cl_mem image, const void *fill_color, const size_t origin[3], const size_t region[3], cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -554,45 +554,45 @@ typedef void *cl_api_clEnqueueFillImage; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, const size_t *src_origin, const size_t *region, size_t dst_offset, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, size_t src_offset, const size_t *dst_origin, const size_t *region, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapBuffer)( +typedef void *(CL_API_CALL 
*cl_api_clEnqueueMapBuffer)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapImage)( +typedef void *(CL_API_CALL *cl_api_clEnqueueMapImage)( cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, cl_map_flags map_flags, const size_t *origin, const size_t *region, size_t *image_row_pitch, size_t *image_slice_pitch, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem *mem_objects, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -604,19 +604,19 @@ typedef void *cl_api_clEnqueueMigrateMemObjects; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNDRangeKernel)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueNDRangeKernel)( cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueTask)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueTask)( 
cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( cl_command_queue command_queue, void(CL_CALLBACK *user_func)(void *), void *args, size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, const void **args_mem_loc, cl_uint num_events_in_wait_list, @@ -625,17 +625,17 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY void *( +typedef void *( CL_API_CALL *cl_api_clGetExtensionFunctionAddressForPlatform)( cl_platform_id platform, const char *function_name)CL_API_SUFFIX__VERSION_1_2; @@ -652,7 +652,7 @@ typedef void *cl_api_clGetExtensionFunctionAddressForPlatform; #ifdef CL_VERSION_2_0 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, void ** /* svm_pointers */, void(CL_CALLBACK *pfn_free_func)(cl_command_queue /* queue */, @@ -663,28 +663,28 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL 
*cl_api_clEnqueueSVMMemcpy)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemcpy)( cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void * /* dst_ptr */, const void * /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( cl_command_queue /* command_queue */, void * /* svm_ptr */, const void * /* pattern */, size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMap)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMap)( cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, void * /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( cl_command_queue /* command_queue */, void * /* svm_ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, @@ -701,119 +701,119 @@ typedef void *cl_api_clEnqueueSVMUnmap; #endif // Deprecated APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( +typedef cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties *old_properties) - CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; + CL_API_SUFFIX__VERSION_1_0_DEPRECATED; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( cl_context context, cl_mem_flags 
flags, const cl_image_format *image_format, size_t image_width, size_t image_height, size_t image_row_pitch, - void *host_ptr, cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( cl_context context, cl_mem_flags flags, const cl_image_format *image_format, size_t image_width, size_t image_height, size_t image_depth, size_t image_row_pitch, size_t image_slice_pitch, void *host_ptr, - cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) - CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +typedef cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) + CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( cl_command_queue command_queue, - cl_event *event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_event *event) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( cl_command_queue command_queue, cl_uint num_events, - const cl_event *event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + const cl_event *event_list) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( - cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +typedef cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clGetExtensionFunctionAddress)( - const char *function_name)CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +typedef void *(CL_API_CALL 
*cl_api_clGetExtensionFunctionAddress)( + const char *function_name)CL_API_SUFFIX__VERSION_1_1_DEPRECATED; // GL and other APIs -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLBuffer)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLBuffer)( cl_context context, cl_mem_flags flags, cl_GLuint bufobj, int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( cl_mem memobj, cl_gl_object_type *gl_object_type, cl_GLuint *gl_object_name) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY 
cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; /* cl_khr_gl_sharing */ -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( +typedef cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( const cl_context_properties *properties, cl_gl_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); /* cl_khr_gl_event */ -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( +typedef cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( cl_context context, cl_GLsync sync, cl_int *errcode_ret); #if defined(_WIN32) /* cl_khr_d3d10_sharing */ -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( 
cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D10ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D10ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, @@ -848,32 +848,32 @@ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( const cl_event *event_wait_list, cl_event *event); /* cl_khr_d3d11_sharing */ -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef 
CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D11ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, @@ -881,26 +881,26 @@ cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( cl_event *event) CL_API_SUFFIX__VERSION_1_2; /* cl_khr_dx9_media_sharing */ -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR)( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr *media_adapters_type, void *media_adapters, cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, cl_uint plane, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR)( 
cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, @@ -987,29 +987,29 @@ typedef void *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR; #ifdef CL_VERSION_1_1 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetEventCallback)( +typedef cl_int(CL_API_CALL *cl_api_clSetEventCallback)( cl_event /* event */, cl_int /* command_exec_callback_type */, void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, const void * /* buffer_create_info */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clSetMemObjectDestructorCallback)( cl_mem /* memobj */, void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, void * /*user_data*/), void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( +typedef cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( cl_context /* context */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( +typedef cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; @@ -1023,68 +1023,68 @@ typedef void *cl_api_clSetUserEventStatus; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( +typedef cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( cl_device_id in_device, const cl_device_partition_property_ext *partition_properties, cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( +typedef cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( 
cl_device_id device) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( cl_device_id device) CL_API_SUFFIX__VERSION_1_0; /* cl_khr_egl_image */ -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( cl_context context, CLeglDisplayKHR display, CLeglImageKHR image, cl_mem_flags flags, const cl_egl_image_properties_khr *properties, cl_int *errcode_ret); -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); /* cl_khr_egl_event */ -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( +typedef cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, cl_int *errcode_ret); #ifdef CL_VERSION_2_1 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetDefaultDeviceCommandQueue)( +typedef cl_int(CL_API_CALL *cl_api_clSetDefaultDeviceCommandQueue)( cl_context context, cl_device_id device, cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( cl_context context, const void *il, size_t length, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_int(CL_API_CALL 
*cl_api_clGetKernelSubGroupInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfo)( cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( +typedef cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( cl_kernel source_kernel, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( cl_command_queue command_queue, cl_uint num_svm_pointers, const void **svm_pointers, const size_t *sizes, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( cl_device_id device, cl_ulong *device_timestamp, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetHostTimer)( +typedef cl_int(CL_API_CALL *cl_api_clGetHostTimer)( cl_device_id device, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; #else @@ -1099,7 +1099,7 @@ typedef void *cl_api_clGetHostTimer; #endif -/* Vendor dispatch table struture */ +/* Vendor dispatch table structure */ typedef struct _cl_icd_dispatch { /* OpenCL 1.0 */ diff --git a/include/CL/cl_layer.h b/include/CL/cl_layer.h new file mode 100644 index 000000000..59dae7506 --- /dev/null +++ b/include/CL/cl_layer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * OpenCL is a trademark of Apple Inc. used under license by Khronos. + */ + +#ifndef OPENCL_CL_LAYER_H +#define OPENCL_CL_LAYER_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_layer_info; +typedef cl_uint cl_layer_api_version; +#define CL_LAYER_API_VERSION 0x4240 +#define CL_LAYER_API_VERSION_100 100 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetLayerInfo(cl_layer_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +typedef cl_int +(CL_API_CALL *pfn_clGetLayerInfo)(cl_layer_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL +clInitLayer(cl_uint num_entries, + const cl_icd_dispatch *target_dispatch, + cl_uint *num_entries_ret, + const cl_icd_dispatch **layer_dispatch_ret); + +typedef cl_int +(CL_API_CALL *pfn_clInitLayer)(cl_uint num_entries, + const cl_icd_dispatch *target_dispatch, + cl_uint *num_entries_ret, + const cl_icd_dispatch **layer_dispatch_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* OPENCL_CL_LAYER_H */ diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h index 2d69cc4e5..e7a0d6f47 100644 --- a/include/CL/cl_platform.h +++ b/include/CL/cl_platform.h @@ -24,13 +24,25 @@ extern "C" { #endif #if defined(_WIN32) - #define CL_API_ENTRY - #define CL_API_CALL __stdcall - #define CL_CALLBACK __stdcall + #if !defined(CL_API_ENTRY) + #define CL_API_ENTRY + #endif + #if !defined(CL_API_CALL) + #define CL_API_CALL __stdcall + #endif + #if 
!defined(CL_CALLBACK) + #define CL_CALLBACK __stdcall + #endif #else - #define CL_API_ENTRY - #define CL_API_CALL - #define CL_CALLBACK + #if !defined(CL_API_ENTRY) + #define CL_API_ENTRY + #endif + #if !defined(CL_API_CALL) + #define CL_API_CALL + #endif + #if !defined(CL_CALLBACK) + #define CL_CALLBACK + #endif #endif /* @@ -41,86 +53,99 @@ extern "C" { * deprecation but is deprecated in versions later than 1.1. */ -#define CL_EXTENSION_WEAK_LINK -#define CL_API_SUFFIX__VERSION_1_0 -#define CL_EXT_SUFFIX__VERSION_1_0 -#define CL_API_SUFFIX__VERSION_1_1 -#define CL_EXT_SUFFIX__VERSION_1_1 -#define CL_API_SUFFIX__VERSION_1_2 -#define CL_EXT_SUFFIX__VERSION_1_2 -#define CL_API_SUFFIX__VERSION_2_0 -#define CL_EXT_SUFFIX__VERSION_2_0 -#define CL_API_SUFFIX__VERSION_2_1 -#define CL_EXT_SUFFIX__VERSION_2_1 -#define CL_API_SUFFIX__VERSION_2_2 -#define CL_EXT_SUFFIX__VERSION_2_2 -#define CL_API_SUFFIX__VERSION_3_0 -#define CL_EXT_SUFFIX__VERSION_3_0 -#define CL_API_SUFFIX__EXPERIMENTAL -#define CL_EXT_SUFFIX__EXPERIMENTAL +#ifndef CL_API_SUFFIX_USER +#define CL_API_SUFFIX_USER +#endif + +#ifndef CL_API_PREFIX_USER +#define CL_API_PREFIX_USER +#endif + +#define CL_API_SUFFIX_COMMON CL_API_SUFFIX_USER +#define CL_API_PREFIX_COMMON CL_API_PREFIX_USER + +#define CL_API_SUFFIX__VERSION_1_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_1_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_1_2 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_2 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_3_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__EXPERIMENTAL CL_API_SUFFIX_COMMON #ifdef __GNUC__ - #define CL_EXT_SUFFIX_DEPRECATED __attribute__((deprecated)) - #define CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX_DEPRECATED __attribute__((deprecated)) + #define CL_API_PREFIX_DEPRECATED #elif defined(_WIN32) - #define CL_EXT_SUFFIX_DEPRECATED - #define 
CL_EXT_PREFIX_DEPRECATED __declspec(deprecated) + #define CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX_DEPRECATED __declspec(deprecated) #else - #define CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS - #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS - #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS - #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define 
CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS - #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS - #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_2_APIS - #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_2_DEPRECATED 
CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #if (defined (_WIN32) && defined(_MSC_VER)) +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlanguage-extension-token" +#endif + +/* intptr_t is used in cl.h and provided by stddef.h in Visual C++, but not in clang */ +/* stdint.h was missing before Visual Studio 2010, include it for later versions and for clang */ +#if defined(__clang__) || _MSC_VER >= 1600 + #include +#endif + /* scalar types */ typedef signed __int8 cl_char; typedef unsigned __int8 cl_uchar; @@ -135,6 +160,10 @@ typedef unsigned __int16 cl_half; typedef float cl_float; typedef double cl_double; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 @@ -481,25 +510,26 @@ typedef unsigned int cl_GLenum; #if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define __CL_HAS_ANON_STRUCT__ 1 #define __CL_ANON_STRUCT__ -#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#elif defined(_WIN32) && defined(_MSC_VER) && !defined(__STDC__) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ +#elif defined(__GNUC__) && ! defined(__STRICT_ANSI__) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined(__clang__) #define __CL_HAS_ANON_STRUCT__ 1 #define __CL_ANON_STRUCT__ __extension__ -#elif defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__) - #if _MSC_VER >= 1500 - /* Microsoft Developer Studio 2008 supports anonymous structs, but - * complains by default. 
*/ - #define __CL_HAS_ANON_STRUCT__ 1 - #define __CL_ANON_STRUCT__ - /* Disable warning C4201: nonstandard extension used : nameless - * struct/union */ - #pragma warning( push ) - #pragma warning( disable : 4201 ) - #endif #else #define __CL_HAS_ANON_STRUCT__ 0 #define __CL_ANON_STRUCT__ #endif +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) +#endif + /* Define alignment keys */ #if defined( __GNUC__ ) || defined(__INTEGRITY) #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) @@ -1375,10 +1405,8 @@ typedef union } #endif -#if defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__) - #if _MSC_VER >=1500 +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ #pragma warning( pop ) - #endif #endif #endif /* __CL_PLATFORM_H */ diff --git a/include/CL/cl_va_api_media_sharing_intel.h b/include/CL/cl_va_api_media_sharing_intel.h index 0e7cd4d6f..547e90e88 100644 --- a/include/CL/cl_va_api_media_sharing_intel.h +++ b/include/CL/cl_va_api_media_sharing_intel.h @@ -13,30 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ -/*****************************************************************************\ - -Copyright (c) 2013-2019 Intel Corporation All Rights Reserved. - -THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE -MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -File Name: cl_va_api_media_sharing_intel.h - -Abstract: - -Notes: - -\*****************************************************************************/ - #ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H #define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H @@ -49,6 +25,33 @@ File Name: cl_va_api_media_sharing_intel.h extern "C" { #endif +/*************************************************************** +* cl_intel_sharing_format_query_va_api +***************************************************************/ +#define cl_intel_sharing_format_query_va_api 1 + +/* when cl_intel_va_api_media_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedVA_APIMediaSurfaceFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + VAImageFormat* va_api_formats, + cl_uint* num_surface_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedVA_APIMediaSurfaceFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + VAImageFormat* va_api_formats, + cl_uint* num_surface_formats) ; + /****************************************** * cl_intel_va_api_media_sharing extension * *******************************************/ @@ -92,16 +95,16 @@ clGetDeviceIDsFromVA_APIMediaAdapterINTEL( cl_va_api_device_set_intel media_adapter_set, cl_uint num_entries, cl_device_id* devices, - cl_uint* num_devices) 
CL_EXT_SUFFIX__VERSION_1_2; + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)( +typedef cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)( cl_platform_id platform, cl_va_api_device_source_intel media_adapter_type, void* media_adapter, cl_va_api_device_set_intel media_adapter_set, cl_uint num_entries, cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2; + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromVA_APIMediaSurfaceINTEL( @@ -109,14 +112,14 @@ clCreateFromVA_APIMediaSurfaceINTEL( cl_mem_flags flags, VASurfaceID* surface, cl_uint plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)( +typedef cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)( cl_context context, cl_mem_flags flags, VASurfaceID* surface, cl_uint plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireVA_APIMediaSurfacesINTEL( @@ -125,15 +128,15 @@ clEnqueueAcquireVA_APIMediaSurfacesINTEL( const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event* event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)( +typedef cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event* event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseVA_APIMediaSurfacesINTEL( @@ -142,15 
+145,15 @@ clEnqueueReleaseVA_APIMediaSurfacesINTEL( const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event* event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)( +typedef cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event* event) CL_API_SUFFIX__VERSION_1_2; #ifdef __cplusplus } diff --git a/include/CL/opencl.hpp b/include/CL/opencl.hpp index 834fc104b..3a953c8c8 100644 --- a/include/CL/opencl.hpp +++ b/include/CL/opencl.hpp @@ -165,10 +165,6 @@ * the cl::allocate_pointer functions are not defined and may be * defined by the user before opencl.hpp is included. * - * - CL_HPP_ENABLE_DEVICE_FISSION - * - * Enables device fission for OpenCL 1.2 platforms. - * * - CL_HPP_ENABLE_EXCEPTIONS * * Enable exceptions for use in the C++ bindings header. This is the @@ -194,10 +190,22 @@ * applies to use of cl::Program construction and other program * build variants. * + * - CL_HPP_USE_CL_DEVICE_FISSION + * + * Enable the cl_ext_device_fission extension. + * + * - CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR + * + * Enable the cl_khr_image2d_from_buffer extension. + * * - CL_HPP_USE_CL_SUB_GROUPS_KHR * * Enable the cl_khr_subgroups extension. * + * - CL_HPP_USE_DX_INTEROP + * + * Enable the cl_khr_d3d10_sharing extension. + * * - CL_HPP_USE_IL_KHR * * Enable the cl_khr_il_program extension. @@ -209,6 +217,10 @@ * bindings, including support for the optional exception feature and * also the supplied vector and string classes, see following sections for * decriptions of these features. 
+ * + * Note: the C++ bindings use std::call_once and therefore may need to be + * compiled using special command-line options (such as "-pthread") on some + * platforms! * * \code #define CL_HPP_ENABLE_EXCEPTIONS @@ -224,28 +236,30 @@ int main(void) { - // Filter for a 2.0 platform and set it as the default + // Filter for a 2.0 or newer platform and set it as the default std::vector platforms; cl::Platform::get(&platforms); cl::Platform plat; for (auto &p : platforms) { std::string platver = p.getInfo(); - if (platver.find("OpenCL 2.") != std::string::npos) { + if (platver.find("OpenCL 2.") != std::string::npos || + platver.find("OpenCL 3.") != std::string::npos) { + // Note: an OpenCL 3.x platform may not support all required features! plat = p; } } - if (plat() == 0) { - std::cout << "No OpenCL 2.0 platform found."; + if (plat() == 0) { + std::cout << "No OpenCL 2.0 or newer platform found.\n"; return -1; } cl::Platform newP = cl::Platform::setDefault(plat); if (newP != plat) { - std::cout << "Error setting default platform."; + std::cout << "Error setting default platform.\n"; return -1; } - // Use C++11 raw string literals for kernel source code + // C++11 raw string literal for the first kernel std::string kernel1{R"CLC( global int globalA; kernel void updateGlobal() @@ -253,6 +267,8 @@ globalA = 75; } )CLC"}; + + // Raw string literal for the second kernel std::string kernel2{R"CLC( typedef struct { global int *bar; } Foo; kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, @@ -279,8 +295,9 @@ } )CLC"}; - // New simpler string interface style - std::vector programStrings {kernel1, kernel2}; + std::vector programStrings; + programStrings.push_back(kernel1); + programStrings.push_back(kernel2); cl::Program vectorAddProgram(programStrings); try { @@ -319,10 +336,9 @@ std::vector>> inputA(numElements, 1, svmAlloc); cl::coarse_svm_vector inputB(numElements, 2, svmAlloc); - // ////////////// - // Traditional cl_mem 
allocations + std::vector output(numElements, 0xdeadbeef); cl::Buffer outputBuffer(begin(output), end(output), false); cl::Pipe aPipe(sizeof(cl_int), numElements / 2); @@ -346,14 +362,8 @@ // This one was not passed as a parameter vectorAddKernel.setSVMPointers(anSVMInt); - // Hand control of coarse allocations to runtime - cl::enqueueUnmapSVM(anSVMInt); - cl::enqueueUnmapSVM(fooPointer); - cl::unmapSVM(inputB); - cl::unmapSVM(output2); - - cl_int error; - vectorAddKernel( + cl_int error; + vectorAddKernel( cl::EnqueueArgs( cl::NDRange(numElements/2), cl::NDRange(numElements/2)), @@ -364,12 +374,10 @@ 3, aPipe, defaultDeviceQueue, - error + error ); cl::copy(outputBuffer, begin(output), end(output)); - // Grab the SVM output vector using a map - cl::mapSVM(output2); cl::Device d = cl::Device::getDefault(); @@ -548,19 +556,26 @@ // Define deprecated prefixes and suffixes to ensure compilation // in case they are not pre-defined -#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) - -#if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) +#if !defined(CL_API_PREFIX__VERSION_1_1_DEPRECATED) +#define CL_API_PREFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_API_PREFIX__VERSION_1_1_DEPRECATED) +#if !defined(CL_API_SUFFIX__VERSION_1_1_DEPRECATED) +#define CL_API_SUFFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_API_SUFFIX__VERSION_1_1_DEPRECATED) + +#if !defined(CL_API_PREFIX__VERSION_1_2_DEPRECATED) +#define 
CL_API_PREFIX__VERSION_1_2_DEPRECATED +#endif // #if !defined(CL_API_PREFIX__VERSION_1_2_DEPRECATED) +#if !defined(CL_API_SUFFIX__VERSION_1_2_DEPRECATED) +#define CL_API_SUFFIX__VERSION_1_2_DEPRECATED +#endif // #if !defined(CL_API_SUFFIX__VERSION_1_2_DEPRECATED) + +#if !defined(CL_API_PREFIX__VERSION_2_2_DEPRECATED) +#define CL_API_PREFIX__VERSION_2_2_DEPRECATED +#endif // #if !defined(CL_API_PREFIX__VERSION_2_2_DEPRECATED) +#if !defined(CL_API_SUFFIX__VERSION_2_2_DEPRECATED) +#define CL_API_SUFFIX__VERSION_2_2_DEPRECATED +#endif // #if !defined(CL_API_SUFFIX__VERSION_2_2_DEPRECATED) #if !defined(CL_CALLBACK) #define CL_CALLBACK @@ -1317,14 +1332,20 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, string) \ F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ \ + F(cl_kernel_work_group_info, CL_KERNEL_GLOBAL_WORK_SIZE, cl::detail::size_t_array) \ + \ + F(cl_device_info, CL_DEVICE_LINKER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, size_type) \ F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl::Device) \ F(cl_device_info, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, cl_uint) \ F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, cl::vector) \ F(cl_device_info, CL_DEVICE_PARTITION_TYPE, cl::vector) \ F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, size_type) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, cl_bool) \ F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, string) \ + F(cl_device_info, CL_DEVICE_PRINTF_BUFFER_SIZE, size_type) \ \ F(cl_image_info, CL_IMAGE_ARRAY_SIZE, size_type) \ F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ @@ -1344,7 +1365,11 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, 
int, typename T::cl_ F(cl_device_info, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, cl_uint) \ - F(cl_device_info, CL_DEVICE_SUB_GROUP_SIZES_INTEL, cl::vector) \ + F(cl_device_info, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, cl_uint ) \ + F(cl_device_info, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, size_type ) \ + F(cl_device_info, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, size_type ) \ F(cl_profiling_info, CL_PROFILING_COMMAND_COMPLETE, cl_ulong) \ F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, cl_bool) \ F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_PTRS, void**) \ @@ -1363,17 +1388,17 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_program_info, CL_PROGRAM_IL_KHR, cl::vector) #define CL_HPP_PARAM_NAME_INFO_2_1_(F) \ - F(cl_platform_info, CL_PLATFORM_HOST_TIMER_RESOLUTION, size_type) \ + F(cl_platform_info, CL_PLATFORM_HOST_TIMER_RESOLUTION, cl_ulong) \ F(cl_program_info, CL_PROGRAM_IL, cl::vector) \ - F(cl_kernel_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ - F(cl_kernel_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) \ F(cl_device_info, CL_DEVICE_MAX_NUM_SUB_GROUPS, cl_uint) \ F(cl_device_info, CL_DEVICE_IL_VERSION, string) \ F(cl_device_info, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, cl_bool) \ F(cl_command_queue_info, CL_QUEUE_DEVICE_DEFAULT, cl::DeviceCommandQueue) \ F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, size_type) \ F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, cl::detail::size_t_array) + F(cl_kernel_sub_group_info, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, 
cl::detail::size_t_array) \ + F(cl_kernel_sub_group_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) #define CL_HPP_PARAM_NAME_INFO_2_2_(F) \ F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, cl_bool) \ @@ -1459,7 +1484,7 @@ CL_HPP_PARAM_NAME_INFO_3_0_(CL_HPP_DECLARE_PARAM_TRAITS_) CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // #if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) && CL_HPP_TARGET_OPENCL_VERSION < 210 -#if defined(CL_HPP_USE_IL_KHR) +#if defined(CL_HPP_USE_IL_KHR) && CL_HPP_TARGET_OPENCL_VERSION < 210 CL_HPP_PARAM_NAME_INFO_IL_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // #if defined(CL_HPP_USE_IL_KHR) @@ -1497,6 +1522,28 @@ CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_CL3_SHARED_(CL_HPP_DECLARE_PARAM_TR CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_KHRONLY_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_khr_extended_versioning +#if defined(cl_khr_device_uuid) +using uuid_array = array; +using luid_array = array; +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_UUID_KHR, uuid_array) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DRIVER_UUID_KHR, uuid_array) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LUID_VALID_KHR, cl_bool) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LUID_KHR, luid_array) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NODE_MASK_KHR, cl_uint) +#endif + +#if defined(cl_khr_pci_bus_info) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PCI_BUS_INFO_KHR, cl_device_pci_bus_info_khr) +#endif + +#if defined(cl_khr_integer_dot_product) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, cl_device_integer_dot_product_capabilities_khr) +#if defined(CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, 
cl_device_integer_dot_product_acceleration_properties_khr) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, cl_device_integer_dot_product_acceleration_properties_khr) +#endif // defined(CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR) +#endif // defined(cl_khr_integer_dot_product) + #ifdef CL_PLATFORM_ICD_SUFFIX_KHR CL_HPP_DECLARE_PARAM_TRAITS_(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, string) #endif @@ -1504,7 +1551,6 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, strin #ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) #endif - #ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, vector) #endif @@ -1535,6 +1581,9 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUT #ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) #endif +#ifdef CL_DEVICE_BOARD_NAME_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_BOARD_NAME_AMD, string) +#endif #ifdef CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, cl_ulong) @@ -1545,6 +1594,9 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_JOB_SLOTS_ARM, cl_uint) #ifdef CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, cl_bitfield) #endif +#ifdef CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM, vector) +#endif #ifdef CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM, cl_uint) #endif @@ -1914,6 +1966,7 @@ class Wrapper 
retVal = true; #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + (void)device; return retVal; } @@ -2098,6 +2151,9 @@ struct ImageFormat : public cl_image_format image_channel_data_type = type; } + //! \brief Copy constructor. + ImageFormat(const ImageFormat &other) { *this = other; } + //! \brief Assignment operator. ImageFormat& operator = (const ImageFormat& rhs) { @@ -2351,7 +2407,7 @@ class Device : public detail::Wrapper const cl_device_partition_property_ext * /* properties */, cl_uint /*num_entries*/, cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + cl_uint * /*num_devices*/ ) CL_API_SUFFIX__VERSION_1_1; static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateSubDevicesEXT); @@ -2761,8 +2817,8 @@ CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Platform::default_error_ = CL_SUCCESS; * Unload the OpenCL compiler. * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. 
*/ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int -UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int +UnloadCompiler() CL_API_SUFFIX__VERSION_1_1_DEPRECATED; inline cl_int UnloadCompiler() { @@ -2852,7 +2908,7 @@ class Context */ Context( const vector& devices, - cl_context_properties* properties = NULL, + const cl_context_properties* properties = NULL, void (CL_CALLBACK * notifyFptr)( const char *, const void *, @@ -2887,7 +2943,7 @@ class Context */ Context( const Device& device, - cl_context_properties* properties = NULL, + const cl_context_properties* properties = NULL, void (CL_CALLBACK * notifyFptr)( const char *, const void *, @@ -2917,7 +2973,7 @@ class Context */ Context( cl_device_type type, - cl_context_properties* properties = NULL, + const cl_context_properties* properties = NULL, void (CL_CALLBACK * notifyFptr)( const char *, const void *, @@ -3950,7 +4006,7 @@ class Buffer : public Memory Context context = Context::getDefault(err); if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); } else { object_ = ::clCreateBuffer(context(), flags, size, 0, &error); } @@ -4479,12 +4535,11 @@ class Image1D : public Image cl_int* err = NULL) { cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D, - width, - 0, 0, 0, 0, 0, 0, 0, 0 - }; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D; + desc.image_width = width; + object_ = ::clCreateImage( context(), flags, @@ -4567,13 +4622,12 @@ class Image1DBuffer : public Image cl_int* err = NULL) { cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_BUFFER, - width, - 0, 0, 0, 0, 0, 0, 0, - buffer() - }; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + desc.image_width = width; + desc.buffer = buffer(); + object_ = ::clCreateImage( context(), 
flags, @@ -4653,15 +4707,13 @@ class Image1DArray : public Image cl_int* err = NULL) { cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_ARRAY, - width, - 0, 0, // height, depth (unused) - arraySize, - rowPitch, - 0, 0, 0, 0 - }; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + desc.image_width = width; + desc.image_array_size = arraySize; + desc.image_row_pitch = rowPitch; + object_ = ::clCreateImage( context(), flags, @@ -4768,15 +4820,12 @@ class Image2D : public Image #if CL_HPP_TARGET_OPENCL_VERSION >= 120 if (useCreateImage) { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, 0 - }; + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_row_pitch = row_pitch; + object_ = ::clCreateImage( context(), flags, @@ -4822,17 +4871,13 @@ class Image2D : public Image { cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, - // Use buffer as input to image - sourceBuffer() - }; + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_row_pitch = row_pitch; + desc.buffer = sourceBuffer(); + object_ = ::clCreateImage( context(), 0, // flags inherited from buffer @@ -4886,19 +4931,16 @@ class Image2D : public Image // Update only the channel order. // Channel format inherited from source. 
sourceFormat.image_channel_order = order; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - sourceWidth, - sourceHeight, - 0, 0, // depth (unused), array size (unused) - sourceRowPitch, - 0, // slice pitch (unused) - sourceNumMIPLevels, - sourceNumSamples, - // Use buffer as input to image - sourceImage() - }; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = sourceWidth; + desc.image_height = sourceHeight; + desc.image_row_pitch = sourceRowPitch; + desc.num_mip_levels = sourceNumMIPLevels; + desc.num_samples = sourceNumSamples; + desc.buffer = sourceImage(); + object_ = ::clCreateImage( context(), 0, // flags should be inherited from mem_object @@ -4978,7 +5020,7 @@ class Image2D : public Image * \see Memory * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D +class CL_API_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D { public: /*! \brief Constructs an Image2DGL in a specified context, from a given @@ -5061,7 +5103,7 @@ class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D return *this; } -} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +} CL_API_SUFFIX__VERSION_1_1_DEPRECATED; #endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS #if CL_HPP_TARGET_OPENCL_VERSION >= 120 @@ -5084,17 +5126,15 @@ class Image2DArray : public Image cl_int* err = NULL) { cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D_ARRAY, - width, - height, - 0, // depth (unused) - arraySize, - rowPitch, - slicePitch, - 0, 0, 0 - }; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + desc.image_width = width; + desc.image_height = height; + desc.image_array_size = arraySize; + desc.image_row_pitch = rowPitch; + desc.image_slice_pitch = slicePitch; + object_ = ::clCreateImage( context(), flags, @@ -5199,17 +5239,14 @@ class Image3D : public Image #if CL_HPP_TARGET_OPENCL_VERSION >= 120 if (useCreateImage) { - 
cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE3D, - width, - height, - depth, - 0, // array size (unused) - row_pitch, - slice_pitch, - 0, 0, 0 - }; + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = width; + desc.image_height = height; + desc.image_depth = depth; + desc.image_row_pitch = row_pitch; + desc.image_slice_pitch = slice_pitch; + object_ = ::clCreateImage( context(), flags, @@ -6413,8 +6450,7 @@ class Program : public detail::Wrapper static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = NULL; CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); - return detail::errHandler( - pfn_clCreateProgramWithILKHR( + object_ = pfn_clCreateProgramWithILKHR( context(), static_cast(IL.data()), IL.size(), &error); #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 @@ -6467,8 +6503,7 @@ class Program : public detail::Wrapper static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = NULL; CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); - return detail::errHandler( - pfn_clCreateProgramWithILKHR( + object_ = pfn_clCreateProgramWithILKHR( context(), static_cast(IL.data()), IL.size(), &error); #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 @@ -6696,7 +6731,7 @@ class Program : public detail::Wrapper notifyFptr, data); - BuildLogType buildLog(1); + BuildLogType buildLog(0); buildLog.push_back(std::make_pair(device, getBuildInfo(device))); return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, buildLog); } @@ -6867,9 +6902,9 @@ class Program : public detail::Wrapper * on a callback stack associated with program. The registered user callback * functions are called in the reverse order in which they were registered. 
*/ - CL_EXT_PREFIX__VERSION_2_2_DEPRECATED cl_int setReleaseCallback( + CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int setReleaseCallback( void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), - void * user_data = NULL) CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED + void * user_data = NULL) CL_API_SUFFIX__VERSION_2_2_DEPRECATED { return detail::errHandler( ::clSetProgramReleaseCallback( @@ -7074,6 +7109,11 @@ inline QueueProperties operator|(QueueProperties lhs, QueueProperties rhs) return static_cast(static_cast(lhs) | static_cast(rhs)); } +inline QueueProperties operator&(QueueProperties lhs, QueueProperties rhs) +{ + return static_cast(static_cast(lhs) & static_cast(rhs)); +} + /*! \class CommandQueue * \brief CommandQueue interface for cl_command_queue. */ @@ -8574,10 +8614,10 @@ class CommandQueue : public detail::Wrapper } #if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) - CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask( + CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask( const Kernel& kernel, const vector* events = NULL, - Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + Event* event = NULL) const CL_API_SUFFIX__VERSION_1_2_DEPRECATED { cl_event tmp; cl_int err = detail::errHandler( @@ -8634,8 +8674,8 @@ class CommandQueue : public detail::Wrapper * Deprecated APIs for 1.2 */ #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueMarker(Event* event = NULL) const CL_API_SUFFIX__VERSION_1_1_DEPRECATED { cl_event tmp; cl_int err = detail::errHandler( @@ -8650,8 +8690,8 @@ class CommandQueue : public detail::Wrapper return err; } - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueWaitForEvents(const vector& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const vector& 
events) const CL_API_SUFFIX__VERSION_1_1_DEPRECATED { return detail::errHandler( ::clEnqueueWaitForEvents( @@ -8787,8 +8827,8 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( * Deprecated APIs for 1.2 */ #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueBarrier() const CL_API_SUFFIX__VERSION_1_1_DEPRECATED { return detail::errHandler( ::clEnqueueBarrier(object_), @@ -9134,7 +9174,7 @@ Buffer::Buffer( size_type size = sizeof(DataType)*(endIterator - startIterator); if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); } else { object_ = ::clCreateBuffer(context(), flags, size, 0, &error); } @@ -9187,7 +9227,7 @@ Buffer::Buffer( Context context = queue.getInfo(); if (useHostPtr) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); } else { object_ = ::clCreateBuffer(context(), flags, size, 0, &error); @@ -9309,7 +9349,7 @@ inline cl_int enqueueMapSVM( */ template inline cl_int enqueueMapSVM( - cl::pointer ptr, + cl::pointer &ptr, cl_bool blocking, cl_map_flags flags, size_type size, @@ -9333,7 +9373,7 @@ inline cl_int enqueueMapSVM( */ template inline cl_int enqueueMapSVM( - cl::vector container, + cl::vector &container, cl_bool blocking, cl_map_flags flags, const vector* events = NULL, From 8714c1ecf98997201eed46abf8d9895aa58bd6c2 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 5 Apr 2023 14:45:01 +0300 Subject: [PATCH 02/35] opencl.hpp: make clGetDeviceInfo recognize Intel Subgroup Size --- include/CL/opencl.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/include/CL/opencl.hpp b/include/CL/opencl.hpp index 3a953c8c8..686aed66b 100644 --- a/include/CL/opencl.hpp +++ b/include/CL/opencl.hpp @@ -1398,7 +1398,8 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, size_type) \ F(cl_kernel_sub_group_info, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, cl::detail::size_t_array) \ F(cl_kernel_sub_group_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) + F(cl_kernel_sub_group_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) \ + F(cl_device_info, CL_DEVICE_SUB_GROUP_SIZES_INTEL, cl::vector) #define CL_HPP_PARAM_NAME_INFO_2_2_(F) \ F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, cl_bool) \ From d34db9096acec21fd5ca8bdc42d286b619efed3d Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Tue, 9 May 2023 13:46:30 +0300 Subject: [PATCH 03/35] add include/CL/cl_ext_pocl.h (required for command buffer extensions) --- include/CL/cl_ext_pocl.h | 520 +++++++++++++++++++++++++++++++++++++++ include/CL/opencl.h | 1 + 2 files changed, 521 insertions(+) create mode 100644 include/CL/cl_ext_pocl.h diff --git a/include/CL/cl_ext_pocl.h b/include/CL/cl_ext_pocl.h new file mode 100644 index 000000000..68b2180d9 --- /dev/null +++ b/include/CL/cl_ext_pocl.h @@ -0,0 +1,520 @@ +/******************************************************************************* + * Copyright (c) 2021 Tampere University + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * 
The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __CL_EXT_POCL_H +#define __CL_EXT_POCL_H + +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + +/*********************************** +* cl_pocl_content_size extension * +************************************/ + +#define cl_pocl_content_size 1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetContentSizeBufferPoCL( + cl_mem buffer, + cl_mem content_size_buffer) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clSetContentSizeBufferPoCL_fn)( + cl_mem buffer, + cl_mem content_size_buffer) CL_API_SUFFIX__VERSION_1_2; + + +/*********************************** +* cl_pocl_svm_rect + +* cl_pocl_command_buffer_svm + +* cl_pocl_command_buffer_host_exec + +* cl_pocl_command_buffer_host_buffer +* extensions +************************************/ + +// SVM copy/fill functions +#define cl_pocl_command_buffer_svm 1 + +// cl_mem & host related functions (clCommandReadBuffer etc) +#define cl_pocl_command_buffer_host_buffer 1 + +// clCommandHostFuncPOCL, clCommandWaitForEventPOCL, 
clCommandSignalEventPOCL +#define cl_pocl_command_buffer_host_exec 1 + +// clEnqueueSVMMemFillRectPOCL, clEnqueueSVMMemcpyRectPOCL +#define cl_pocl_svm_rect 1 + +/****************************************************/ + +/* cl_device_command_buffer_capabilities_khr - bitfield */ +#define CL_COMMAND_BUFFER_CAPABILITY_PROFILING_POCL (1 << 8) + +/* cl_command_buffer_flags_khr */ +#define CL_COMMAND_BUFFER_PROFILING_POCL (1 << 8) + +/* cl_command_buffer_info_khr */ +#define CL_COMMAND_BUFFER_INFO_PROFILING_POCL 0x1299 + +/* cl_command_type */ +/* To be used by clGetEventInfo: */ +/* TODO use values from an assigned range */ +#define CL_COMMAND_SVM_MEMCPY_RECT_POCL 0x1210 +#define CL_COMMAND_SVM_MEMFILL_RECT_POCL 0x1211 + + +typedef cl_int (CL_API_CALL * +clCommandSVMMemcpyPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *dst_ptr, + const void *src_ptr, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandSVMMemcpyRectPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *dst_ptr, + const void *src_ptr, + const size_t *dst_origin, + const size_t *src_origin, + const size_t *region, + size_t dst_row_pitch, + size_t dst_slice_pitch, + size_t src_row_pitch, + size_t src_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandSVMMemfillPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *svm_ptr, + size_t size, + const void *pattern, + size_t pattern_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + + +typedef 
cl_int (CL_API_CALL * +clCommandSVMMemfillRectPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *svm_ptr, + const size_t *origin, + const size_t *region, + size_t row_pitch, + size_t slice_pitch, + const void *pattern, + size_t pattern_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + + + +typedef void (*CmdBufferCallbackFn_t)(void* userData); + +typedef cl_int (CL_API_CALL * +clCommandHostFuncPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + CmdBufferCallbackFn_t callback_fn, + void* user_data, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandWaitForEventPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_event Event, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandSignalEventPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_event *Event, // output + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + + + +typedef cl_int (CL_API_CALL * +clCommandReadBufferPOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + size_t offset, + size_t size, + void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandReadBufferRectPOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t 
buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandReadImagePOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const size_t * origin, /* [3] */ + const size_t * region, /* [3] */ + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandWriteBufferPOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + size_t offset, + size_t size, + const void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandWriteBufferRectPOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandWriteImagePOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const size_t * origin, /*[3]*/ + const size_t * region, /*[3]*/ + size_t row_pitch, + size_t slice_pitch, + const void * ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + 
cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clEnqueueSVMMemcpyRectPOCL_fn) (cl_command_queue command_queue, + cl_bool blocking, + void *dst_ptr, + const void *src_ptr, + const size_t *dst_origin, + const size_t *src_origin, + const size_t *region, + size_t dst_row_pitch, + size_t dst_slice_pitch, + size_t src_row_pitch, + size_t src_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event); + +typedef cl_int (CL_API_CALL * +clEnqueueSVMMemFillRectPOCL_fn) (cl_command_queue command_queue, + void * svm_ptr, + const size_t * origin, + const size_t * region, + size_t row_pitch, + size_t slice_pitch, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandSVMMemcpyPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *dst_ptr, + const void *src_ptr, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandSVMMemcpyRectPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *dst_ptr, + const void *src_ptr, + const size_t *dst_origin, + const size_t *src_origin, + const size_t *region, + size_t dst_row_pitch, + size_t dst_slice_pitch, + size_t src_row_pitch, + size_t src_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandSVMMemfillPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *svm_ptr, + size_t size, + const void *pattern, + size_t pattern_size, + cl_uint 
num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandSVMMemfillRectPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *svm_ptr, + const size_t *origin, + const size_t *region, + size_t row_pitch, + size_t slice_pitch, + const void *pattern, + size_t pattern_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + + + + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandHostFuncPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + CmdBufferCallbackFn_t callback_fn, + void* user_data, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandWaitForEventPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_event Event, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandSignalEventPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_event *Event, // output + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandReadBufferPOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + size_t offset, + size_t size, + void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandReadBufferRectPOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, 
+ const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandReadImagePOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const size_t * origin, /* [3] */ + const size_t * region, /* [3] */ + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandWriteBufferPOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + size_t offset, + size_t size, + const void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandWriteBufferRectPOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandWriteImagePOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const size_t * origin, /*[3]*/ + const size_t * region, /*[3]*/ + size_t row_pitch, + size_t slice_pitch, + const 
void * ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpyRectPOCL (cl_command_queue command_queue, + cl_bool blocking, + void *dst_ptr, + const void *src_ptr, + const size_t *dst_origin, + const size_t *src_origin, + const size_t *region, + size_t dst_row_pitch, + size_t dst_slice_pitch, + size_t src_row_pitch, + size_t src_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFillRectPOCL (cl_command_queue command_queue, + void * svm_ptr, + const size_t * origin, + const size_t * region, + size_t row_pitch, + size_t slice_pitch, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __CL_EXT_POCL_H */ diff --git a/include/CL/opencl.h b/include/CL/opencl.h index 1c4e10c88..380b55f8f 100644 --- a/include/CL/opencl.h +++ b/include/CL/opencl.h @@ -25,6 +25,7 @@ extern "C" { #include #include #include +#include #ifdef __cplusplus } From 525f0a4611358f1b74da44620c7ac107710af333 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Tue, 9 May 2023 14:27:40 +0300 Subject: [PATCH 04/35] src/CHIPBindings.cc: add missing error-checking for Graph API --- src/CHIPBindings.cc | 95 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 2 deletions(-) diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 0ca44a0a2..ae9a9cb0d 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -266,6 +266,8 @@ static void handleAbortRequest(CHIPQueue &Q, CHIPModule &M) { hipError_t hipGraphCreate(hipGraph_t *pGraph, unsigned int flags) { CHIP_TRY + if (!pGraph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraph 
*Graph = new CHIPGraph(); *pGraph = Graph; @@ -275,6 +277,8 @@ hipError_t hipGraphCreate(hipGraph_t *pGraph, unsigned int flags) { hipError_t hipGraphDestroy(hipGraph_t graph) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); delete graph; RETURN(hipSuccess); @@ -285,6 +289,10 @@ hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t *from, const hipGraphNode_t *to, size_t numDependencies) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); + if (!from || !to) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); if (!FoundNode) @@ -300,6 +308,10 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t *to, size_t numDependencies) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); + if (!from || !to) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); if (!FoundNode) @@ -313,7 +325,11 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t *from, hipGraphNode_t *to, size_t *numEdges) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); + if (!from || !to || !numEdges) + RETURN(hipErrorInvalidHandle); auto Edges = GRAPH(graph)->getEdges(); if (!to && !from) { *numEdges = Edges.size(); @@ -334,6 +350,10 @@ hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t *from, hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, size_t *numNodes) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); + if (!nodes || !numNodes) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto Nodes = GRAPH(graph)->getNodes(); *nodes = *(Nodes.data()); @@ -345,6 +365,10 @@ hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t *pRootNodes, size_t *pNumRootNodes) { CHIP_TRY + if (!graph) + 
RETURN(hipErrorInvalidHandle); + if (!pRootNodes || !pNumRootNodes) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto Nodes = GRAPH(graph)->getRootNodes(); *pRootNodes = *(Nodes.data()); @@ -357,6 +381,10 @@ hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, hipGraphNode_t *pDependencies, size_t *pNumDependencies) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); + if (!pDependencies || !pNumDependencies) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto Deps = NODE(node)->getDependencies(); *pNumDependencies = Deps.size(); @@ -373,6 +401,8 @@ hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipGraphNode_t *pDependentNodes, size_t *pNumDependentNodes) { CHIP_TRY + if (!node || !pDependentNodes || !pNumDependentNodes) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto Deps = NODE(node)->getDependants(); *pNumDependentNodes = Deps.size(); @@ -387,6 +417,8 @@ hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType *pType) { CHIP_TRY + if (!pType || !node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); *pType = NODE(node)->getType(); RETURN(hipSuccess); @@ -396,6 +428,8 @@ hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType *pType) { hipError_t hipGraphDestroyNode(hipGraphNode_t node) { CHIP_TRY CHIPInitialize(); + if (!node) + RETURN(hipErrorInvalidHandle); /** * have to resort to these shenanigans to call the proper derived destructor */ @@ -441,6 +475,8 @@ hipError_t hipGraphDestroyNode(hipGraphNode_t node) { hipError_t hipGraphClone(hipGraph_t *pGraphClone, hipGraph_t originalGraph) { CHIP_TRY + if (!pGraphClone || !originalGraph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraph *CloneGraph = new CHIPGraph(*GRAPH(originalGraph)); *pGraphClone = CloneGraph; @@ -452,6 +488,8 @@ hipError_t hipGraphNodeFindInClone(hipGraphNode_t *pNode, hipGraphNode_t originalNode, hipGraph_t clonedGraph) { CHIP_TRY + if (!pNode || 
!originalNode || !clonedGraph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto Node = GRAPH(clonedGraph)->getClonedNodeFromOriginal(NODE(originalNode)); *pNode = Node; @@ -463,6 +501,8 @@ hipError_t hipGraphInstantiate(hipGraphExec_t *pGraphExec, hipGraph_t graph, hipGraphNode_t *pErrorNode, char *pLogBuffer, size_t bufferSize) { CHIP_TRY + if (!pGraphExec || !graph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphExec *GraphExec = new CHIPGraphExec(GRAPH(graph)); *pGraphExec = GraphExec; @@ -475,6 +515,8 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t *pGraphExec, hipGraph_t graph, unsigned long long flags) { CHIP_TRY + if (!pGraphExec || !graph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); // flags not yet defined in HIP API. UNIMPLEMENTED(hipErrorNotSupported); @@ -483,6 +525,8 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t *pGraphExec, hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { CHIP_TRY + if (!graphExec || !stream) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto ChipQueue = static_cast(stream); ChipQueue = Backend->findQueue(ChipQueue); @@ -493,6 +537,8 @@ hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { hipError_t hipGraphExecDestroy(hipGraphExec_t graphExec) { CHIP_TRY + if (!graphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); delete graphExec; RETURN(hipSuccess); @@ -503,6 +549,8 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, hipGraphNode_t *hErrorNode_out, hipGraphExecUpdateResult *updateResult_out) { CHIP_TRY + if (!hGraphExec || !hGraph || !hErrorNode_out || !updateResult_out) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); // TODO Graphs - hipGraphExecUpdate /** @@ -616,6 +664,8 @@ hipError_t hipGraphAddKernelNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipKernelNodeParams *pNodeParams) { CHIP_TRY + if (!pGraphNode || !graph) + RETURN(hipErrorInvalidHandle); 
CHIPInitialize(); CHIPGraphNodeKernel *Node = new CHIPGraphNodeKernel{pNodeParams}; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -629,6 +679,8 @@ hipError_t hipGraphAddKernelNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); *pNodeParams = ((CHIPGraphNodeKernel *)node)->getParams(); RETURN(hipSuccess); @@ -638,6 +690,8 @@ hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, const hipKernelNodeParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); ((CHIPGraphNodeKernel *)node)->setParams(*pNodeParams); RETURN(hipSuccess); @@ -648,6 +702,8 @@ hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipKernelNodeParams *pNodeParams) { CHIP_TRY + if (!hGraphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); // Graph obtained from hipGraphExec_t is a clone of the original CHIPGraph *Graph = EXEC(hGraphExec)->getOriginalGraphPtr(); @@ -672,7 +728,7 @@ hipError_t hipGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, // graphs test seems wrong - normally we expect hipErrorInvalidHandle // NULLCHECK(graph, pGraphNode, pCopyParams); if (!graph || !pGraphNode || !pCopyParams) - RETURN(hipErrorInvalidValue); + RETURN(hipErrorInvalidHandle); if (pDependencies == nullptr & numDependencies > 0) CHIPERR_LOG_AND_THROW( "numDependencies is not 0 while pDependencies is null", @@ -869,6 +925,8 @@ hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, size_t offset, hipMemcpyKind kind) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); static_cast(node)->setParams( const_cast(src), symbol, count, offset, kind); @@ -880,6 +938,8 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol( 
hipGraphExec_t hGraphExec, hipGraphNode_t node, const void *symbol, const void *src, size_t count, size_t offset, hipMemcpyKind kind) { CHIP_TRY + if (!node || !hGraphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); @@ -903,6 +963,8 @@ hipError_t hipGraphAddMemsetNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipMemsetParams *pMemsetParams) { CHIP_TRY + if (!graph || !pGraphNode) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeMemset *Node = new CHIPGraphNodeMemset(pMemsetParams); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -916,6 +978,8 @@ hipError_t hipGraphAddMemsetNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); hipMemsetParams Params = static_cast(node)->getParams(); @@ -927,6 +991,8 @@ hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); static_cast(node)->setParams(pNodeParams); RETURN(hipSuccess); @@ -937,6 +1003,8 @@ hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipMemsetParams *pNodeParams) { CHIP_TRY + if (!node || !hGraphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); @@ -959,6 +1027,8 @@ hipError_t hipGraphAddHostNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipHostNodeParams *pNodeParams) { CHIP_TRY + if (!graph || !pGraphNode) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeHost *Node = new CHIPGraphNodeHost(pNodeParams); 
Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -972,6 +1042,8 @@ hipError_t hipGraphAddHostNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); hipHostNodeParams Params = static_cast(node)->getParams(); @@ -983,6 +1055,8 @@ hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); static_cast(node)->setParams(pNodeParams); RETURN(hipSuccess); @@ -993,6 +1067,8 @@ hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipHostNodeParams *pNodeParams) { CHIP_TRY + if (!node || !hGraphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); @@ -1016,6 +1092,8 @@ hipError_t hipGraphAddChildGraphNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipGraph_t childGraph) { CHIP_TRY + if (!graph || !pGraphNode) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeGraph *Node = new CHIPGraphNodeGraph(GRAPH(childGraph)); *pGraphNode = Node; @@ -1029,6 +1107,8 @@ hipError_t hipGraphAddChildGraphNode(hipGraphNode_t *pGraphNode, hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t *pGraph) { CHIP_TRY + if (!node || !pGraph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); *pGraph = static_cast(node)->getGraph(); RETURN(hipSuccess); @@ -1039,6 +1119,8 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipGraph_t childGraph) { CHIP_TRY + if (!node || !hGraphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); static_cast(node)->setGraph(GRAPH(childGraph)); RETURN(hipSuccess); @@ -1049,6 +1131,8 @@ hipError_t 
hipGraphAddEmptyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t *pDependencies, size_t numDependencies) { CHIP_TRY + if (!graph || !pGraphNode) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeEmpty *Node = new CHIPGraphNodeEmpty(); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -1064,6 +1148,8 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipEvent_t event) { CHIP_TRY + if (!graph || !pGraphNode) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeEventRecord *Node = new CHIPGraphNodeEventRecord(static_cast(event)); @@ -1077,6 +1163,8 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t *pGraphNode, hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t *event_out) { CHIP_TRY + if (!node || !event_out) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto CastNode = static_cast(node); if (!CastNode) @@ -1090,6 +1178,8 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { CHIP_TRY + if (!node || !event) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto CastNode = static_cast(node); if (!CastNode) @@ -2663,7 +2753,8 @@ hipError_t hipArrayCreate(hipArray **Array, hipError_t hipFreeArray(hipArray *Array) { CHIP_TRY CHIPInitialize(); - NULLCHECK(Array, Array->data); + NULLCHECK(Array); + NULLCHECK(Array->data); hipError_t Err = hipFree(Array->data); delete Array; From c4f654ecd894f983576228b71c35568eb28f3998 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Tue, 9 May 2023 16:27:14 +0300 Subject: [PATCH 05/35] implement CHIPGraphNative This class is used to implement Graphs that execute "natively" in the backend, using OpenCL command-buffers or LevelZero command-lists and only synchronizing with the host when required. 
Fallback to original Graph is provided in CHIPGraphExec::launch() --- src/CHIPBackend.hh | 10 + src/CHIPGraph.cc | 107 ++++-- src/CHIPGraph.hh | 91 ++++- src/backend/OpenCL/CHIPBackendOpenCL.cc | 465 +++++++++++++++++++++++- src/backend/OpenCL/CHIPBackendOpenCL.hh | 97 ++++- 5 files changed, 707 insertions(+), 63 deletions(-) diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 1c6381235..022aae5db 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -1145,6 +1145,10 @@ public: CHIPExecItem(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, hipStream_t ChipQueue); + void setupDims(dim3 GridDim, dim3 BlockDim) { + GridDim_ = GridDim; + BlockDim_ = BlockDim; + }; /** * @brief Set the Kernel object * @@ -2060,6 +2064,12 @@ public: CHIPDevice *PerThreadQueueForDevice = nullptr; + virtual CHIPEvent *enqueueNativeGraph(CHIPGraphNative *NativeGraph) { + return nullptr; + } + virtual CHIPGraphNative *createNativeGraph() { return nullptr; } + virtual void destroyNativeGraph(CHIPGraphNative *) { return; } + // I want others to be able to lock this queue? 
std::mutex QueueMtx; diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index e036dbcb8..ea3c4bcd1 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -82,12 +82,6 @@ CHIPGraph::CHIPGraph(const CHIPGraph &OriginalGraph) { } } -CHIPGraphNodeKernel::CHIPGraphNodeKernel(const CHIPGraphNodeKernel &Other) - : CHIPGraphNode(Other) { - Params_ = Other.Params_; - ExecItem_ = Other.ExecItem_->clone(); -} - CHIPGraphNode *CHIPGraphNodeKernel::clone() const { auto NewNode = new CHIPGraphNodeKernel(*this); return NewNode; @@ -114,10 +108,30 @@ void CHIPGraphNodeMemcpy::execute(CHIPQueue *Queue) const { hipErrorTbd); } } + +void CHIPGraphNodeKernel::setupKernelArgs() const { + ExecItem_->copyArgs(Params_.kernelParams); + ExecItem_->setupAllArgs(); +} + +CHIPGraphNodeKernel::~CHIPGraphNodeKernel() { delete ExecItem_; } + void CHIPGraphNodeKernel::execute(CHIPQueue *Queue) const { + // need to call this b/c launchImpl only + // calls ChipOclExecItem->setupAllArgs() without calling copyArgs() first + setupKernelArgs(); Queue->launch(ExecItem_); } +CHIPGraphNodeKernel::CHIPGraphNodeKernel(const CHIPGraphNodeKernel &Other) + : CHIPGraphNode(Other) { + Params_ = Other.Params_; + Kernel_ = Other.Kernel_; + ExecItem_ = Backend->createCHIPExecItem(Params_.gridDim, Params_.blockDim, + Params_.sharedMemBytes, nullptr); + ExecItem_->setKernel(Kernel_); +} + CHIPGraphNodeKernel::CHIPGraphNodeKernel(const hipKernelNodeParams *TheParams) : CHIPGraphNode(hipGraphNodeTypeKernel) { Params_.blockDim = TheParams->blockDim; @@ -127,16 +141,13 @@ CHIPGraphNodeKernel::CHIPGraphNodeKernel(const hipKernelNodeParams *TheParams) Params_.kernelParams = TheParams->kernelParams; Params_.sharedMemBytes = TheParams->sharedMemBytes; auto Dev = Backend->getActiveDevice(); - CHIPKernel *ChipKernel = Dev->findKernel(HostPtr(Params_.func)); - if (!ChipKernel) + Kernel_ = Dev->findKernel(HostPtr(Params_.func)); // assign the member; declaring a local here would shadow it and leave getKernel() uninitialized + if (!Kernel_) CHIPERR_LOG_AND_THROW("Could not find requested kernel", 
hipErrorInvalidDeviceFunction); ExecItem_ = Backend->createCHIPExecItem(Params_.gridDim, Params_.blockDim, Params_.sharedMemBytes, nullptr); - ExecItem_->setKernel(ChipKernel); - - ExecItem_->copyArgs(TheParams->kernelParams); - ExecItem_->setupAllArgs(); + ExecItem_->setKernel(Kernel_); } CHIPGraphNodeKernel::CHIPGraphNodeKernel(const void *HostFunction, dim3 GridDim, @@ -150,21 +161,18 @@ CHIPGraphNodeKernel::CHIPGraphNodeKernel(const void *HostFunction, dim3 GridDim, Params_.gridDim = GridDim; Params_.kernelParams = Args; Params_.sharedMemBytes = SharedMem; - auto Dev = Backend->getActiveDevice(); - CHIPKernel *ChipKernel = Dev->findKernel(HostPtr(HostFunction)); - if (!ChipKernel) + Kernel_ = Dev->findKernel(HostPtr(Params_.func)); // assign the member; declaring a local here would shadow it and leave getKernel() uninitialized + if (!Kernel_) CHIPERR_LOG_AND_THROW("Could not find requested kernel", hipErrorInvalidDeviceFunction); - ExecItem_ = - Backend->createCHIPExecItem(GridDim, BlockDim, SharedMem, nullptr); - ExecItem_->setKernel(ChipKernel); - - ExecItem_->copyArgs(Args); - ExecItem_->setupAllArgs(); + ExecItem_ = Backend->createCHIPExecItem(Params_.gridDim, Params_.blockDim, + Params_.sharedMemBytes, nullptr); + ExecItem_->setKernel(Kernel_); } -int NodeCounter = 1; +static unsigned NodeCounter = 1; + void CHIPGraph::addNode(CHIPGraphNode *Node) { logDebug("{} CHIPGraph::addNode({})", (void *)this, (void *)Node); Node->Msg = "M" + std::to_string(NodeCounter); @@ -186,22 +194,51 @@ void CHIPGraph::removeNode(CHIPGraphNode *Node) { void CHIPGraphExec::launch(CHIPQueue *Queue) { logDebug("{} CHIPGraphExec::launch({})", (void *)this, (void *)Queue); - compile(); - auto ExecQueueCopy = ExecQueues_; - while (ExecQueueCopy.size()) { - auto Nodes = ExecQueueCopy.front(); - std::string NodesInThisLevel = ""; - for (auto Node : Nodes) { - NodesInThisLevel += Node->Msg + " "; - } - logDebug("Executing nodes: {}", NodesInThisLevel); - for (auto Node : Nodes) { - logDebug("Executing {}", Node->Msg); - Node->execute(Queue); + bool UsedNativeGraph = 
false; + Queue->getContext(); + if (NativeGraph && NativeGraph->isFinalized()) { + // launch existing native graph + UsedNativeGraph = Queue->enqueueNativeGraph(NativeGraph.get()); + if (UsedNativeGraph) Queue->finish(); + } else if (!NativeGraph) { + // construct native graph + NativeGraph.reset(Queue->createNativeGraph()); + bool FailedToAddNode = !NativeGraph; // createNativeGraph() may return nullptr (no backend support): skip straight to the fallback + for (auto &Node : OriginalGraph_->getNodes()) { + if (FailedToAddNode || !NativeGraph->addNode(Node)) { + FailedToAddNode = true; + break; + } } + if (!FailedToAddNode) { + UsedNativeGraph = NativeGraph->finalize() && + Queue->enqueueNativeGraph(NativeGraph.get()); + if (UsedNativeGraph) + Queue->finish(); + } + } - ExecQueueCopy.pop(); + if (!UsedNativeGraph) { + // NativeGraph constructed but failed to finalize. + // Use the original code path + compile(); + auto ExecQueueCopy = ExecQueues_; + while (ExecQueueCopy.size()) { + auto Nodes = ExecQueueCopy.front(); + std::string NodesInThisLevel = ""; + for (auto Node : Nodes) { + NodesInThisLevel += Node->Msg + " "; + } + logDebug("Executing nodes: {}", NodesInThisLevel); + for (auto Node : Nodes) { + logDebug("Executing {}", Node->Msg); + Node->execute(Queue); + Queue->finish(); + } + + ExecQueueCopy.pop(); + } } } diff --git a/src/CHIPGraph.hh b/src/CHIPGraph.hh index 81f237325..ea1b1cbee 100644 --- a/src/CHIPGraph.hh +++ b/src/CHIPGraph.hh @@ -196,7 +196,8 @@ public: * @param CloneMap the map containing relationships of which original node * does each cloned node correspond to. 
*/ - void updateDependencies(std::map CloneMap) { + void + updateDependencies(std::map &CloneMap) { std::vector NewDeps; for (auto Dep : Dependencies_) { auto ClonedDep = CloneMap[Dep]; @@ -255,6 +256,7 @@ class CHIPGraphNodeKernel : public CHIPGraphNode { private: hipKernelNodeParams Params_; CHIPExecItem *ExecItem_; + CHIPKernel *Kernel_; public: CHIPGraphNodeKernel(const CHIPGraphNodeKernel &Other); @@ -264,13 +266,27 @@ public: CHIPGraphNodeKernel(const void *HostFunction, dim3 GridDim, dim3 BlockDim, void **Args, size_t SharedMem); - virtual ~CHIPGraphNodeKernel() override {} - + virtual ~CHIPGraphNodeKernel() override; virtual void execute(CHIPQueue *Queue) const override; hipKernelNodeParams getParams() const { return Params_; } - void setParams(const hipKernelNodeParams Params) { Params_ = Params; } + /// the Kernel arguments have to be setup either just before launch (when + /// using the execute() path), or if using the CHIPGraphNative then + /// just before calling their graph construction APIs. + /// + /// This is because Kernels in both LevelZero and OpenCL are stateful, + /// and users can add multiple nodes with the same kernel into a Graph. + /// Setting up arguments in CHIPGraphNodeKernel ctor would then + /// lead to all nodes using the same (those set up last) arguments. 
+ void setupKernelArgs() const; + CHIPKernel *getKernel() const { return Kernel_; } + + void setParams(const hipKernelNodeParams Params) { + // dont allow changing kernel, needs refactoring + CHIPASSERT(Params.func == Params_.func); + Params_ = Params; + } /** * @brief Createa a copy of this node * Must copy over all the arguments @@ -298,7 +314,8 @@ public: Src_(Other.Src_), Count_(Other.Count_), Kind_(Other.Kind_) {} CHIPGraphNodeMemcpy(hipMemcpy3DParms Params) - : CHIPGraphNode(hipGraphNodeTypeMemcpy), Params_(Params) {} + : CHIPGraphNode(hipGraphNodeTypeMemcpy), Params_(Params), Src_(nullptr), + Dst_(nullptr), Count_(0), Kind_(hipMemcpyKind::hipMemcpyDefault) {} CHIPGraphNodeMemcpy(const hipMemcpy3DParms *Params) : CHIPGraphNode(hipGraphNodeTypeMemcpy) { setParams(Params); @@ -322,6 +339,14 @@ public: Kind_ = Kind; } + void getParams(void *&Dst, const void *&Src, size_t &Count, + hipMemcpyKind &Kind) { + Dst = Dst_; + Src = Src_; + Count = Count_; + Kind = Kind_; + } + void setParams(const hipMemcpy3DParms *Params) { Params_.srcArray = Params->srcArray; // if(Params->srcArray) @@ -525,6 +550,15 @@ public: Symbol_ = const_cast(Symbol); SizeBytes_ = SizeBytes; Offset_ = Offset; + Kind_ = Kind; + } + + void getParams(void *&Dst, const void *&Symbol, size_t &SizeBytes, + size_t &Offset, hipMemcpyKind &Kind) { + Dst = Dst_; + Symbol = Symbol_; + SizeBytes = SizeBytes_; + Offset = Offset_; Kind = Kind_; } @@ -573,6 +607,15 @@ public: Symbol_ = const_cast(Symbol); SizeBytes_ = SizeBytes; Offset_ = Offset; + Kind_ = Kind; + } + + void getParams(void *&Src, const void *&Symbol, size_t &SizeBytes, + size_t &Offset, hipMemcpyKind &Kind) { + Src = Src_; + Symbol = Symbol_; + SizeBytes = SizeBytes_; + Offset = Offset_; Kind = Kind_; } }; @@ -647,11 +690,25 @@ public: } }; +class CHIPGraphNative { +protected: + bool Finalized; + +public: + CHIPGraphNative() : Finalized(false){}; + virtual ~CHIPGraphNative() {} + bool isFinalized() { return Finalized; } + virtual bool 
finalize() { return false; } + virtual bool addNode(CHIPGraphNode *NewNode) { return false; } +}; + class CHIPGraphExec : public hipGraphExec { protected: CHIPGraph *OriginalGraph_; CHIPGraph CompiledGraph_; + std::unique_ptr NativeGraph; + /** * @brief each element in this queue represents represents a sequence of nodes * that can be submitted to one or more queues @@ -678,6 +735,17 @@ protected: */ void pruneGraph_(); + /** + * @brief Optimize and generate ExecQueues_ + * + * This method will first call PruneGraph and then generate an executable + * queue. Executable queue is made up of sets of nodes. All members of the + * aforementioned set can be executed simultanously in no particular order. + * @see PruneGraph + * + */ + void compile(); + public: CHIPGraphExec(CHIPGraph *Graph) : OriginalGraph_(Graph), /* Copy the pointer to the original graph */ @@ -690,17 +758,6 @@ public: void launch(CHIPQueue *Queue); CHIPGraph *getOriginalGraphPtr() const { return OriginalGraph_; } - - /** - * @brief Optimize and generate ExecQueues_ - * - * This method will first call PruneGraph and then generate an executable - * queue. Executable queue is made up of sets of nodes. All members of the - * aforementioned set can be executed simultanously in no particular order. 
- * @see PruneGraph - * - */ - void compile(); }; -#endif // include guard \ No newline at end of file +#endif // include guard diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 84fe19727..d29c15f70 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -886,8 +886,81 @@ void CHIPContextOpenCL::freeImpl(void *Ptr) { } cl::Context *CHIPContextOpenCL::get() { return ClContext; } -CHIPContextOpenCL::CHIPContextOpenCL(cl::Context *CtxIn) { + +CHIPContextOpenCL::CHIPContextOpenCL(cl::Context *CtxIn, cl::Device Dev, + cl::Platform Plat) { logTrace("CHIPContextOpenCL Initialized via OpenCL Context pointer."); + std::string DevExts = Dev.getInfo(); + std::memset(&Exts, 0, sizeof(Exts)); + SupportsCommandBuffers = + DevExts.find("cl_khr_command_buffer") != std::string::npos; + if (SupportsCommandBuffers) { + Exts.clCreateCommandBufferKHR = + (clCreateCommandBufferKHR_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCreateCommandBufferKHR"); + Exts.clCommandCopyBufferKHR = + (clCommandCopyBufferKHR_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandCopyBufferKHR"); + Exts.clCommandCopyBufferRectKHR = (clCommandCopyBufferRectKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clCommandCopyBufferRectKHR"); + Exts.clCommandFillBufferKHR = + (clCommandFillBufferKHR_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandFillBufferKHR"); + Exts.clCommandNDRangeKernelKHR = (clCommandNDRangeKernelKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clCommandNDRangeKernelKHR"); + Exts.clCommandBarrierWithWaitListKHR = + (clCommandBarrierWithWaitListKHR_fn):: + clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandBarrierWithWaitListKHR"); + Exts.clFinalizeCommandBufferKHR = (clFinalizeCommandBufferKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clFinalizeCommandBufferKHR"); + 
Exts.clEnqueueCommandBufferKHR = (clEnqueueCommandBufferKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clEnqueueCommandBufferKHR"); + Exts.clReleaseCommandBufferKHR = (clReleaseCommandBufferKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clReleaseCommandBufferKHR"); + Exts.clGetCommandBufferInfoKHR = (clGetCommandBufferInfoKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clGetCommandBufferInfoKHR"); + } +#ifdef cl_pocl_command_buffer_svm + SupportsCommandBuffersSVM = + DevExts.find("cl_pocl_command_buffer_svm") != std::string::npos; + if (SupportsCommandBuffersSVM) { + Exts.clCommandSVMMemcpyPOCL = + (clCommandSVMMemcpyPOCL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandSVMMemcpyPOCL"); + Exts.clCommandSVMMemcpyRectPOCL = (clCommandSVMMemcpyRectPOCL_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clCommandSVMMemcpyRectPOCL"); + Exts.clCommandSVMMemfillPOCL = + (clCommandSVMMemfillPOCL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandSVMMemfillPOCL"); + Exts.clCommandSVMMemfillRectPOCL = (clCommandSVMMemfillRectPOCL_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clCommandSVMMemfillRectPOCL"); + } +#endif +#ifdef cl_pocl_command_buffer_host_exec + SupportsCommandBuffersHost = + DevExts.find("cl_pocl_command_buffer_host_exec") != std::string::npos; + if (SupportsCommandBuffersHost) { + Exts.clCommandHostFuncPOCL = + (clCommandHostFuncPOCL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandHostFuncPOCL"); + Exts.clCommandWaitForEventPOCL = (clCommandWaitForEventPOCL_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clCommandWaitForEventPOCL"); + Exts.clCommandSignalEventPOCL = + (clCommandSignalEventPOCL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandSignalEventPOCL"); + } +#endif + ClContext = CtxIn; SvmMemory.init(*CtxIn); } @@ -1288,6 +1361,388 @@ CHIPQueueOpenCL::enqueueBarrierImpl(std::vector *EventsToWaitFor) 
{ return Event; } +/********************************************************************************/ + +CHIPGraphNative *CHIPQueueOpenCL::createNativeGraph() { + // should not raise an error if we fail to create a graph, + // because there is a fallback solution + CHIPContextOpenCL *Ctx = (CHIPContextOpenCL *)ChipContext_; + if (!Ctx->supportsCommandBuffers()) + return nullptr; + + cl_command_queue CQ = ClQueue_->get(); + int err = CL_SUCCESS; + cl_command_buffer_khr Res = + Ctx->exts()->clCreateCommandBufferKHR(1, &CQ, 0, &err); + if (Res == nullptr || err != CL_SUCCESS) + return nullptr; + + return new CHIPGraphNativeOpenCL(Res, CQ, Ctx->exts()); +} + +CHIPEvent *CHIPQueueOpenCL::enqueueNativeGraph(CHIPGraphNative *NativeGraph) { + CHIPEventOpenCL *Event = + (CHIPEventOpenCL *)Backend->createCHIPEvent(ChipContext_); + + CHIPContextOpenCL *Ctx = (CHIPContextOpenCL *)ChipContext_; + CHIPGraphNativeOpenCL *G = (CHIPGraphNativeOpenCL *)NativeGraph; + if (!Ctx->supportsCommandBuffers()) + return nullptr; + if (NativeGraph == nullptr) + return nullptr; + cl_command_queue CQ = ClQueue_->get(); + int Status = Ctx->exts()->clEnqueueCommandBufferKHR( + 1, &CQ, G->get(), 0, nullptr, Event->getNativePtr()); + CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); + return Event; +} + +void CHIPQueueOpenCL::destroyNativeGraph(CHIPGraphNative *NativeGraph) { + if (NativeGraph == nullptr) + return; + CHIPGraphNativeOpenCL *G = (CHIPGraphNativeOpenCL *)NativeGraph; + delete G; +} + +bool CHIPGraphNativeOpenCL::addNodeToNativeGraph(CHIPGraphNative *NativeGraph, + CHIPGraphNode *NewNode) { + cl_sync_point_khr NewSyncPoint = -1; + + // map the dependent CHIPGraphNodes to OpenCL syncpoints + const std::vector &Dependencies = NewNode->getDependencies(); + std::vector SyncPointDeps; + for (auto Node : Dependencies) { + auto Iter = SyncPointMap.find(Node); + if (Iter == SyncPointMap.end()) { + logError("Can't find SyncPoint for Node"); + return false; + } + 
SyncPointDeps.push_back(Iter->second); + } + + hipGraphNodeType NodeType = NewNode->getType(); + bool Res; + switch (NodeType) { + case hipGraphNodeTypeKernel: + Res = addKernelNode((CHIPGraphNodeKernel *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; + case hipGraphNodeTypeEmpty: + assert(0 && "Empty node should be removed earlier"); + return false; // assert vanishes under NDEBUG; do not fall through +#ifdef cl_pocl_command_buffer_svm + case hipGraphNodeTypeMemcpy: + Res = addMemcpyNode((CHIPGraphNodeMemcpy *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; + case hipGraphNodeTypeMemset: + Res = addMemsetNode((CHIPGraphNodeMemset *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; + case hipGraphNodeTypeMemcpyFromSymbol: + Res = addMemcpyNode((CHIPGraphNodeMemcpyFromSymbol *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; + case hipGraphNodeTypeMemcpyToSymbol: + Res = addMemcpyNode((CHIPGraphNodeMemcpyToSymbol *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; +#endif + +#ifdef cl_pocl_command_buffer_host_exec + case hipGraphNodeTypeWaitEvent: + Res = addEventWaitNode((CHIPGraphNodeWaitEvent *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; + case hipGraphNodeTypeEventRecord: + Res = addEventRecordNode((CHIPGraphNodeEventRecord *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; + case hipGraphNodeTypeHost: + Res = + addHostNode((CHIPGraphNodeHost *)NewNode, SyncPointDeps, &NewSyncPoint); + break; +#endif + + default: + Res = false; + } + if (!Res) + return false; + // remember the sync point so that later nodes can depend on this one + SyncPointMap.insert(std::make_pair(NewNode, NewSyncPoint)); + return true; +} +// Finalize the command buffer; Finalized is set only when the call succeeds. +bool CHIPGraphNativeOpenCL::finalize() { + int Status = Exts->clFinalizeCommandBufferKHR(Handle); + if (Status == CL_SUCCESS) { + Finalized = true; + return true; + } + return false; +} +// Release the command buffer handle, if there is one. +CHIPGraphNativeOpenCL::~CHIPGraphNativeOpenCL() { + if (Handle == nullptr) + return; + int Err = Exts->clReleaseCommandBufferKHR(Handle); + assert(Err == CL_SUCCESS); +} + +// TODO finish +bool CHIPGraphNativeOpenCL::addKernelNode( + CHIPGraphNodeKernel *Node, std::vector 
&SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + // Records one NDRange kernel command for Node into the command buffer. + int Status; + // possibly use: CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR + cl_ndrange_kernel_command_properties_khr Properties[] = {0, 0}; + + // TODO: we should add what CHIPQueue::launch does with Registered (Global) + // Vars + // TODO also look at SpillBuffer handling in: + // CHIPEvent *CHIPQueueOpenCL::launchImpl(CHIPExecItem *ExecItem) { + + // setup the kernel arguments before calling clCommandNDRange + Node->setupKernelArgs(); + + CHIPKernel *K = Node->getKernel(); + CHIPKernelOpenCL *CLK = static_cast(K); + // global size = block * grid; widen first so a 32-bit product cannot wrap + hipKernelNodeParams Params = Node->getParams(); + size_t LWSize[3] = {Params.blockDim.x, Params.blockDim.y, Params.blockDim.z}; + size_t GWSize[3] = {(size_t)Params.blockDim.x * Params.gridDim.x, + (size_t)Params.blockDim.y * Params.gridDim.y, + (size_t)Params.blockDim.z * Params.gridDim.z}; + uint WorkDim = 3; + + assert(Exts->clCommandNDRangeKernelKHR); + Status = Exts->clCommandNDRangeKernelKHR( + Handle, CmdQ, Properties, CLK->get()->get(), + WorkDim, // cl_uint work_dim + nullptr, // const size_t* global_work_offset, + GWSize, // const size_t* global_work_size, + LWSize, // const size_t* local_work_size, + SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); + return Status == CL_SUCCESS; +} + +#ifdef cl_pocl_command_buffer_svm + +// TODO finish Arrays +bool CHIPGraphNativeOpenCL::addMemcpyNode( + CHIPGraphNodeMemcpy *Node, std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + int Status; + void *Dst; + const void *Src; + size_t Size; + hipMemcpyKind Kind; + hipMemcpy3DParms Params; + + // Although ROCm API ref says that Dst and Src should not overlap, + // HIP seems to handle Dst == Src as a special (no-operation) case. + // This is seen in the test unit/memory/hipMemcpyAllApiNegative. + // Intel GPU OpenCL driver seems to do so as well for clEnqueueSVMMemcpy, which + // makes it pass, but Intel CPU OpenCL returns CL_MEM_COPY_OVERLAP + // like it should. 
To unify the behavior, let's convert the special case to + // a marker here, so we can return an event. + + Node->getParams(Dst, Src, Size, Kind); + Params = Node->getParams(); + if (Dst == nullptr || Src == nullptr) { + if (!Exts->clCommandSVMMemcpyRectPOCL) + return false; + // 3D copy. NOTE(review): Dst/Src may be null on this path yet are what the rect copy below receives - confirm whether Params.dstPtr/srcPtr were intended + // TODO handle arrays + assert(Params.dstArray == nullptr && "Arrays not supported yet"); + assert(Params.srcArray == nullptr && "Arrays not supported yet"); + + /* + * The struct passed to cudaMemcpy3D() must specify one of srcArray or + * srcPtr and one of dstArray or dstPtr. Passing more than one non-zero + * source or destination will cause cudaMemcpy3D() to return an error. The + * srcPos and dstPos fields are optional offsets into the source and + * destination objects and are defined in units of each object's elements. + * The element for a host or device pointer is assumed to be unsigned char. + * The extent field defines the dimensions of the transferred area in + * elements. If a CUDA array is participating in the copy, the extent is + * defined in terms of that array's elements. If no CUDA array is + * participating in the copy then the extents are defined in elements of + * unsigned char. + */ + + // TODO: HANDLE FOR ARRAYS: + // The srcPos and dstPos fields are optional offsets into the source & + // destination objects and are defined in units of each object's elements + // ... The element for a host or device pointer is assumed to be unsigned + // char. + size_t src_origin[3] = {Params.srcPos.x, Params.srcPos.y, Params.srcPos.z}; + size_t dst_origin[3] = {Params.dstPos.x, Params.dstPos.y, Params.dstPos.z}; + // If no CUDA array is participating in the copy then the extents + // are defined in elements of unsigned char. + size_t region[3] = {Params.extent.width, Params.extent.height, + Params.extent.depth}; + + // TODO this might be wrong. 
+ size_t src_row_pitch = Params.srcPtr.pitch; + size_t src_slice_pitch = src_row_pitch * Params.srcPtr.ysize; + size_t dst_row_pitch = Params.dstPtr.pitch; + size_t dst_slice_pitch = dst_row_pitch * Params.dstPtr.ysize; + + Status = Exts->clCommandSVMMemcpyRectPOCL( + Handle, CmdQ, Dst, Src, dst_origin, src_origin, region, dst_row_pitch, + dst_slice_pitch, src_row_pitch, src_slice_pitch, SyncPointDeps.size(), + SyncPointDeps.data(), SyncPoint, nullptr); + } else { + // 1D copy + if (!Exts->clCommandSVMMemcpyPOCL) + return false; + if (Dst == Src) { + Status = Exts->clCommandBarrierWithWaitListKHR( + Handle, CmdQ, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, + nullptr); + // no throw here: failure is reported by the return value, like every other path + } else { + Status = Exts->clCommandSVMMemcpyPOCL( + Handle, CmdQ, Dst, Src, Size, SyncPointDeps.size(), + SyncPointDeps.data(), SyncPoint, nullptr); + } + } + + return Status == CL_SUCCESS; +} + +// DONE: records a copy from the device symbol (plus Offset) into Dst. +bool CHIPGraphNativeOpenCL::addMemcpyNode( + CHIPGraphNodeMemcpyFromSymbol *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + + if (!Exts->clCommandSVMMemcpyPOCL) + return false; + + void *Dst = nullptr; + void *Src = nullptr; + const void *Symbol; + size_t SizeBytes; + size_t Offset; + hipMemcpyKind Kind; + Node->getParams(Dst, Symbol, SizeBytes, Offset, Kind); + + hipError_t Err = hipGetSymbolAddress(&Src, Symbol); + if (Err != HIP_SUCCESS) + return false; + + int Status = Exts->clCommandSVMMemcpyPOCL( + Handle, CmdQ, Dst, (const char *)Src + Offset, SizeBytes, + SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); + + return Status == CL_SUCCESS; +} + +// DONE: records a copy from Src into the device symbol (plus Offset). +bool CHIPGraphNativeOpenCL::addMemcpyNode( + CHIPGraphNodeMemcpyToSymbol *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + if (!Exts->clCommandSVMMemcpyPOCL) + return false; + + void *Dst = nullptr; + void *Src = nullptr; + const void *Symbol; + size_t SizeBytes; + size_t Offset; + hipMemcpyKind Kind; + 
Node->getParams(Src, Symbol, SizeBytes, Offset, Kind); + + hipError_t Err = hipGetSymbolAddress(&Dst, Symbol); + if (Err != HIP_SUCCESS) + return false; + + int Status = Exts->clCommandSVMMemcpyPOCL( + Handle, CmdQ, (char *)Dst + Offset, Src, SizeBytes, SyncPointDeps.size(), + SyncPointDeps.data(), SyncPoint, nullptr); + return Status == CL_SUCCESS; +} + +// DONE: records a rect fill of Params.dst (Params.width x Params.height x 1). +bool CHIPGraphNativeOpenCL::addMemsetNode( + CHIPGraphNodeMemset *Node, std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + if (!Exts->clCommandSVMMemfillRectPOCL) + return false; + + hipMemsetParams Params = Node->getParams(); + + int Status; + size_t Region[3] = {Params.width, Params.height, 1}; + Status = Exts->clCommandSVMMemfillRectPOCL( + Handle, CmdQ, Params.dst, + nullptr, // origin + Region, // region + Params.pitch, // row pitch + 0, // slice pitch + (const void *)&Params.value, Params.elementSize, SyncPointDeps.size(), + SyncPointDeps.data(), SyncPoint, nullptr); + return Status == CL_SUCCESS; +} +#endif + +#ifdef cl_pocl_command_buffer_host_exec + +// DONE: records a call of Params.fn(Params.userData) as a command. +bool CHIPGraphNativeOpenCL::addHostNode( + CHIPGraphNodeHost *Node, std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + if (!Exts->clCommandHostFuncPOCL) + return false; + + hipHostNodeParams Params = Node->getParams(); + + int Status; + Status = Exts->clCommandHostFuncPOCL( + Handle, CmdQ, Params.fn, Params.userData, SyncPointDeps.size(), + SyncPointDeps.data(), SyncPoint, nullptr); + return Status == CL_SUCCESS; +} + +// TODO output cl_event. NOTE(review): SyncPointDeps is not forwarded to clCommandSignalEventPOCL below - confirm ordering after the dependencies still holds +bool CHIPGraphNativeOpenCL::addEventRecordNode( + CHIPGraphNodeEventRecord *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + if (!Exts->clCommandSignalEventPOCL) + return false; + + CHIPEvent *E = Node->getEvent(); + CHIPEventOpenCL *CLE = static_cast(E); + + int Status; + Status = Exts->clCommandSignalEventPOCL(Handle, CmdQ, CLE->ClEvent, SyncPoint, + nullptr); + return Status == CL_SUCCESS; +} + +// DONE. NOTE(review): SyncPointDeps is not forwarded to clCommandWaitForEventPOCL either - confirm +bool CHIPGraphNativeOpenCL::addEventWaitNode( + 
CHIPGraphNodeWaitEvent *Node, std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + if (!Exts->clCommandWaitForEventPOCL) + return false; + + CHIPEvent *E = Node->getEvent(); + CHIPEventOpenCL *CLE = static_cast(E); + + int Status; + Status = Exts->clCommandWaitForEventPOCL(Handle, CmdQ, CLE->ClEvent, + SyncPoint, nullptr); + return Status == CL_SUCCESS; +} +#endif + // CHIPExecItemOpenCL //************************************************************************* @@ -1521,7 +1976,8 @@ void CHIPBackendOpenCL::initializeImpl(std::string CHIPPlatformStr, // Create queues that have devices each of which has an associated context // TODO Change this to spirv_enabled_devices cl::Context *Ctx = new cl::Context(SpirvDevices); - CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL(Ctx); + CHIPContextOpenCL *ChipContext = + new CHIPContextOpenCL(Ctx, Device, SelectedPlatform); Backend->addContext(ChipContext); // TODO for now only a single device is supported. @@ -1541,11 +1997,12 @@ void CHIPBackendOpenCL::initializeFromNative(const uintptr_t *NativeHandles, cl_device_id DevId = (cl_device_id)NativeHandles[1]; cl_context CtxId = (cl_context)NativeHandles[2]; + cl::Device *Dev = new cl::Device(DevId); + cl::Platform Plat(Dev->getInfo()); cl::Context *Ctx = new cl::Context(CtxId); - CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL(Ctx); + CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL(Ctx, *Dev, Plat); addContext(ChipContext); - cl::Device *Dev = new cl::Device(DevId); CHIPDeviceOpenCL *ChipDev = CHIPDeviceOpenCL::create(Dev, ChipContext, 0); logTrace("CHIPDeviceOpenCL {}", ChipDev->ClDevice->getInfo()); diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index a83bc953e..80949182a 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -41,7 +41,7 @@ #pragma OPENCL EXTENSION cl_khr_priority_hints : enable -#include +#include #pragma GCC diagnostic push #pragma 
GCC diagnostic ignored "-Wmissing-braces" @@ -141,9 +141,6 @@ public: bool hasPointer(const void *Ptr); bool pointerSize(void *Ptr, size_t *Size); bool pointerInfo(void *Ptr, void **Base, size_t *Size); - int memCopy(void *Dst, const void *Src, size_t Size, cl::CommandQueue &Queue); - int memFill(void *Dst, size_t Size, const void *Pattern, size_t PatternSize, - cl::CommandQueue &Queue); void clear(); size_t getNumAllocations() const { return SvmAllocations_.size(); } @@ -154,12 +151,44 @@ public: } }; +typedef struct { + clCreateCommandBufferKHR_fn clCreateCommandBufferKHR; + clCommandCopyBufferKHR_fn clCommandCopyBufferKHR; + clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR; + clCommandFillBufferKHR_fn clCommandFillBufferKHR; + clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR; + clCommandBarrierWithWaitListKHR_fn clCommandBarrierWithWaitListKHR; + clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR; + clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR; + clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR; + clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR; + +#ifdef cl_pocl_command_buffer_svm + clCommandSVMMemcpyPOCL_fn clCommandSVMMemcpyPOCL; + clCommandSVMMemcpyRectPOCL_fn clCommandSVMMemcpyRectPOCL; + clCommandSVMMemfillPOCL_fn clCommandSVMMemfillPOCL; + clCommandSVMMemfillRectPOCL_fn clCommandSVMMemfillRectPOCL; +#endif + +#ifdef cl_pocl_command_buffer_host_exec + clCommandHostFuncPOCL_fn clCommandHostFuncPOCL; + clCommandWaitForEventPOCL_fn clCommandWaitForEventPOCL; + clCommandSignalEventPOCL_fn clCommandSignalEventPOCL; +#endif + +} CHIPContextClExts; + class CHIPContextOpenCL : public CHIPContext { + cl::Context *ClContext; + bool SupportsCommandBuffers; + bool SupportsCommandBuffersSVM; + bool SupportsCommandBuffersHost; + CHIPContextClExts Exts; + SVMemoryRegion SvmMemory; + public: bool allDevicesSupportFineGrainSVM(); - SVMemoryRegion SvmMemory; - cl::Context *ClContext; - CHIPContextOpenCL(cl::Context *ClContext); + 
CHIPContextOpenCL(cl::Context *ClContext, cl::Device Dev, cl::Platform Plat); virtual ~CHIPContextOpenCL() {} void *allocateImpl(size_t Size, size_t Alignment, hipMemoryType MemType, CHIPHostAllocFlags Flags = CHIPHostAllocFlags()) override; @@ -167,6 +196,10 @@ public: bool isAllocatedPtrMappedToVM(void *Ptr) override { return false; } // TODO virtual void freeImpl(void *Ptr) override; cl::Context *get(); + bool supportsCommandBuffers() { return SupportsCommandBuffers; } + bool supportsCommandBuffersSVM() { return SupportsCommandBuffersSVM; } + bool supportsCommandBuffersHost() { return SupportsCommandBuffersHost; } + const CHIPContextClExts *exts() { return &Exts; } }; class CHIPDeviceOpenCL : public CHIPDevice { @@ -265,6 +298,10 @@ public: enqueueBarrierImpl(std::vector *EventsToWaitFor) override; virtual CHIPEvent *enqueueMarkerImpl() override; virtual CHIPEvent *memPrefetchImpl(const void *Ptr, size_t Count) override; + + virtual CHIPEvent *enqueueNativeGraph(CHIPGraphNative *NativeGraph) override; + virtual CHIPGraphNative *createNativeGraph() override; + virtual void destroyNativeGraph(CHIPGraphNative *NativeGraph) override; }; class CHIPKernelOpenCL : public CHIPKernel { @@ -386,4 +423,50 @@ public: cl_sampler getSampler() const { return Sampler; } }; +class CHIPGraphNativeOpenCL : public CHIPGraphNative { + cl_command_buffer_khr Handle; + cl_command_queue CmdQ; + std::map SyncPointMap; + const CHIPContextClExts *Exts; + + bool addKernelNode(CHIPGraphNodeKernel *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); +#ifdef cl_pocl_command_buffer_svm + bool addMemcpyNode(CHIPGraphNodeMemcpy *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); + bool addMemcpyNode(CHIPGraphNodeMemcpyFromSymbol *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); + bool addMemcpyNode(CHIPGraphNodeMemcpyToSymbol *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); + bool addMemsetNode(CHIPGraphNodeMemset *Node, 
+ std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); +#endif + +#ifdef cl_pocl_command_buffer_host_exec + bool addHostNode(CHIPGraphNodeHost *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); + bool addEventRecordNode(CHIPGraphNodeEventRecord *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); + bool addEventWaitNode(CHIPGraphNodeWaitEvent *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); +#endif + +public: + CHIPGraphNativeOpenCL(cl_command_buffer_khr H, cl_command_queue CQ, + const CHIPContextClExts *E) + : Handle(H), CmdQ(CQ), Exts(E) {} + virtual ~CHIPGraphNativeOpenCL(); + cl_command_buffer_khr get() const { return Handle; } + virtual bool finalize() override; + virtual bool addNode(CHIPGraphNode *NewNode) override; +}; + #endif From c36b530fe309d59e540c561ee55e7713d6b33aa2 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Tue, 9 May 2023 16:29:49 +0300 Subject: [PATCH 06/35] cleanup/refactor OpenCL backend code * turn CHIPEvent.Refc from pointer to integer, move it into CHIPEventLevel0 * disable increaseRefCount/decreaseRefCount for OpenCL, the OpenCL runtime already does the refcounting * remove naked pointers to OpenCL objects, instead use the smart pointers from opencl.hpp header --- include/CL/opencl.h | 1 - src/CHIPBackend.cc | 40 +-- src/CHIPBackend.hh | 16 +- src/CHIPGraph.hh | 1 + src/backend/Level0/CHIPBackendLevel0.cc | 37 +- src/backend/Level0/CHIPBackendLevel0.hh | 8 +- src/backend/OpenCL/CHIPBackendOpenCL.cc | 440 +++++++++++------------- src/backend/OpenCL/CHIPBackendOpenCL.hh | 70 ++-- 8 files changed, 278 insertions(+), 335 deletions(-) diff --git a/include/CL/opencl.h b/include/CL/opencl.h index 380b55f8f..c636e0d4e 100644 --- a/include/CL/opencl.h +++ b/include/CL/opencl.h @@ -23,7 +23,6 @@ extern "C" { #include #include -#include #include #include diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index ba691b64f..d175eca4d 100644 --- a/src/CHIPBackend.cc +++ 
b/src/CHIPBackend.cc @@ -206,10 +206,8 @@ CHIPAllocationTracker::getAllocInfoCheckPtrRanges(void *DevPtr) { // ************************************************************************ CHIPEvent::CHIPEvent(CHIPContext *Ctx, CHIPEventFlags Flags) - : EventStatus_(EVENT_STATUS_INIT), Flags_(Flags), Refc_(new size_t()), - ChipContext_(Ctx), Msg("") { - *Refc_ = 1; -} + : EventStatus_(EVENT_STATUS_INIT), Flags_(Flags), ChipContext_(Ctx), + Msg("") {} void CHIPEvent::releaseDependencies() { assert(!Deleted_ && "Event use after delete!"); @@ -220,36 +218,6 @@ void CHIPEvent::releaseDependencies() { DependsOnList.clear(); } -void CHIPEvent::decreaseRefCount(std::string Reason) { - LOCK(EventMtx); // CHIPEvent::Refc_ - assert(!Deleted_ && "Event use after delete!"); - // logDebug("CHIPEvent::decreaseRefCount() {} {} refc {}->{} REASON: {}", - // (void *)this, Msg.c_str(), *Refc_, *Refc_ - 1, Reason); - if (*Refc_ > 0) { - (*Refc_)--; - } else { - assert(false && "CHIPEvent::decreaseRefCount() called when refc == 0"); - logError("CHIPEvent::decreaseRefCount() called when refc == 0"); - } - // Destructor to be called by event monitor once backend is done using it -} -void CHIPEvent::increaseRefCount(std::string Reason) { - LOCK(EventMtx); // CHIPEvent::Refc_ - assert(!Deleted_ && "Event use after delete!"); - // logDebug("CHIPEvent::increaseRefCount() {} {} refc {}->{} REASON: {}", - // (void *)this, Msg.c_str(), *Refc_, *Refc_ + 1, Reason); - - // Base constructor and CHIPEventLevel0::reset() sets the refc_ to one. 
- assert(*Refc_ > 0 && "Increasing refcount from zero!"); - (*Refc_)++; -} - -size_t CHIPEvent::getCHIPRefc() { - LOCK(this->EventMtx); // CHIPEvent::Refc_ - assert(!Deleted_ && "Event use after delete!"); - return *Refc_; -} - // CHIPModuleflags_ //************************************************************************************* void CHIPModule::consumeSPIRV() { @@ -495,6 +463,8 @@ void *CHIPArgSpillBuffer::allocate(const SPVFuncInfo::Arg &Arg) { // CHIPExecItem //************************************************************************************* void CHIPExecItem::copyArgs(void **Args) { + // args need to be set up again + ArgsSetup = false; for (int i = 0; i < getNumArgs(); i++) { Args_.push_back(Args[i]); } @@ -888,8 +858,6 @@ hipSharedMemConfig CHIPDevice::getSharedMemConfig() { UNIMPLEMENTED(hipSharedMemBankSizeDefault); } -void CHIPDevice::removeContext(CHIPContext *CHIPContext) {} - bool CHIPDevice::removeQueue(CHIPQueue *ChipQueue) { /** * If commands are still executing on the specified stream, some may complete diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 022aae5db..cf7b39dc9 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -612,9 +612,6 @@ protected: bool Deleted_ = false; #endif - // reference count - size_t *Refc_; - /** * @brief Events are always created with a context * @@ -639,9 +636,9 @@ public: CHIPEventFlags getFlags() { return Flags_; } std::mutex EventMtx; std::string Msg; - size_t getCHIPRefc(); - virtual void decreaseRefCount(std::string Reason); - virtual void increaseRefCount(std::string Reason); + virtual size_t getCHIPRefc() = 0; + virtual void decreaseRefCount(std::string Reason) {} + virtual void increaseRefCount(std::string Reason) {} virtual ~CHIPEvent() = default; // Optionally provide a field for origin of this event /** @@ -1299,13 +1296,6 @@ public: CHIPQueue *createQueueAndRegister(const uintptr_t *NativeHandles, const size_t NumHandles); - void removeContext(CHIPContext *Ctx); - virtual 
CHIPContext *createContext() = 0; - CHIPContext *createContextAndRegister() { - Ctx_ = createContext(); - return Ctx_; - } - size_t getMaxMallocSize() { if (MaxMallocSize_ < 1) CHIPERR_LOG_AND_THROW("MaxMallocSize was not set", hipErrorTbd); diff --git a/src/CHIPGraph.hh b/src/CHIPGraph.hh index ea1b1cbee..0b3124dcb 100644 --- a/src/CHIPGraph.hh +++ b/src/CHIPGraph.hh @@ -153,6 +153,7 @@ public: * @param Count */ void addDependencies(CHIPGraphNode **Dependencies, int Count) { + for (int i = 0; i < Count; i++) { addDependency(Dependencies[i]); } diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 29e46c57f..9548d8354 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -265,7 +265,7 @@ CHIPEventLevel0::CHIPEventLevel0(CHIPContextLevel0 *ChipCtx, unsigned int ThePoolIndex, CHIPEventFlags Flags) : CHIPEvent((CHIPContext *)(ChipCtx), Flags), Event_(nullptr), - EventPoolHandle_(nullptr), Timestamp_(0) { + EventPoolHandle_(nullptr), Timestamp_(0), Refc_(1) { LOCK(TheEventPool->EventPoolMtx); // CHIPEventPool::EventPool_ via get() EventPool = TheEventPool; EventPoolIndex = ThePoolIndex; @@ -291,7 +291,7 @@ CHIPEventLevel0::CHIPEventLevel0(CHIPContextLevel0 *ChipCtx, CHIPEventLevel0::CHIPEventLevel0(CHIPContextLevel0 *ChipCtx, CHIPEventFlags Flags) : CHIPEvent((CHIPContext *)(ChipCtx), Flags), Event_(nullptr), - EventPoolHandle_(nullptr), Timestamp_(0), EventPoolIndex(0), + EventPoolHandle_(nullptr), Timestamp_(0), Refc_(1), EventPoolIndex(0), EventPool(0) { CHIPContextLevel0 *ZeCtx = (CHIPContextLevel0 *)ChipContext_; @@ -331,7 +331,7 @@ CHIPEventLevel0::CHIPEventLevel0(CHIPContextLevel0 *ChipCtx, CHIPEventLevel0::CHIPEventLevel0(CHIPContextLevel0 *ChipCtx, ze_event_handle_t NativeEvent) : CHIPEvent((CHIPContext *)(ChipCtx)), Event_(NativeEvent), - EventPoolHandle_(nullptr), Timestamp_(0), EventPoolIndex(0), + EventPoolHandle_(nullptr), Timestamp_(0), Refc_(1), 
EventPoolIndex(0), EventPool(nullptr) {} // Must use this for now - Level Zero hangs when events are host visible + @@ -547,6 +547,37 @@ void CHIPEventLevel0::hostSignal() { EventStatus_ = EVENT_STATUS_RECORDED; } +void CHIPEventLevel0::decreaseRefCount(std::string Reason) { + LOCK(EventMtx); // CHIPEvent::Refc_ + assert(!Deleted_ && "Event use after delete!"); + // logDebug("CHIPEvent::decreaseRefCount() {} {} refc {}->{} REASON: {}", + // (void *)this, Msg.c_str(), *Refc_, *Refc_ - 1, Reason); + if (Refc_ > 0) { + Refc_--; + } else { + assert(false && "CHIPEvent::decreaseRefCount() called when refc == 0"); + logError("CHIPEvent::decreaseRefCount() called when refc == 0"); + } + // Destructor to be called by event monitor once backend is done using it +} + +void CHIPEventLevel0::increaseRefCount(std::string Reason) { + LOCK(EventMtx); // CHIPEvent::Refc_ + assert(!Deleted_ && "Event use after delete!"); + // logDebug("CHIPEvent::increaseRefCount() {} {} refc {}->{} REASON: {}", + // (void *)this, Msg.c_str(), *Refc_, *Refc_ + 1, Reason); + + // Base constructor and CHIPEventLevel0::reset() sets the refc_ to one. 
+ assert(Refc_ > 0 && "Increasing refcount from zero!"); + Refc_++; +} + +size_t CHIPEventLevel0::getCHIPRefc() { + LOCK(this->EventMtx); + assert(!Deleted_ && "Event use after delete!"); + return Refc_; +} + // End CHIPEventLevel0 // CHIPCallbackDataLevel0 diff --git a/src/backend/Level0/CHIPBackendLevel0.hh b/src/backend/Level0/CHIPBackendLevel0.hh index efd12cc91..7c8bb8b07 100644 --- a/src/backend/Level0/CHIPBackendLevel0.hh +++ b/src/backend/Level0/CHIPBackendLevel0.hh @@ -89,6 +89,9 @@ private: // The timestamp value uint64_t Timestamp_; + // reference count + size_t Refc_ = 1; + std::vector Actions_; public: @@ -121,6 +124,10 @@ public: ze_event_handle_t peek(); ze_event_handle_t get(std::string Msg); + virtual size_t getCHIPRefc() override; + + virtual void decreaseRefCount(std::string Reason) override; + virtual void increaseRefCount(std::string Reason) override; /// Bind an action which is promised to be executed when the event is /// finished. @@ -479,7 +486,6 @@ class CHIPDeviceLevel0 : public CHIPDevice { ze_command_queue_desc_t getQueueDesc_(int Priority); public: - virtual CHIPContextLevel0 *createContext() override {} bool copyQueueIsAvailable() { return CopyQueueAvailable_; } ze_command_list_desc_t getCommandListComputeDesc() { return CommandListComputeDesc_; diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index d29c15f70..480d83cf4 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -285,7 +285,6 @@ CHIPEventMonitorOpenCL::CHIPEventMonitorOpenCL() : CHIPEventMonitor(){}; void CHIPEventMonitorOpenCL::monitor() { logTrace("CHIPEventMonitorOpenCL::monitor()"); - CHIPEventMonitor::monitor(); } // CHIPDeviceOpenCL @@ -298,10 +297,10 @@ CHIPDeviceOpenCL::createTexture(const hipResourceDesc *ResDesc, logTrace("CHIPDeviceOpenCL::createTexture"); bool NormalizedFloat = TexDesc->readMode == hipReadModeNormalizedFloat; - auto *Q = (CHIPQueueOpenCL 
*)getDefaultQueue(); + cl::CommandQueue &ClCmdQ = ((CHIPQueueOpenCL *)getDefaultQueue())->get(); - cl_context CLCtx = ((CHIPContextOpenCL *)getContext())->get()->get(); - cl_sampler Sampler = createSampler(CLCtx, *ResDesc, *TexDesc); + cl::Context &ClContext = ((CHIPContextOpenCL *)getContext())->get(); + cl_sampler Sampler = createSampler(ClContext.get(), *ResDesc, *TexDesc); if (ResDesc->resType == hipResourceTypeArray) { hipArray *Array = ResDesc->res.array.array; @@ -312,14 +311,14 @@ CHIPDeviceOpenCL::createTexture(const hipResourceDesc *ResDesc, size_t Height = Array->height; size_t Depth = Array->depth; - cl_mem Image = createImage(CLCtx, Array->textureType, Array->desc, + cl_mem Image = createImage(ClContext.get(), Array->textureType, Array->desc, NormalizedFloat, Width, Height, Depth); auto Tex = std::make_unique(*ResDesc, Image, Sampler); logTrace("Created texture: {}", (void *)Tex.get()); CHIPRegionDesc SrcRegion = CHIPRegionDesc::from(*Array); - memCopyToImage(Q->get()->get(), Image, Array->data, SrcRegion); + memCopyToImage(ClCmdQ.get(), Image, Array->data, SrcRegion); return Tex.release(); } @@ -329,15 +328,15 @@ CHIPDeviceOpenCL::createTexture(const hipResourceDesc *ResDesc, auto TexelByteSize = getChannelByteSize(Res.desc); size_t Width = Res.sizeInBytes / TexelByteSize; - cl_mem Image = - createImage(CLCtx, hipTextureType1D, Res.desc, NormalizedFloat, Width); + cl_mem Image = createImage(ClContext.get(), hipTextureType1D, Res.desc, + NormalizedFloat, Width); auto Tex = std::make_unique(*ResDesc, Image, Sampler); logTrace("Created texture: {}", (void *)Tex.get()); // Copy data to image. 
auto SrcDesc = CHIPRegionDesc::get1DRegion(Width, TexelByteSize); - memCopyToImage(Q->get()->get(), Image, Res.devPtr, SrcDesc); + memCopyToImage(ClCmdQ.get(), Image, Res.devPtr, SrcDesc); return Tex.release(); } @@ -346,7 +345,7 @@ CHIPDeviceOpenCL::createTexture(const hipResourceDesc *ResDesc, auto &Res = ResDesc->res.pitch2D; assert(Res.pitchInBytes >= Res.width); // Checked in CHIPBindings. - cl_mem Image = createImage(CLCtx, hipTextureType2D, Res.desc, + cl_mem Image = createImage(ClContext.get(), hipTextureType2D, Res.desc, NormalizedFloat, Res.width, Res.height); auto Tex = std::make_unique(*ResDesc, Image, Sampler); @@ -354,7 +353,7 @@ CHIPDeviceOpenCL::createTexture(const hipResourceDesc *ResDesc, // Copy data to image. auto SrcDesc = CHIPRegionDesc::from(*ResDesc); - memCopyToImage(Q->get()->get(), Image, Res.devPtr, SrcDesc); + memCopyToImage(ClCmdQ.get(), Image, Res.devPtr, SrcDesc); return Tex.release(); } @@ -363,14 +362,14 @@ CHIPDeviceOpenCL::createTexture(const hipResourceDesc *ResDesc, return nullptr; } -CHIPDeviceOpenCL::CHIPDeviceOpenCL(CHIPContextOpenCL *ChipCtx, - cl::Device *DevIn, int Idx) - : CHIPDevice(ChipCtx, Idx), ClDevice(DevIn), ClContext(ChipCtx->get()) { +CHIPDeviceOpenCL::CHIPDeviceOpenCL(CHIPContextOpenCL *ChipCtx, cl::Device DevIn, + int Idx) + : CHIPDevice(ChipCtx, Idx), ClDevice(DevIn) { logTrace("CHIPDeviceOpenCL initialized via OpenCL device pointer and context " "pointer"); cl_device_svm_capabilities DeviceSVMCapabilities; auto Status = - DevIn->getInfo(CL_DEVICE_SVM_CAPABILITIES, &DeviceSVMCapabilities); + DevIn.getInfo(CL_DEVICE_SVM_CAPABILITIES, &DeviceSVMCapabilities); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); this->SupportsFineGrainSVM = DeviceSVMCapabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER; @@ -381,7 +380,7 @@ CHIPDeviceOpenCL::CHIPDeviceOpenCL(CHIPContextOpenCL *ChipCtx, } } -CHIPDeviceOpenCL *CHIPDeviceOpenCL::create(cl::Device *ClDevice, +CHIPDeviceOpenCL *CHIPDeviceOpenCL::create(cl::Device 
ClDevice, CHIPContextOpenCL *ChipContext, int Idx) { CHIPDeviceOpenCL *Dev = new CHIPDeviceOpenCL(ChipContext, ClDevice, Idx); @@ -394,22 +393,21 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { cl_int Err; std::string Temp; - this->MaxMallocSize_ = ClDevice->getInfo(); - assert(ClDevice != nullptr); - Temp = ClDevice->getInfo(); + this->MaxMallocSize_ = ClDevice.getInfo(); + Temp = ClDevice.getInfo(); strncpy(HipDeviceProps_.name, Temp.c_str(), 255); HipDeviceProps_.name[255] = 0; HipDeviceProps_.totalGlobalMem = - ClDevice->getInfo(&Err); + ClDevice.getInfo(&Err); HipDeviceProps_.sharedMemPerBlock = - ClDevice->getInfo(&Err); + ClDevice.getInfo(&Err); HipDeviceProps_.maxThreadsPerBlock = - ClDevice->getInfo(&Err); + ClDevice.getInfo(&Err); - std::vector Wi = ClDevice->getInfo(); + std::vector Wi = ClDevice.getInfo(); HipDeviceProps_.maxThreadsDim[0] = Wi[0]; HipDeviceProps_.maxThreadsDim[1] = Wi[1]; @@ -417,23 +415,23 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { // Maximum configured clock frequency of the device in MHz. HipDeviceProps_.clockRate = - 1000 * ClDevice->getInfo(); + 1000 * ClDevice.getInfo(); HipDeviceProps_.multiProcessorCount = - ClDevice->getInfo(); + ClDevice.getInfo(); HipDeviceProps_.l2CacheSize = - ClDevice->getInfo(); + ClDevice.getInfo(); // not actually correct HipDeviceProps_.totalConstMem = - ClDevice->getInfo(); + ClDevice.getInfo(); // totally made up HipDeviceProps_.regsPerBlock = 64; HipDeviceProps_.warpSize = CHIP_DEFAULT_WARP_SIZE; // Try to check that we support the default warp size. 
- std::vector Sg = ClDevice->getInfo(); + std::vector Sg = ClDevice.getInfo(); if (std::find(Sg.begin(), Sg.end(), CHIP_DEFAULT_WARP_SIZE) == Sg.end()) { logWarn( "The device might not support subgroup size {}, warp-size sensitive " @@ -453,7 +451,7 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { HipDeviceProps_.computeMode = 0; HipDeviceProps_.arch = {}; - Temp = ClDevice->getInfo(); + Temp = ClDevice.getInfo(); if (Temp.find("cl_khr_global_int32_base_atomics") != std::string::npos) HipDeviceProps_.arch.hasGlobalInt32Atomics = 1; else @@ -514,11 +512,11 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { HipDeviceProps_.pageableMemoryAccess = 0; HipDeviceProps_.pageableMemoryAccessUsesHostPageTables = 0; - auto Max1D2DWidth = ClDevice->getInfo(); - auto Max2DHeight = ClDevice->getInfo(); - auto Max3DWidth = ClDevice->getInfo(); - auto Max3DHeight = ClDevice->getInfo(); - auto Max3DDepth = ClDevice->getInfo(); + auto Max1D2DWidth = ClDevice.getInfo(); + auto Max2DHeight = ClDevice.getInfo(); + auto Max3DWidth = ClDevice.getInfo(); + auto Max3DHeight = ClDevice.getInfo(); + auto Max3DDepth = ClDevice.getInfo(); // Clamp texture dimensions to [0, INT_MAX] because the return value // of hipDeviceGetAttribute() is int type. 
@@ -531,7 +529,6 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { HipDeviceProps_.maxTexture3D[2] = clampToInt(Max3DDepth); } -void CHIPDeviceOpenCL::resetImpl() { UNIMPLEMENTED(); } // CHIPEventOpenCL // ************************************************************************ @@ -544,36 +541,21 @@ CHIPEventOpenCL::CHIPEventOpenCL(CHIPContextOpenCL *ChipContext, CHIPEventOpenCL::CHIPEventOpenCL(CHIPContextOpenCL *ChipContext, CHIPEventFlags Flags) - : CHIPEventOpenCL(ChipContext, nullptr, Flags, false) {} + : CHIPEventOpenCL(ChipContext, nullptr, Flags) {} uint64_t CHIPEventOpenCL::getFinishTime() { - int Status; + int Status = CL_SUCCESS; uint64_t Ret; - Status = clGetEventProfilingInfo(ClEvent, CL_PROFILING_COMMAND_END, - sizeof(Ret), &Ret, NULL); + Ret = ClEvent.getProfilingInfo(&Status); if (Status != CL_SUCCESS) { - auto Status = clGetEventInfo(ClEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, - sizeof(int), &EventStatus_, NULL); - CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); + logError("Failed to query event for profiling info."); + return 0; } - // CHIPERR_CHECK_LOG_AND_THROW(status, CL_SUCCESS, hipErrorTbd, - // "Failed to query event for profiling info."); - return Ret; -} -size_t CHIPEventOpenCL::getRefCount() { - cl_uint RefCount; - if (ClEvent == nullptr) - return 0; - int Status = clGetEventInfo(getNativeRef(), CL_EVENT_REFERENCE_COUNT, 4, - &RefCount, NULL); - CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); - return RefCount; + return Ret; } -CHIPEventOpenCL::~CHIPEventOpenCL() { ClEvent = nullptr; } - CHIPEventOpenCL *CHIPBackendOpenCL::createCHIPEvent(CHIPContext *ChipCtx, CHIPEventFlags Flags, bool UserEvent) { @@ -584,25 +566,37 @@ CHIPEventOpenCL *CHIPBackendOpenCL::createCHIPEvent(CHIPContext *ChipCtx, } void CHIPEventOpenCL::recordStream(CHIPQueue *ChipQueue) { + LOCK(Backend->EventsMtx); // trackImpl CHIPBackend::Events + LOCK(EventMtx); // changing this event's fields logTrace("CHIPEvent::recordStream()"); - 
auto MarkerEvent = ChipQueue->enqueueMarker(); - this->takeOver(MarkerEvent); - this->EventStatus_ = EVENT_STATUS_RECORDING; + CHIPEventOpenCL *Marker = (CHIPEventOpenCL *)ChipQueue->enqueueMarkerImpl(); + // see operator=() on cl::Event + // should automatically release ClEvent if it already contains valid handle + ClEvent = Marker->ClEvent; + Msg = "recordStreamMarker"; + EventStatus_ = EVENT_STATUS_RECORDING; + delete Marker; + + ChipQueue->updateLastEvent(this); + // can't use this->track() because it calls locks + if (!TrackCalled_) { + Backend->Events.push_back(this); + TrackCalled_ = true; + } + return; } -void CHIPEventOpenCL::takeOver(CHIPEvent *OtherIn) { - logTrace("CHIPEventOpenCL::takeOver"); - decreaseRefCount("takeOver"); - { - auto *Other = (CHIPEventOpenCL *)OtherIn; - LOCK(EventMtx); // CHIPEvent::Refc_ - this->ClEvent = Other->ClEvent; - this->Refc_ = Other->Refc_; - this->Msg = Other->Msg; +size_t CHIPEventOpenCL::getCHIPRefc() { + int Err = CL_SUCCESS; + size_t RefC = ClEvent.getInfo(&Err); + if (Err != CL_SUCCESS) { + logError("failed to get Reference count from OpenCL event"); + return 0; + } else { + return RefC; } - increaseRefCount("takeOver"); } bool CHIPEventOpenCL::wait() { @@ -613,21 +607,20 @@ bool CHIPEventOpenCL::wait() { return false; } - auto Status = clWaitForEvents(1, &ClEvent); - + auto Status = ClEvent.wait(); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); return true; } bool CHIPEventOpenCL::updateFinishStatus(bool ThrowErrorIfNotReady) { logTrace("CHIPEventOpenCL::updateFinishStatus()"); - if (ThrowErrorIfNotReady && this->ClEvent == nullptr) + if (ThrowErrorIfNotReady && ClEvent.get() == nullptr) CHIPERR_LOG_AND_THROW("OpenCL has not been initialized cl_event is null", hipErrorNotReady); - int UpdatedStatus; - auto Status = clGetEventInfo(ClEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, - sizeof(int), &UpdatedStatus, NULL); + int Status = CL_SUCCESS; + int UpdatedStatus = + ClEvent.getInfo(&Status); 
CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); if (ThrowErrorIfNotReady && UpdatedStatus != CL_COMPLETE) { CHIPERR_LOG_AND_THROW("Event not yet ready", hipErrorNotReady); @@ -684,45 +677,12 @@ float CHIPEventOpenCL::getElapsedTime(CHIPEvent *OtherIn) { return (float)MS + FractInMS; } -void CHIPEventOpenCL::hostSignal() { UNIMPLEMENTED(); } - -void CHIPEventOpenCL::increaseRefCount(std::string Reason) { - LOCK(EventMtx); // CHIPEvent::Refc_ - auto status = clRetainEvent(this->ClEvent); - if (!UserEvent_) - assert(status == 0); - // logDebug("CHIPEventOpenCL::increaseRefCount() {} {} refc {}->{} REASON: - // {}", - // (void *)this, Msg.c_str(), *Refc_, *Refc_ + 1, Reason); - (*Refc_)++; - assert(*Refc_ = getRefCount() - 1); - // logDebug("CHIPEventOpenCL::increaseRefCount() {} OpenCL RefCount: {}", - // (void *)this, getRefCount()); -} - -void CHIPEventOpenCL::decreaseRefCount(std::string Reason) { - LOCK(EventMtx); // CHIPEvent::Refc_ - // logDebug("CHIPEventOpenCL::decreaseRefCount() {} OpenCL RefCount: {}", - // (void *)this, getRefCount()); - // logDebug("CHIPEventOpenCL::decreaseRefCount() {} {} refc {}->{} REASON: - // {}", - // (void *)this, Msg.c_str(), *Refc_, *Refc_ - 1, Reason); - if (*Refc_ > 0) { - (*Refc_)--; - } else { - logError("CHIPEvent::decreaseRefCount() called when refc == 0"); - } - clReleaseEvent(this->ClEvent); -} - // CHIPModuleOpenCL //************************************************************************* CHIPModuleOpenCL::CHIPModuleOpenCL(const SPVModule &SrcMod) : CHIPModule(SrcMod) {} -cl::Program *CHIPModuleOpenCL::get() { return &Program_; } - void CHIPModuleOpenCL::compile(CHIPDevice *ChipDev) { // TODO make compile_ which calls consumeSPIRV() @@ -735,7 +695,7 @@ void CHIPModuleOpenCL::compile(CHIPDevice *ChipDev) { int Err; auto SrcBin = Src_->getBinary(); std::vector BinaryVec(SrcBin.begin(), SrcBin.end()); - auto Program = cl::Program(*(ChipCtxOcl->get()), BinaryVec, false, &Err); + auto Program = 
cl::Program(ChipCtxOcl->get(), BinaryVec, false, &Err); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorInitializationError); // for (CHIPDevice *chip_dev : chip_devices) { @@ -744,7 +704,7 @@ void CHIPModuleOpenCL::compile(CHIPDevice *ChipDev) { auto ErrBuild = Err; std::string Log = - Program.getBuildInfo(*ChipDevOcl->ClDevice, &Err); + Program.getBuildInfo(ChipDevOcl->get(), &Err); if (ErrBuild != CL_SUCCESS) logError("Program BUILD LOG for device #{}:{}:\n{}\n", ChipDevOcl->getDeviceId(), Name, Log); @@ -761,7 +721,7 @@ void CHIPModuleOpenCL::compile(CHIPDevice *ChipDev) { CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorInitializationError); logTrace("Kernels in CHIPModuleOpenCL: {} \n", Kernels.size()); - for (int KernelIdx = 0; KernelIdx < Kernels.size(); KernelIdx++) { + for (size_t KernelIdx = 0; KernelIdx < Kernels.size(); KernelIdx++) { auto Kernel = Kernels[KernelIdx]; std::string HostFName = Kernel.getInfo(&Err); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorInitializationError, @@ -798,9 +758,6 @@ CHIPQueue *CHIPDeviceOpenCL::createQueue(const uintptr_t *NativeHandles, // CHIPKernelOpenCL //************************************************************************* -SPVFuncInfo *CHIPKernelOpenCL::getFuncInfo() const { return FuncInfo_; } -std::string CHIPKernelOpenCL::getName() { return Name_; } -cl::Kernel *CHIPKernelOpenCL::get() { return &OclKernel_; } /// Clones the instance but with separate cl_kernel handle. 
CHIPKernelOpenCL *CHIPKernelOpenCL::clone() { @@ -847,14 +804,14 @@ CHIPKernelOpenCL::CHIPKernelOpenCL(cl::Kernel ClKernel, CHIPDeviceOpenCL *Dev, assert(FuncInfo_->getNumKernelArgs() == NumArgs); MaxWorkGroupSize_ = - OclKernel_.getWorkGroupInfo(*Device->get()); + OclKernel_.getWorkGroupInfo(Device->get()); StaticLocalSize_ = - OclKernel_.getWorkGroupInfo(*Device->get()); + OclKernel_.getWorkGroupInfo(Device->get()); MaxDynamicLocalSize_ = (size_t)Device->getAttr(hipDeviceAttributeMaxSharedMemoryPerBlock) - StaticLocalSize_; PrivateSize_ = - OclKernel_.getWorkGroupInfo(*Device->get()); + OclKernel_.getWorkGroupInfo(Device->get()); Name_ = OclKernel_.getInfo(); @@ -885,10 +842,9 @@ void CHIPContextOpenCL::freeImpl(void *Ptr) { SvmMemory.free(Ptr); } -cl::Context *CHIPContextOpenCL::get() { return ClContext; } - -CHIPContextOpenCL::CHIPContextOpenCL(cl::Context *CtxIn, cl::Device Dev, +CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, cl::Platform Plat) { + logTrace("CHIPContextOpenCL Initialized via OpenCL Context pointer."); std::string DevExts = Dev.getInfo(); std::memset(&Exts, 0, sizeof(Exts)); @@ -962,7 +918,7 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context *CtxIn, cl::Device Dev, #endif ClContext = CtxIn; - SvmMemory.init(*CtxIn); + SvmMemory.init(CtxIn); } void *CHIPContextOpenCL::allocateImpl(size_t Size, size_t Alignment, @@ -994,7 +950,7 @@ void CL_CALLBACK pfn_notify(cl_event Event, cl_int CommandExecStatus, return; Cbo->Callback(Cbo->Stream, Cbo->Status, Cbo->UserData); if (Cbo->CallbackFinishEvent != nullptr) { - clSetUserEventStatus(Cbo->CallbackFinishEvent->ClEvent, CL_COMPLETE); + static_cast(Cbo->CallbackFinishEvent->get()).setStatus(CL_COMPLETE); Cbo->CallbackFinishEvent->decreaseRefCount("Notified finished."); } delete Cbo; @@ -1007,21 +963,20 @@ void CHIPQueueOpenCL::MemMap(const AllocationInfo *AllocInfo, logDebug("Device supports fine grain SVM. 
Skipping MemMap/Unmap"); } cl_int Status; + // TODO why does this code use blocking = true ?? if (Type == CHIPQueue::MEM_MAP_TYPE::HOST_READ) { logDebug("CHIPQueueOpenCL::MemMap HOST_READ"); - Status = - clEnqueueSVMMap(ClQueue_->get(), CL_TRUE, CL_MAP_READ, - AllocInfo->HostPtr, AllocInfo->Size, 0, NULL, NULL); + Status = ClQueue.enqueueMapSVM(AllocInfo->HostPtr, CL_TRUE, CL_MAP_READ, + AllocInfo->Size); } else if (Type == CHIPQueue::MEM_MAP_TYPE::HOST_WRITE) { logDebug("CHIPQueueOpenCL::MemMap HOST_WRITE"); - Status = - clEnqueueSVMMap(ClQueue_->get(), CL_TRUE, CL_MAP_WRITE, - AllocInfo->HostPtr, AllocInfo->Size, 0, NULL, NULL); + Status = ClQueue.enqueueMapSVM(AllocInfo->HostPtr, CL_TRUE, CL_MAP_WRITE, + AllocInfo->Size); } else if (Type == CHIPQueue::MEM_MAP_TYPE::HOST_READ_WRITE) { logDebug("CHIPQueueOpenCL::MemMap HOST_READ_WRITE"); - Status = - clEnqueueSVMMap(ClQueue_->get(), CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - AllocInfo->HostPtr, AllocInfo->Size, 0, NULL, NULL); + Status = ClQueue.enqueueMapSVM(AllocInfo->HostPtr, CL_TRUE, + CL_MAP_READ | CL_MAP_WRITE, + AllocInfo->Size); } else { assert(0 && "Invalid MemMap Type"); } @@ -1035,24 +990,23 @@ void CHIPQueueOpenCL::MemUnmap(const AllocationInfo *AllocInfo) { } logDebug("CHIPQueueOpenCL::MemUnmap"); - auto Status = - clEnqueueSVMUnmap(ClQueue_->get(), AllocInfo->HostPtr, 0, NULL, NULL); + auto Status = ClQueue.enqueueUnmapSVM(AllocInfo->HostPtr); assert(Status == CL_SUCCESS); } -cl::CommandQueue *CHIPQueueOpenCL::get() { return ClQueue_; } void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, void *UserData) { logTrace("CHIPQueueOpenCL::addCallback()"); - cl::Context *ClContext_ = ((CHIPContextOpenCL *)ChipContext_)->get(); + cl::Context &ClContext_ = ((CHIPContextOpenCL *)ChipContext_)->get(); cl_int Err; CHIPEventOpenCL *HoldBackEvent = (CHIPEventOpenCL *)Backend->createCHIPEvent(ChipContext_); - - HoldBackEvent->ClEvent = clCreateUserEvent(ClContext_->get(), &Err); + cl::UserEvent 
HoldBackClEvent(ClContext_, &Err); + CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd); + HoldBackEvent->reset(HoldBackClEvent()); std::vector WaitForEvents{HoldBackEvent}; auto LastEvent = getLastEvent(); @@ -1071,9 +1025,9 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, CHIPEventOpenCL *CallbackEvent = (CHIPEventOpenCL *)Backend->createCHIPEvent(ChipContext_); - - CallbackEvent->ClEvent = clCreateUserEvent(ClContext_->get(), &Err); + cl::UserEvent CallbackClEvent(ClContext_, &Err); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd); + CallbackEvent->reset(CallbackClEvent()); // Make the succeeding commands wait for the user event which will be // set CL_COMPLETE by the callback trampoline function pfn_notify after @@ -1087,16 +1041,14 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, // We know that the callback won't be yet launched since it's depending // on the barrier which waits for the user event. - auto Status = clSetEventCallback(HoldbackBarrierCompletedEv->ClEvent, - CL_COMPLETE, pfn_notify, Cb); + auto Status = HoldbackBarrierCompletedEv->get().setCallback(CL_COMPLETE, pfn_notify, Cb); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); updateLastEvent(CallbackCompleted); - ClQueue_->flush(); + ClQueue.flush(); // Now the CB can start executing in the background: - clSetUserEventStatus(HoldBackEvent->ClEvent, CL_COMPLETE); - HoldBackEvent->decreaseRefCount("Notified finished."); + HoldBackClEvent.setStatus(CL_COMPLETE); return; }; @@ -1104,9 +1056,10 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, CHIPEvent *CHIPQueueOpenCL::enqueueMarkerImpl() { CHIPEventOpenCL *MarkerEvent = (CHIPEventOpenCL *)Backend->createCHIPEvent(ChipContext_); - auto Status = - clEnqueueMarker(this->get()->get(), MarkerEvent->getNativePtr()); + cl::Event RetEv; + auto Status = ClQueue.enqueueMarkerWithWaitList(nullptr, &RetEv); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); + 
MarkerEvent->reset(std::move(RetEv)); MarkerEvent->Msg = "marker"; return MarkerEvent; } @@ -1133,11 +1086,10 @@ CHIPEvent *CHIPQueueOpenCL::launchImpl(CHIPExecItem *ExecItem) { dim3 GridDim = ChipOclExecItem->getGrid(); dim3 BlockDim = ChipOclExecItem->getBlock(); - const size_t NumDims = 3; - const size_t GlobalOffset[NumDims] = {0, 0, 0}; - const size_t Global[NumDims] = { - GridDim.x * BlockDim.x, GridDim.y * BlockDim.y, GridDim.z * BlockDim.z}; - const size_t Local[NumDims] = {BlockDim.x, BlockDim.y, BlockDim.z}; + const cl::NDRange GlobalOffset{0, 0, 0}; + const cl::NDRange Global{GridDim.x * BlockDim.x, GridDim.y * BlockDim.y, + GridDim.z * BlockDim.z}; + const cl::NDRange Local{BlockDim.x, BlockDim.y, BlockDim.z}; logTrace("Launch GLOBAL: {} {} {}", Global[0], Global[1], Global[2]); @@ -1147,12 +1099,14 @@ CHIPEvent *CHIPQueueOpenCL::launchImpl(CHIPExecItem *ExecItem) { #endif auto SvmAllocationsToKeepAlive = - annotateSvmPointers(*OclContext, Kernel->get()->get()); + annotateSvmPointers(*OclContext, Kernel->get().get()); + + cl::Event RetEv; + auto Status = ClQueue.enqueueNDRangeKernel(Kernel->get(), GlobalOffset, + Global, Local, nullptr, &RetEv); - auto Status = clEnqueueNDRangeKernel(ClQueue_->get(), Kernel->get()->get(), - NumDims, GlobalOffset, Global, Local, 0, - nullptr, LaunchEvent->getNativePtr()); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); + LaunchEvent->reset(std::move(RetEv)); std::shared_ptr SpillBuf = ExecItem->getArgSpillBuffer(); @@ -1171,8 +1125,9 @@ CHIPEvent *CHIPQueueOpenCL::launchImpl(CHIPExecItem *ExecItem) { auto *CBData = new KernelEventCallbackData; CBData->ArgSpillBuffer = SpillBuf; CBData->SvmKeepAlives = std::move(SvmAllocationsToKeepAlive); - Status = clSetEventCallback(LaunchEvent->getNativeRef(), CL_COMPLETE, - kernelEventCallback, CBData); + Status = LaunchEvent->get().setCallback(CL_COMPLETE, kernelEventCallback, + CBData); + if (Status != CL_SUCCESS) { delete CBData; CHIPERR_CHECK_LOG_AND_THROW(Status, 
CL_SUCCESS, hipErrorTbd); @@ -1208,10 +1163,10 @@ CHIPQueueOpenCL::CHIPQueueOpenCL(CHIPDevice *ChipDevice, int Priority, logWarn("CHIPQueueOpenCL is ignoring Priority value"); if (Queue) - ClQueue_ = new cl::CommandQueue(Queue); + ClQueue = cl::CommandQueue(Queue); else { - cl::Context *ClContext_ = ((CHIPContextOpenCL *)ChipContext_)->get(); - cl::Device *ClDevice_ = ((CHIPDeviceOpenCL *)ChipDevice_)->get(); + cl::Context &ClContext_ = ((CHIPContextOpenCL *)ChipContext_)->get(); + cl::Device &ClDevice_ = ((CHIPDeviceOpenCL *)ChipDevice_)->get(); cl_int Status; // Adding priority breaks correctness? // cl_queue_properties QueueProperties[] = { @@ -1221,8 +1176,8 @@ CHIPQueueOpenCL::CHIPQueueOpenCL(CHIPDevice *ChipDevice, int Priority, CL_QUEUE_PROFILING_ENABLE, 0}; const cl_command_queue Q = clCreateCommandQueueWithProperties( - ClContext_->get(), ClDevice_->get(), QueueProperties, &Status); - ClQueue_ = new cl::CommandQueue(Q); + ClContext_.get(), ClDevice_.get(), QueueProperties, &Status); + ClQueue = cl::CommandQueue(Q); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorInitializationError); @@ -1238,6 +1193,7 @@ CHIPEvent *CHIPQueueOpenCL::memCopyAsyncImpl(void *Dst, const void *Src, CHIPEventOpenCL *Event = (CHIPEventOpenCL *)Backend->createCHIPEvent(ChipContext_); logTrace("clSVMmemcpy {} -> {} / {} B\n", Src, Dst, Size); + cl::Event RetEv; if (Dst == Src) { // Although ROCm API ref says that Dst and Src should not overlap, // HIP seems to handle Dst == Src as a special (no-operation) case. @@ -1247,17 +1203,19 @@ CHIPEvent *CHIPQueueOpenCL::memCopyAsyncImpl(void *Dst, const void *Src, // makes/ it pass, but Intel CPU OpenCL returns CL_​MEM_​COPY_​OVERLAP // like it should. To unify the behavior, let's convert the special case to // a maker here, so we can return an event. 
- cl::Event MarkerEvent; - auto Status = clEnqueueMarker(ClQueue_->get(), Event->getNativePtr()); + auto Status = ClQueue.enqueueMarkerWithWaitList(nullptr, &RetEv); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); } else { #ifdef DUBIOUS_LOCKS LOCK(Backend->DubiousLockOpenCL) #endif - auto Status = ::clEnqueueSVMMemcpy(ClQueue_->get(), CL_FALSE, Dst, Src, - Size, 0, nullptr, Event->getNativePtr()); + cl_event E = nullptr; + auto Status = ::clEnqueueSVMMemcpy(ClQueue.get(), CL_FALSE, Dst, Src, Size, + 0, nullptr, &E); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorRuntimeMemory); + RetEv = E; } + Event->reset(std::move(RetEv)); return Event; } @@ -1265,7 +1223,7 @@ void CHIPQueueOpenCL::finish() { #ifdef DUBIOUS_LOCKS LOCK(Backend->DubiousLockOpenCL) #endif - auto Status = ClQueue_->finish(); + auto Status = ClQueue.finish(); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); } @@ -1276,15 +1234,18 @@ CHIPEvent *CHIPQueueOpenCL::memFillAsyncImpl(void *Dst, size_t Size, (CHIPEventOpenCL *)Backend->createCHIPEvent(ChipContext_); logTrace("clSVMmemfill {} / {} B\n", Dst, Size); cl_event Ev = nullptr; - int Retval = ::clEnqueueSVMMemFill(ClQueue_->get(), Dst, Pattern, PatternSize, - Size, 0, nullptr, Event->getNativePtr()); - CHIPERR_CHECK_LOG_AND_THROW(Retval, CL_SUCCESS, hipErrorRuntimeMemory); + auto Status = ::clEnqueueSVMMemFill(ClQueue.get(), Dst, Pattern, PatternSize, + Size, 0, nullptr, &Ev); + CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorRuntimeMemory); + cl::Event RetEv(Ev); + Event->reset(std::move(RetEv)); return Event; }; CHIPEvent *CHIPQueueOpenCL::memCopy2DAsyncImpl(void *Dst, size_t Dpitch, const void *Src, size_t Spitch, size_t Width, size_t Height) { + // TODO UNIMPLEMENTED(nullptr); }; @@ -1293,6 +1254,7 @@ CHIPEvent *CHIPQueueOpenCL::memCopy3DAsyncImpl(void *Dst, size_t Dpitch, size_t Spitch, size_t Sspitch, size_t Width, size_t Height, size_t Depth) { + // TODO UNIMPLEMENTED(nullptr); }; @@ -1306,23 
+1268,24 @@ hipError_t CHIPQueueOpenCL::getBackendHandles(uintptr_t *NativeInfo, *NumHandles = 4; // Get queue handler - NativeInfo[3] = (uintptr_t)ClQueue_->get(); + NativeInfo[3] = (uintptr_t)ClQueue.get(); // Get context handler - cl::Context *Ctx = ((CHIPContextOpenCL *)ChipContext_)->get(); - NativeInfo[2] = (uintptr_t)Ctx->get(); + cl::Context &Ctx = ((CHIPContextOpenCL *)ChipContext_)->get(); + NativeInfo[2] = (uintptr_t)Ctx.get(); // Get device handler - cl::Device *Dev = ((CHIPDeviceOpenCL *)ChipDevice_)->get(); - NativeInfo[1] = (uintptr_t)Dev->get(); + cl::Device &Dev = ((CHIPDeviceOpenCL *)ChipDevice_)->get(); + NativeInfo[1] = (uintptr_t)Dev.get(); // Get platform handler - cl_platform_id Plat = Dev->getInfo(); + cl_platform_id Plat = Dev.getInfo(); NativeInfo[0] = (uintptr_t)Plat; return hipSuccess; } CHIPEvent *CHIPQueueOpenCL::memPrefetchImpl(const void *Ptr, size_t Count) { + // TODO UNIMPLEMENTED(nullptr); } @@ -1333,35 +1296,27 @@ CHIPQueueOpenCL::enqueueBarrierImpl(std::vector *EventsToWaitFor) { #endif CHIPEventOpenCL *Event = (CHIPEventOpenCL *)Backend->createCHIPEvent(this->ChipContext_); - cl_int RefCount; + int Status; - Status = clGetEventInfo(Event->getNativeRef(), CL_EVENT_REFERENCE_COUNT, 4, - &RefCount, NULL); + cl::Event RetEv; + if (EventsToWaitFor && EventsToWaitFor->size() > 0) { - std::vector Events = {}; + std::vector Events = {}; for (auto E : *EventsToWaitFor) { auto Ee = (CHIPEventOpenCL *)E; - // assert(Ee->getRefCount() > 0); - Events.push_back(Ee->getNativeRef()); + Events.push_back(Ee->get()); } - // auto Status = ClQueue_->enqueueBarrierWithWaitList(&Events, &Barrier); - auto Status = - clEnqueueBarrierWithWaitList(ClQueue_->get(), Events.size(), - Events.data(), &(Event->getNativeRef())); - CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); + Status = ClQueue.enqueueBarrierWithWaitList(&Events, &RetEv); } else { - // auto Status = ClQueue_->enqueueBarrierWithWaitList(nullptr, &Barrier); - auto Status = 
clEnqueueBarrierWithWaitList(ClQueue_->get(), 0, nullptr, - Event->getNativePtr()); - CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); + Status = ClQueue.enqueueBarrierWithWaitList(nullptr, &RetEv); } - Status = clGetEventInfo(Event->getNativeRef(), CL_EVENT_REFERENCE_COUNT, 4, - &RefCount, NULL); + CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); + Event->reset(std::move(RetEv)); return Event; } -/********************************************************************************/ +/*****************************************************************************/ CHIPGraphNative *CHIPQueueOpenCL::createNativeGraph() { // should not raise an error if we fail to create a graph, @@ -1370,7 +1325,7 @@ CHIPGraphNative *CHIPQueueOpenCL::createNativeGraph() { if (!Ctx->supportsCommandBuffers()) return nullptr; - cl_command_queue CQ = ClQueue_->get(); + cl_command_queue CQ = ClQueue.get(); int err = CL_SUCCESS; cl_command_buffer_khr Res = Ctx->exts()->clCreateCommandBufferKHR(1, &CQ, 0, &err); @@ -1390,10 +1345,12 @@ CHIPEvent *CHIPQueueOpenCL::enqueueNativeGraph(CHIPGraphNative *NativeGraph) { return nullptr; if (NativeGraph == nullptr) return nullptr; - cl_command_queue CQ = ClQueue_->get(); - int Status = Ctx->exts()->clEnqueueCommandBufferKHR( - 1, &CQ, G->get(), 0, nullptr, Event->getNativePtr()); + cl_command_queue CQ = ClQueue.get(); + cl_event TmpEv; + int Status = Ctx->exts()->clEnqueueCommandBufferKHR(1, &CQ, G->get(), 0, + nullptr, &TmpEv); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); + Event->reset(TmpEv); return Event; } @@ -1404,8 +1361,7 @@ void CHIPQueueOpenCL::destroyNativeGraph(CHIPGraphNative *NativeGraph) { delete G; } -bool CHIPGraphNativeOpenCL::addNodeToNativeGraph(CHIPGraphNative *NativeGraph, - CHIPGraphNode *NewNode) { +bool CHIPGraphNativeOpenCL::addNode(CHIPGraphNode *NewNode) { cl_sync_point_khr NewSyncPoint = -1; // map the dependent CHIPGraphNodes to OpenCL syncpoints @@ -1519,11 +1475,12 @@ bool 
CHIPGraphNativeOpenCL::addKernelNode( assert(Exts->clCommandNDRangeKernelKHR); Status = Exts->clCommandNDRangeKernelKHR( - Handle, CmdQ, Properties, CLK->get()->get(), - WorkDim, // cl_uint work_dim - nullptr, // const size_t* global_work_offset, - GWSize, // const size_t* global_work_size, - LWSize, // const size_t* local_work_size, + Handle, CmdQ, Properties, + CLK->get().get(), // cl_kernel + WorkDim, // cl_uint work_dim + nullptr, // const size_t* global_work_offset, + GWSize, // const size_t* global_work_size, + LWSize, // const size_t* local_work_size, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); return Status == CL_SUCCESS; } @@ -1721,8 +1678,9 @@ bool CHIPGraphNativeOpenCL::addEventRecordNode( CHIPEventOpenCL *CLE = static_cast(E); int Status; - Status = Exts->clCommandSignalEventPOCL(Handle, CmdQ, CLE->ClEvent, SyncPoint, - nullptr); + // TODO BROKEN + Status = + Exts->clCommandSignalEventPOCL(Handle, CmdQ, nullptr, SyncPoint, nullptr); return Status == CL_SUCCESS; } @@ -1737,7 +1695,7 @@ bool CHIPGraphNativeOpenCL::addEventWaitNode( CHIPEventOpenCL *CLE = static_cast(E); int Status; - Status = Exts->clCommandWaitForEventPOCL(Handle, CmdQ, CLE->ClEvent, + Status = Exts->clCommandWaitForEventPOCL(Handle, CmdQ, CLE->get().get(), SyncPoint, nullptr); return Status == CL_SUCCESS; } @@ -1746,8 +1704,6 @@ bool CHIPGraphNativeOpenCL::addEventWaitNode( // CHIPExecItemOpenCL //************************************************************************* -cl::Kernel *CHIPExecItemOpenCL::get() { return ClKernel_; } - void CHIPExecItemOpenCL::setupAllArgs() { if (!ArgsSetup) { ArgsSetup = true; @@ -1755,6 +1711,7 @@ void CHIPExecItemOpenCL::setupAllArgs() { return; } CHIPKernelOpenCL *Kernel = (CHIPKernelOpenCL *)getKernel(); + cl::Kernel &K = Kernel->get(); SPVFuncInfo *FuncInfo = Kernel->getFuncInfo(); int Err = 0; @@ -1774,8 +1731,7 @@ void CHIPExecItemOpenCL::setupAllArgs() { *reinterpret_cast(Arg.Data); cl_mem Image = TexObj->getImage(); 
logTrace("set image arg {} for tex {}\n", Arg.Index, (void *)TexObj); - Err = ::clSetKernelArg(Kernel->get()->get(), Arg.Index, sizeof(cl_mem), - &Image); + Err = K.setArg(Arg.Index, sizeof(cl_mem), &Image); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd, "clSetKernelArg failed for image argument."); break; @@ -1785,8 +1741,7 @@ void CHIPExecItemOpenCL::setupAllArgs() { *reinterpret_cast(Arg.Data); cl_sampler Sampler = TexObj->getSampler(); logTrace("set sampler arg {} for tex {}\n", Arg.Index, (void *)TexObj); - Err = ::clSetKernelArg(Kernel->get()->get(), Arg.Index, - sizeof(cl_sampler), &Sampler); + K.setArg(Arg.Index, sizeof(cl_sampler), &Sampler); CHIPERR_CHECK_LOG_AND_THROW( Err, CL_SUCCESS, hipErrorTbd, "clSetKernelArg failed for sampler argument."); @@ -1795,8 +1750,7 @@ void CHIPExecItemOpenCL::setupAllArgs() { case SPVTypeKind::POD: { logTrace("clSetKernelArg {} SIZE {} to {}\n", Arg.Index, Arg.Size, Arg.Data); - Err = - ::clSetKernelArg(Kernel->get()->get(), Arg.Index, Arg.Size, Arg.Data); + Err = K.setArg(Arg.Index, Arg.Size, Arg.Data); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd, "clSetKernelArg failed"); break; @@ -1805,16 +1759,16 @@ void CHIPExecItemOpenCL::setupAllArgs() { CHIPASSERT(Arg.Size == sizeof(void *)); if (Arg.isWorkgroupPtr()) { logTrace("setLocalMemSize to {}\n", SharedMem_); - Err = ::clSetKernelArg(Kernel->get()->get(), Arg.Index, SharedMem_, - nullptr); + Err = K.setArg(Arg.Index, SharedMem_, nullptr); } else { + const void *Ptr = *(const void **)Arg.Data; logTrace("clSetKernelArgSVMPointer {} SIZE {} to {} (value {})\n", - Arg.Index, Arg.Size, Arg.Data, *(const void **)Arg.Data); - Err = ::clSetKernelArgSVMPointer( - Kernel->get()->get(), Arg.Index, - // Unlike clSetKernelArg() which takes address to the argument, - // this function takes the argument value directly. 
- *(const void **)Arg.Data); + Arg.Index, Arg.Size, Arg.Data, Ptr); + + // Unlike clSetKernelArg() which takes address to the argument, + // this function takes the argument value directly. + Err = K.setArg(Arg.Index, Ptr); + if (Err != CL_SUCCESS) { // ROCm seems to allow passing invalid pointers to kernels if they are // not derefenced (see test_device_adjacent_difference of rocPRIM). @@ -1823,9 +1777,8 @@ void CHIPExecItemOpenCL::setupAllArgs() { logWarn( "clSetKernelArgSVMPointer {} SIZE {} to {} (value {}) returned " "error, setting the arg to nullptr\n", - Arg.Index, Arg.Size, Arg.Data, *(const void **)Arg.Data); - Err = ::clSetKernelArgSVMPointer(Kernel->get()->get(), Arg.Index, - nullptr); + Arg.Index, Arg.Size, Arg.Data, Ptr); + Err = K.setArg(Arg.Index, nullptr); } } CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd, @@ -1833,10 +1786,9 @@ void CHIPExecItemOpenCL::setupAllArgs() { break; } case SPVTypeKind::PODByRef: { - auto *SpillSlot = ArgSpillBuffer_->allocate(Arg); + void *SpillSlot = ArgSpillBuffer_->allocate(Arg); assert(SpillSlot); - Err = ::clSetKernelArgSVMPointer(Kernel->get()->get(), Arg.Index, - SpillSlot); + Err = K.setArg(Arg.Index, SpillSlot); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd, "clSetKernelArgSVMPointer failed"); break; @@ -1874,6 +1826,7 @@ CHIPExecItem *CHIPBackendOpenCL::createCHIPExecItem(dim3 GirdDim, dim3 BlockDim, new CHIPExecItemOpenCL(GirdDim, BlockDim, SharedMem, ChipQueue); return ExecItem; }; + CHIPQueue *CHIPBackendOpenCL::createCHIPQueue(CHIPDevice *ChipDev) { CHIPDeviceOpenCL *ChipDevCl = (CHIPDeviceOpenCL *)ChipDev; return new CHIPQueueOpenCL(ChipDevCl, OCL_DEFAULT_QUEUE_PRIORITY); @@ -1929,7 +1882,7 @@ void CHIPBackendOpenCL::initializeImpl(std::string CHIPPlatformStr, } std::stringstream StrStream; StrStream << "\nFound " << Platforms.size() << " OpenCL platforms:\n"; - for (int i = 0; i < Platforms.size(); i++) { + for (size_t i = 0; i < Platforms.size(); i++) { StrStream << i << ". 
" << Platforms[i].getInfo() << "\n"; } logTrace("{}", StrStream.str()); @@ -1968,21 +1921,19 @@ void CHIPBackendOpenCL::initializeImpl(std::string CHIPPlatformStr, std::exit(1); } - auto Device = SpirvDevices[SelectedDeviceIdx]; + cl::Device Device = SpirvDevices[SelectedDeviceIdx]; logDebug("CHIP_DEVICE={} Selected OpenCL device {}", SelectedDeviceIdx, Device.getInfo()); // Create context which has devices // Create queues that have devices each of which has an associated context // TODO Change this to spirv_enabled_devices - cl::Context *Ctx = new cl::Context(SpirvDevices); - CHIPContextOpenCL *ChipContext = - new CHIPContextOpenCL(Ctx, Device, SelectedPlatform); + CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL( + cl::Context(SpirvDevices), Device, SelectedPlatform); Backend->addContext(ChipContext); // TODO for now only a single device is supported. - cl::Device *clDev = new cl::Device(Device); - CHIPDeviceOpenCL *ChipDev = CHIPDeviceOpenCL::create(clDev, ChipContext, 0); + CHIPDeviceOpenCL *ChipDev = CHIPDeviceOpenCL::create(Device, ChipContext, 0); // Add device to context & backend ChipContext->setDevice(ChipDev); @@ -1993,18 +1944,19 @@ void CHIPBackendOpenCL::initializeFromNative(const uintptr_t *NativeHandles, int NumHandles) { logTrace("CHIPBackendOpenCL InitializeNative"); MinQueuePriority_ = CL_QUEUE_PRIORITY_MED_KHR; - // cl_platform_id PlatId = (cl_platform_id)NativeHandles[0]; + cl_platform_id PlatId = (cl_platform_id)NativeHandles[0]; cl_device_id DevId = (cl_device_id)NativeHandles[1]; cl_context CtxId = (cl_context)NativeHandles[2]; - cl::Device *Dev = new cl::Device(DevId); - cl::Platform Plat(Dev->getInfo()); - cl::Context *Ctx = new cl::Context(CtxId); - CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL(Ctx, *Dev, Plat); + // Platform can also be get from this: Dev.getInfo() + cl::Platform Plat(PlatId); + cl::Device Dev(DevId); + cl::Context Ctx(CtxId); + CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL(Ctx, Dev, Plat); 
addContext(ChipContext); CHIPDeviceOpenCL *ChipDev = CHIPDeviceOpenCL::create(Dev, ChipContext, 0); - logTrace("CHIPDeviceOpenCL {}", ChipDev->ClDevice->getInfo()); + logTrace("CHIPDeviceOpenCL {}", Dev.getInfo()); // Add device to context & backend ChipContext->setDevice(ChipDev); @@ -2027,7 +1979,7 @@ void *CHIPBackendOpenCL::getNativeEvent(hipEvent_t HipEvent) { CHIPEventOpenCL *E = (CHIPEventOpenCL *)HipEvent; if (!E->isRecordingOrRecorded()) return nullptr; - return (void *)E->ClEvent; + return (void *)E->get().get(); } // Other diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index 80949182a..6b0bb6012 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -39,8 +39,6 @@ #define CL_HPP_TARGET_OPENCL_VERSION 210 #define CL_HPP_MINIMUM_OPENCL_VERSION 200 -#pragma OPENCL EXTENSION cl_khr_priority_hints : enable - #include #pragma GCC diagnostic push @@ -86,9 +84,8 @@ public: }; class CHIPEventOpenCL : public CHIPEvent { -public: - cl_event ClEvent; - friend class CHIPEventOpenCL; +private: + cl::Event ClEvent; public: CHIPEventOpenCL(CHIPContextOpenCL *ChipContext, cl_event ClEvent, @@ -96,20 +93,22 @@ public: bool UserEvent = false); CHIPEventOpenCL(CHIPContextOpenCL *ChipContext, CHIPEventFlags Flags = CHIPEventFlags()); - virtual ~CHIPEventOpenCL() override; + virtual ~CHIPEventOpenCL() override{}; virtual void recordStream(CHIPQueue *ChipQueue) override; - void takeOver(CHIPEvent *Other); + virtual size_t getCHIPRefc() override; bool wait() override; float getElapsedTime(CHIPEvent *Other) override; - virtual void hostSignal() override; + + // not needed anywhere + virtual void hostSignal() override{}; + virtual bool updateFinishStatus(bool ThrowErrorIfNotReady = true) override; - cl_event *getNativePtr() { return &ClEvent; } - cl_event &getNativeRef() { return ClEvent; } - uint64_t getFinishTime(); - size_t getRefCount(); + cl::Event &get() { return ClEvent; } + void 
reset(cl::Event &&Ev) { ClEvent = Ev; } + void reset(cl_event Ev) { ClEvent = Ev; } - virtual void increaseRefCount(std::string Reason) override; - virtual void decreaseRefCount(std::string Reason) override; + // for elapsedTime + uint64_t getFinishTime(); }; class CHIPModuleOpenCL : public CHIPModule { @@ -120,7 +119,6 @@ public: CHIPModuleOpenCL(const SPVModule &SrcMod); virtual ~CHIPModuleOpenCL() {} virtual void compile(CHIPDevice *ChipDevice) override; - cl::Program *get(); }; class SVMemoryRegion { @@ -134,7 +132,7 @@ public: using const_svm_alloc_iterator = ConstMapKeyIterator< std::map, size_t, PointerCmp>>; - void init(cl::Context &C) { Context_ = C; } + void init(cl::Context C) { Context_ = C; } SVMemoryRegion &operator=(SVMemoryRegion &&Rhs); void *allocate(size_t Size, SVM_ALLOC_GRANULARITY Granularity = COARSE_GRAIN); bool free(void *P); @@ -179,7 +177,8 @@ typedef struct { } CHIPContextClExts; class CHIPContextOpenCL : public CHIPContext { - cl::Context *ClContext; +private: + cl::Context ClContext; bool SupportsCommandBuffers; bool SupportsCommandBuffersSVM; bool SupportsCommandBuffersHost; @@ -188,14 +187,14 @@ class CHIPContextOpenCL : public CHIPContext { public: bool allDevicesSupportFineGrainSVM(); - CHIPContextOpenCL(cl::Context *ClContext, cl::Device Dev, cl::Platform Plat); + CHIPContextOpenCL(cl::Context ClContext, cl::Device Dev, cl::Platform Plat); virtual ~CHIPContextOpenCL() {} void *allocateImpl(size_t Size, size_t Alignment, hipMemoryType MemType, CHIPHostAllocFlags Flags = CHIPHostAllocFlags()) override; bool isAllocatedPtrMappedToVM(void *Ptr) override { return false; } // TODO virtual void freeImpl(void *Ptr) override; - cl::Context *get(); + cl::Context &get() { return ClContext; } bool supportsCommandBuffers() { return SupportsCommandBuffers; } bool supportsCommandBuffersSVM() { return SupportsCommandBuffersSVM; } bool supportsCommandBuffersHost() { return SupportsCommandBuffersHost; } @@ -205,20 +204,18 @@ public: class 
CHIPDeviceOpenCL : public CHIPDevice { private: bool SupportsFineGrainSVM = false; - CHIPDeviceOpenCL(CHIPContextOpenCL *ChipContext, cl::Device *ClDevice, + CHIPDeviceOpenCL(CHIPContextOpenCL *ChipContext, cl::Device ClDevice, int Idx); + cl::Device ClDevice; public: - virtual CHIPContextOpenCL *createContext() override { return nullptr; } - - static CHIPDeviceOpenCL *create(cl::Device *ClDevice, + static CHIPDeviceOpenCL *create(cl::Device ClDevice, CHIPContextOpenCL *ChipContext, int Idx); - cl::Device *ClDevice; - cl::Context *ClContext; - cl::Device *get() { return ClDevice; } + cl::Device &get() { return ClDevice; } bool supportsFineGrainSVM() { return SupportsFineGrainSVM; } virtual void populateDevicePropertiesImpl() override; - virtual void resetImpl() override; + // unused + virtual void resetImpl() override{}; virtual CHIPQueue *createQueue(CHIPQueueFlags Flags, int Priority) override; virtual CHIPQueue *createQueue(const uintptr_t *NativeHandles, int NumHandles) override; @@ -241,7 +238,7 @@ public: class CHIPQueueOpenCL : public CHIPQueue { protected: // Any reason to make these private/protected? - cl::CommandQueue *ClQueue_; + cl::CommandQueue ClQueue; /** * @brief Map memory to device. 
@@ -279,7 +276,7 @@ public: virtual void finish() override; virtual CHIPEvent *memCopyAsyncImpl(void *Dst, const void *Src, size_t Size) override; - cl::CommandQueue *get(); + cl::CommandQueue &get() { return ClQueue; } virtual CHIPEvent *memFillAsyncImpl(void *Dst, size_t Size, const void *Pattern, size_t PatternSize) override; @@ -322,9 +319,10 @@ public: CHIPModuleOpenCL *Parent); virtual ~CHIPKernelOpenCL() {} - SPVFuncInfo *getFuncInfo() const; - std::string getName(); - cl::Kernel *get(); + + SPVFuncInfo *getFuncInfo() const { return FuncInfo_; } + std::string getName() { return Name_; } + cl::Kernel &get() { return OclKernel_; } CHIPKernelOpenCL *clone(); CHIPModuleOpenCL *getModule() override { return Module; } @@ -335,7 +333,7 @@ public: class CHIPExecItemOpenCL : public CHIPExecItem { private: std::unique_ptr ChipKernel_; - cl::Kernel *ClKernel_; + cl::Kernel ClKernel_; public: CHIPExecItemOpenCL(const CHIPExecItemOpenCL &Other) @@ -353,13 +351,11 @@ public: hipStream_t ChipQueue) : CHIPExecItem(GirdDim, BlockDim, SharedMem, ChipQueue) {} - virtual ~CHIPExecItemOpenCL() override { - // TODO delete ClKernel_? - } + virtual ~CHIPExecItemOpenCL() override {} SPVFuncInfo FuncInfo; virtual void setupAllArgs() override; - cl::Kernel *get(); + cl::Kernel &get() { return ClKernel_; } virtual CHIPExecItem *clone() const override { auto NewExecItem = new CHIPExecItemOpenCL(*this); return NewExecItem; From 4aeedfac65a35e0656bbfc8a9f739396a0b1f2f8 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Mon, 17 Apr 2023 23:06:02 +0300 Subject: [PATCH 07/35] add more error-checking & bugfixes "ctest --timeout 120 -R Unit_hipGraph" with POCL (with the new cl_pocl_command_buffer_* extensions) reports 59% tests passed, 62 tests failed out of 152 additionally, samples/graph + samples/graphMatrixMultiply work using the "native graphs" (cl_command_buffer), not the original chip-spv's graph execution. 
--- src/CHIPBackend.cc | 2 +- src/CHIPBindings.cc | 400 +++++++++++++++--------- src/CHIPGraph.cc | 76 +++-- src/CHIPGraph.hh | 31 +- src/backend/Level0/CHIPBackendLevel0.cc | 2 +- src/backend/OpenCL/CHIPBackendOpenCL.cc | 58 ++-- src/backend/OpenCL/CHIPBackendOpenCL.hh | 2 + 7 files changed, 361 insertions(+), 210 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index d175eca4d..8208ed3b9 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -465,7 +465,7 @@ void *CHIPArgSpillBuffer::allocate(const SPVFuncInfo::Arg &Arg) { void CHIPExecItem::copyArgs(void **Args) { // args need to be set up again ArgsSetup = false; - for (int i = 0; i < getNumArgs(); i++) { + for (size_t i = 0; i < getNumArgs(); i++) { Args_.push_back(Args[i]); } } diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index ae9a9cb0d..4550cb9e2 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -267,7 +267,7 @@ static void handleAbortRequest(CHIPQueue &Q, CHIPModule &M) { hipError_t hipGraphCreate(hipGraph_t *pGraph, unsigned int flags) { CHIP_TRY if (!pGraph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); CHIPGraph *Graph = new CHIPGraph(); *pGraph = Graph; @@ -278,7 +278,7 @@ hipError_t hipGraphCreate(hipGraph_t *pGraph, unsigned int flags) { hipError_t hipGraphDestroy(hipGraph_t graph) { CHIP_TRY if (!graph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); delete graph; RETURN(hipSuccess); @@ -290,9 +290,9 @@ hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t *from, size_t numDependencies) { CHIP_TRY if (!graph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); if (!from || !to) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); if (!FoundNode) @@ -309,9 +309,9 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, size_t numDependencies) { CHIP_TRY if 
(!graph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); if (!from || !to) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); if (!FoundNode) @@ -326,17 +326,17 @@ hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t *from, hipGraphNode_t *to, size_t *numEdges) { CHIP_TRY if (!graph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); if (!from || !to || !numEdges) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); auto Edges = GRAPH(graph)->getEdges(); if (!to && !from) { *numEdges = Edges.size(); RETURN(hipSuccess); } - for (int i = 0; i < Edges.size(); i++) { + for (size_t i = 0; i < Edges.size(); i++) { auto Edge = Edges[i]; auto FromNode = Edge.first; auto ToNode = Edge.second; @@ -350,14 +350,21 @@ hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t *from, hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, size_t *numNodes) { CHIP_TRY - if (!graph) - RETURN(hipErrorInvalidHandle); - if (!nodes || !numNodes) - RETURN(hipErrorInvalidHandle); + if (!graph || !numNodes) + RETURN(hipErrorInvalidValue); + // if (!nodes && !numNodes) + // RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Nodes = GRAPH(graph)->getNodes(); - *nodes = *(Nodes.data()); - *numNodes = GRAPH(graph)->getNodes().size(); + if (nodes) { + if (numNodes && (*numNodes > Nodes.size())) + RETURN(hipErrorInvalidValue); + size_t ToCopy = numNodes ? 
*numNodes : Nodes.size(); + memcpy(nodes, Nodes.data(), ToCopy * sizeof(CHIPGraphNode *)); + } else { + // numNodes && nodes == nullptr + *numNodes = Nodes.size(); + } RETURN(hipSuccess); CHIP_CATCH } @@ -365,14 +372,20 @@ hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t *pRootNodes, size_t *pNumRootNodes) { CHIP_TRY - if (!graph) - RETURN(hipErrorInvalidHandle); - if (!pRootNodes || !pNumRootNodes) - RETURN(hipErrorInvalidHandle); + if (!graph || !pNumRootNodes) + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Nodes = GRAPH(graph)->getRootNodes(); - *pRootNodes = *(Nodes.data()); - *pNumRootNodes = GRAPH(graph)->getNodes().size(); + if (pRootNodes) { + if (pNumRootNodes && (*pNumRootNodes > Nodes.size())) + RETURN(hipErrorInvalidValue); + size_t ToCopy = pNumRootNodes ? *pNumRootNodes : Nodes.size(); + memcpy(pRootNodes, Nodes.data(), ToCopy * sizeof(CHIPGraphNode *)); + } else { + // numNodes && pRootNodes == nullptr + *pNumRootNodes = Nodes.size(); + } + RETURN(hipSuccess); CHIP_CATCH } @@ -382,15 +395,15 @@ hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, size_t *pNumDependencies) { CHIP_TRY if (!node) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); if (!pDependencies || !pNumDependencies) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Deps = NODE(node)->getDependencies(); *pNumDependencies = Deps.size(); if (!pDependencies) RETURN(hipSuccess); - for (int i = 0; i < Deps.size(); i++) { + for (size_t i = 0; i < Deps.size(); i++) { pDependencies[i] = Deps[i]; } RETURN(hipSuccess); @@ -402,13 +415,13 @@ hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, size_t *pNumDependentNodes) { CHIP_TRY if (!node || !pDependentNodes || !pNumDependentNodes) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Deps = NODE(node)->getDependants(); *pNumDependentNodes = 
Deps.size(); if (!pDependentNodes) RETURN(hipSuccess); - for (int i = 0; i < Deps.size(); i++) { + for (size_t i = 0; i < Deps.size(); i++) { pDependentNodes[i] = Deps[i]; } RETURN(hipSuccess); @@ -418,7 +431,7 @@ hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType *pType) { CHIP_TRY if (!pType || !node) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); *pType = NODE(node)->getType(); RETURN(hipSuccess); @@ -429,7 +442,7 @@ hipError_t hipGraphDestroyNode(hipGraphNode_t node) { CHIP_TRY CHIPInitialize(); if (!node) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); /** * have to resort to these shenanigans to call the proper derived destructor */ @@ -476,7 +489,7 @@ hipError_t hipGraphDestroyNode(hipGraphNode_t node) { hipError_t hipGraphClone(hipGraph_t *pGraphClone, hipGraph_t originalGraph) { CHIP_TRY if (!pGraphClone || !originalGraph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); CHIPGraph *CloneGraph = new CHIPGraph(*GRAPH(originalGraph)); *pGraphClone = CloneGraph; @@ -489,7 +502,7 @@ hipError_t hipGraphNodeFindInClone(hipGraphNode_t *pNode, hipGraph_t clonedGraph) { CHIP_TRY if (!pNode || !originalNode || !clonedGraph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Node = GRAPH(clonedGraph)->getClonedNodeFromOriginal(NODE(originalNode)); *pNode = Node; @@ -502,7 +515,7 @@ hipError_t hipGraphInstantiate(hipGraphExec_t *pGraphExec, hipGraph_t graph, size_t bufferSize) { CHIP_TRY if (!pGraphExec || !graph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); CHIPGraphExec *GraphExec = new CHIPGraphExec(GRAPH(graph)); *pGraphExec = GraphExec; @@ -516,7 +529,7 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t *pGraphExec, unsigned long long flags) { CHIP_TRY if (!pGraphExec || !graph) - RETURN(hipErrorInvalidHandle); + 
RETURN(hipErrorInvalidValue); CHIPInitialize(); // flags not yet defined in HIP API. UNIMPLEMENTED(hipErrorNotSupported); @@ -525,8 +538,8 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t *pGraphExec, hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { CHIP_TRY - if (!graphExec || !stream) - RETURN(hipErrorInvalidHandle); + if (!graphExec) + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto ChipQueue = static_cast(stream); ChipQueue = Backend->findQueue(ChipQueue); @@ -538,7 +551,7 @@ hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { hipError_t hipGraphExecDestroy(hipGraphExec_t graphExec) { CHIP_TRY if (!graphExec) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); delete graphExec; RETURN(hipSuccess); @@ -550,7 +563,7 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, hipGraphExecUpdateResult *updateResult_out) { CHIP_TRY if (!hGraphExec || !hGraph || !hErrorNode_out || !updateResult_out) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); // TODO Graphs - hipGraphExecUpdate /** @@ -664,8 +677,9 @@ hipError_t hipGraphAddKernelNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipKernelNodeParams *pNodeParams) { CHIP_TRY - if (!pGraphNode || !graph) - RETURN(hipErrorInvalidHandle); + if (!pGraphNode || !graph || !pNodeParams || pNodeParams->func == nullptr || + pNodeParams->kernelParams == nullptr) + RETURN(hipErrorInvalidValue); CHIPInitialize(); CHIPGraphNodeKernel *Node = new CHIPGraphNodeKernel{pNodeParams}; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -679,10 +693,13 @@ hipError_t hipGraphAddKernelNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - *pNodeParams = ((CHIPGraphNodeKernel 
*)node)->getParams(); + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeKernel) + CHIPERR_LOG_AND_THROW("Node is not Kernel", hipErrorInvalidValue); + *pNodeParams = CastNode->getParams(); RETURN(hipSuccess); CHIP_CATCH } @@ -690,10 +707,13 @@ hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, const hipKernelNodeParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - ((CHIPGraphNodeKernel *)node)->setParams(*pNodeParams); + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeKernel) + CHIPERR_LOG_AND_THROW("Node is not Kernel", hipErrorInvalidValue); + CastNode->setParams(*pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -702,16 +722,17 @@ hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipKernelNodeParams *pNodeParams) { CHIP_TRY - if (!hGraphExec) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); + if (!hGraphExec || !node || !pNodeParams) + RETURN(hipErrorInvalidValue); // Graph obtained from hipGraphExec_t is a clone of the original CHIPGraph *Graph = EXEC(hGraphExec)->getOriginalGraphPtr(); // KernelNode here is a handle to the original CHIPGraphNodeKernel *ExecKernelNode = static_cast( GRAPH(Graph)->getClonedNodeFromOriginal(NODE(node))); - assert(ExecKernelNode); + if (ExecKernelNode->getType() != hipGraphNodeTypeKernel) + CHIPERR_LOG_AND_THROW("Node is not Kernel", hipErrorInvalidValue); ExecKernelNode->setParams(*pNodeParams); RETURN(hipSuccess); @@ -725,14 +746,8 @@ hipError_t hipGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, CHIP_TRY CHIPInitialize(); - // graphs test seems wrong - normally we expect hipErrorInvalidHandle - // NULLCHECK(graph, pGraphNode, pCopyParams); if (!graph || !pGraphNode || 
!pCopyParams) - RETURN(hipErrorInvalidHandle); - if (pDependencies == nullptr & numDependencies > 0) - CHIPERR_LOG_AND_THROW( - "numDependencies is not 0 while pDependencies is null", - hipErrorInvalidValue); + RETURN(hipErrorInvalidValue); if (!pCopyParams->srcArray && !pCopyParams->srcPtr.ptr) CHIPERR_LOG_AND_THROW("all src are null", hipErrorInvalidValue); @@ -761,9 +776,14 @@ hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms *pNodeParams) { CHIP_TRY CHIPInitialize(); - hipMemcpy3DParms Params = - static_cast(node)->getParams(); - pNodeParams = &Params; + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpy) + CHIPERR_LOG_AND_THROW("Node is not Memcpy", hipErrorInvalidValue); + + *pNodeParams = CastNode->getParams(); + RETURN(hipSuccess); CHIP_CATCH } @@ -772,7 +792,14 @@ hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms *pNodeParams) { CHIP_TRY CHIPInitialize(); - static_cast(node)->setParams(pNodeParams); + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpy) + CHIPERR_LOG_AND_THROW("Node is not Memcpy", hipErrorInvalidValue); + + CastNode->setParams(pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -782,6 +809,9 @@ hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipMemcpy3DParms *pNodeParams) { CHIP_TRY CHIPInitialize(); + if (!hGraphExec || !node || !pNodeParams) + RETURN(hipErrorInvalidValue); + auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) @@ -789,9 +819,8 @@ hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipErrorInvalidValue); auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemcpy", - hipErrorInvalidValue); + if (CastNode->getType() != 
hipGraphNodeTypeMemcpy) + CHIPERR_LOG_AND_THROW("Node is not Memcpy", hipErrorInvalidValue); CastNode->setParams(const_cast(pNodeParams)); RETURN(hipSuccess); @@ -805,6 +834,9 @@ hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); + if (!graph || !pGraphNode || !dst || !src) + RETURN(hipErrorInvalidValue); + CHIPGraphNodeMemcpy *Node = new CHIPGraphNodeMemcpy(dst, src, count, kind); *pGraphNode = Node; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -819,10 +851,12 @@ hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void *dst, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); + if (!node || !dst || !src || !count) + RETURN(hipErrorInvalidValue); + auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemcpy", - hipErrorInvalidValue); + if (CastNode->getType() != hipGraphNodeTypeMemcpy) + CHIPERR_LOG_AND_THROW("Node is not Memcpy", hipErrorInvalidValue); CastNode->setParams(dst, src, count, kind); RETURN(hipSuccess); @@ -835,6 +869,9 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); + if (!hGraphExec || !node) + RETURN(hipErrorInvalidValue); + auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) @@ -842,9 +879,8 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipErrorInvalidValue); auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemcpy", - hipErrorInvalidValue); + if (CastNode->getType() != hipGraphNodeTypeMemcpy) + CHIPERR_LOG_AND_THROW("Node is not Memcpy", hipErrorInvalidValue); CastNode->setParams(dst, src, count, kind); RETURN(hipSuccess); @@ -875,8 +911,16 @@ hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void *dst, hipMemcpyKind kind) { 
CHIP_TRY CHIPInitialize(); - static_cast(node)->setParams( - dst, symbol, count, offset, kind); + if (!symbol) + RETURN(hipErrorInvalidSymbol); + if (!node || !dst || !count) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpyFromSymbol) + CHIPERR_LOG_AND_THROW("Node is not MemcpyFromSymbol", hipErrorInvalidValue); + + CastNode->setParams(dst, symbol, count, offset, kind); RETURN(hipSuccess); CHIP_CATCH } @@ -886,16 +930,24 @@ hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol( const void *symbol, size_t count, size_t offset, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - // Graph obtained from hipGraphExec_t is a clone of the original - CHIPGraph *Graph = EXEC(hGraphExec)->getOriginalGraphPtr(); - // KernelNode here is a handle to the original - CHIPGraphNodeMemcpyFromSymbol *KernelNode = - ((CHIPGraphNodeMemcpyFromSymbol *)node); - CHIPGraphNodeMemcpyFromSymbol *ExecKernelNode = - ((CHIPGraphNodeMemcpyFromSymbol *)GRAPH(Graph)->getClonedNodeFromOriginal( - KernelNode)); + if (!symbol) + RETURN(hipErrorInvalidSymbol); + if (!node || !hGraphExec) + RETURN(hipErrorInvalidValue); + + auto ExecNode = + EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + if (!ExecNode) + CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", + hipErrorInvalidValue); - ExecKernelNode->setParams(dst, symbol, count, offset, kind); + auto CastNode = static_cast(node); + if (!CastNode) + CHIPERR_LOG_AND_THROW( + "Node provided failed to cast to CHIPGraphNodeMemcpyFromSymbol", + hipErrorInvalidValue); + + CastNode->setParams(dst, symbol, count, offset, kind); RETURN(hipSuccess); CHIP_CATCH } @@ -925,11 +977,17 @@ hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, size_t offset, hipMemcpyKind kind) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); + if (!symbol) + RETURN(hipErrorInvalidSymbol); + if (!node || !src || !count) + RETURN(hipErrorInvalidValue); 
CHIPInitialize(); - static_cast(node)->setParams( - const_cast(src), symbol, count, offset, kind); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpyToSymbol) + CHIPERR_LOG_AND_THROW("Node is not MemcpyToSymbol", hipErrorInvalidValue); + + CastNode->setParams(const_cast(src), symbol, count, offset, kind); RETURN(hipSuccess); CHIP_CATCH } @@ -938,9 +996,12 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol( hipGraphExec_t hGraphExec, hipGraphNode_t node, const void *symbol, const void *src, size_t count, size_t offset, hipMemcpyKind kind) { CHIP_TRY - if (!node || !hGraphExec) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); + if (!symbol) + RETURN(hipErrorInvalidSymbol); + if (!node || !hGraphExec) + RETURN(hipErrorInvalidValue); + auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) @@ -963,8 +1024,12 @@ hipError_t hipGraphAddMemsetNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipMemsetParams *pMemsetParams) { CHIP_TRY - if (!graph || !pGraphNode) - RETURN(hipErrorInvalidHandle); + if (!graph || !pGraphNode || !pMemsetParams || + pMemsetParams->dst == nullptr || pMemsetParams->height == 0) + RETURN(hipErrorInvalidValue); + if (pMemsetParams->elementSize != 1 && pMemsetParams->elementSize != 2 && + pMemsetParams->elementSize != 4) + RETURN(hipErrorInvalidValue); CHIPInitialize(); CHIPGraphNodeMemset *Node = new CHIPGraphNodeMemset(pMemsetParams); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -978,12 +1043,15 @@ hipError_t hipGraphAddMemsetNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - hipMemsetParams Params = - static_cast(node)->getParams(); - *pNodeParams = Params; + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + + auto CastNode = 
static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemset) + CHIPERR_LOG_AND_THROW("Node is not MemcpyFromSymbol", hipErrorInvalidValue); + + *pNodeParams = CastNode->getParams(); RETURN(hipSuccess); CHIP_CATCH } @@ -991,10 +1059,15 @@ hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - static_cast(node)->setParams(pNodeParams); + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemset) + CHIPERR_LOG_AND_THROW("Node is not MemcpyFromSymbol", hipErrorInvalidValue); + + CastNode->setParams(pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -1003,9 +1076,10 @@ hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipMemsetParams *pNodeParams) { CHIP_TRY - if (!node || !hGraphExec) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); + if (!node || !hGraphExec) + RETURN(hipErrorInvalidValue); + auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) @@ -1013,9 +1087,8 @@ hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipErrorInvalidValue); auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemset", - hipErrorInvalidValue); + if (CastNode->getType() != hipGraphNodeTypeMemset) + CHIPERR_LOG_AND_THROW("Node is not MemcpyFromSymbol", hipErrorInvalidValue); CastNode->setParams(pNodeParams); RETURN(hipSuccess); @@ -1027,9 +1100,9 @@ hipError_t hipGraphAddHostNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipHostNodeParams *pNodeParams) { CHIP_TRY - if (!graph || !pGraphNode) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); + if (!graph || !pGraphNode || !pNodeParams || pNodeParams->fn 
== nullptr) + RETURN(hipErrorInvalidValue); CHIPGraphNodeHost *Node = new CHIPGraphNodeHost(pNodeParams); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); GRAPH(graph)->addNode(Node); @@ -1042,12 +1115,15 @@ hipError_t hipGraphAddHostNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - hipHostNodeParams Params = - static_cast(node)->getParams(); - *pNodeParams = Params; + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeHost) + CHIPERR_LOG_AND_THROW("NodeType is not Host", hipErrorInvalidValue); + + *pNodeParams = CastNode->getParams(); RETURN(hipSuccess); CHIP_CATCH } @@ -1055,10 +1131,15 @@ hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); + if (!node || !pNodeParams || pNodeParams->fn == nullptr) + RETURN(hipErrorInvalidValue); CHIPInitialize(); - static_cast(node)->setParams(pNodeParams); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeHost) + CHIPERR_LOG_AND_THROW("NodeType is not Host", hipErrorInvalidValue); + + CastNode->setParams(pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -1068,7 +1149,7 @@ hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, const hipHostNodeParams *pNodeParams) { CHIP_TRY if (!node || !hGraphExec) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); @@ -1077,9 +1158,8 @@ hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, hipErrorInvalidValue); auto CastNode = static_cast(ExecNode); - if (!CastNode) - 
CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemset", - hipErrorInvalidValue); + if (CastNode->getType() != hipGraphNodeTypeHost) + CHIPERR_LOG_AND_THROW("NodeType is not Host", hipErrorInvalidValue); CastNode->setParams(pNodeParams); RETURN(hipSuccess); @@ -1092,8 +1172,8 @@ hipError_t hipGraphAddChildGraphNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipGraph_t childGraph) { CHIP_TRY - if (!graph || !pGraphNode) - RETURN(hipErrorInvalidHandle); + if (!graph || !pGraphNode || !childGraph) + RETURN(hipErrorInvalidValue); CHIPInitialize(); CHIPGraphNodeGraph *Node = new CHIPGraphNodeGraph(GRAPH(childGraph)); *pGraphNode = Node; @@ -1108,9 +1188,14 @@ hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t *pGraph) { CHIP_TRY if (!node || !pGraph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); - *pGraph = static_cast(node)->getGraph(); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeGraph) + CHIPERR_LOG_AND_THROW("Node is not NodeTypeGraph", hipErrorInvalidValue); + + *pGraph = CastNode->getGraph(); RETURN(hipSuccess); CHIP_CATCH } @@ -1119,10 +1204,15 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipGraph_t childGraph) { CHIP_TRY - if (!node || !hGraphExec) - RETURN(hipErrorInvalidHandle); + if (!node || !hGraphExec || !childGraph) + RETURN(hipErrorInvalidValue); CHIPInitialize(); - static_cast(node)->setGraph(GRAPH(childGraph)); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeGraph) + CHIPERR_LOG_AND_THROW("Node is not NodeTypeGraph", hipErrorInvalidValue); + + CastNode->setGraph(GRAPH(childGraph)); RETURN(hipSuccess); CHIP_CATCH } @@ -1132,7 +1222,7 @@ hipError_t hipGraphAddEmptyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies) { CHIP_TRY if (!graph || !pGraphNode) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); 
CHIPInitialize(); CHIPGraphNodeEmpty *Node = new CHIPGraphNodeEmpty(); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -1148,8 +1238,8 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipEvent_t event) { CHIP_TRY - if (!graph || !pGraphNode) - RETURN(hipErrorInvalidHandle); + if (!graph || !pGraphNode || !event) + RETURN(hipErrorInvalidValue); CHIPInitialize(); CHIPGraphNodeEventRecord *Node = new CHIPGraphNodeEventRecord(static_cast(event)); @@ -1164,12 +1254,13 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t *event_out) { CHIP_TRY if (!node || !event_out) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); + auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Failed to cast CHIPGraphNodeEventRecord", - hipErrorInvalidValue); + if (CastNode->getType() != hipGraphNodeTypeEventRecord) + CHIPERR_LOG_AND_THROW("Node is not EventRecord", hipErrorInvalidValue); + *event_out = CastNode->getEvent(); RETURN(hipSuccess); CHIP_CATCH @@ -1178,13 +1269,14 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { CHIP_TRY - if (!node || !event) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); + if (!node || !event) + RETURN(hipErrorInvalidValue); + auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Failed to cast CHIPGraphNodeEventRecord", - hipErrorInvalidValue); + if (CastNode->getType() != hipGraphNodeTypeEventRecord) + CHIPERR_LOG_AND_THROW("Node is not EventRecord", hipErrorInvalidValue); + CastNode->setEvent(static_cast(event)); RETURN(hipSuccess); CHIP_CATCH @@ -1195,6 +1287,8 @@ hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, hipEvent_t event) { CHIP_TRY CHIPInitialize(); + if (!hNode || !hGraphExec || !event) + RETURN(hipErrorInvalidValue); auto ExecNode = 
EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(hNode)); if (!ExecNode) @@ -1202,10 +1296,8 @@ hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, hipErrorInvalidValue); auto CastNode = static_cast(hNode); - if (!CastNode) - CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeEventRecord", - hipErrorInvalidValue); + if (CastNode->getType() != hipGraphNodeTypeEventRecord) + CHIPERR_LOG_AND_THROW("Node is not EventRecord", hipErrorInvalidValue); CastNode->setEvent(static_cast(event)); RETURN(hipSuccess); @@ -1218,8 +1310,12 @@ hipError_t hipGraphAddEventWaitNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipEvent_t event) { CHIP_TRY CHIPInitialize(); + if (!graph || !pGraphNode || !event) + RETURN(hipErrorInvalidValue); + CHIPGraphNodeWaitEvent *Node = new CHIPGraphNodeWaitEvent(static_cast(event)); + *pGraphNode = Node; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); GRAPH(graph)->addNode(Node); @@ -1232,11 +1328,12 @@ hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t *event_out) { CHIP_TRY CHIPInitialize(); + if (!node || !event_out) + RETURN(hipErrorInvalidValue); + auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeWaitEvent", - hipErrorInvalidValue); + if (CastNode->getType() != hipGraphNodeTypeWaitEvent) + CHIPERR_LOG_AND_THROW("Node is not WaitEvent", hipErrorInvalidValue); *event_out = CastNode->getEvent(); RETURN(hipSuccess); @@ -1247,11 +1344,12 @@ hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { CHIP_TRY CHIPInitialize(); + if (!node || !event) + RETURN(hipErrorInvalidValue); + auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeWaitEvent", - hipErrorInvalidValue); + if (CastNode->getType() != hipGraphNodeTypeWaitEvent) + CHIPERR_LOG_AND_THROW("Node is not WaitEvent", 
hipErrorInvalidValue); CastNode->setEvent(static_cast(event)); RETURN(hipSuccess); @@ -1263,6 +1361,9 @@ hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipEvent_t event) { CHIP_TRY CHIPInitialize(); + if (!hNode || !hGraphExec || !event) + RETURN(hipErrorInvalidValue); + auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(hNode)); if (!ExecNode) @@ -1271,10 +1372,8 @@ hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, // TODO Grahs check all of these - somewhere using hNode instead of ExecNode auto CastNode = static_cast(ExecNode); - if (!CastNode) - CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeWaitEvent", - hipErrorInvalidValue); + if (CastNode->getType() != hipGraphNodeTypeWaitEvent) + CHIPERR_LOG_AND_THROW("Node is not WaitEvent", hipErrorInvalidValue); CastNode->setEvent(static_cast(event)); RETURN(hipSuccess); @@ -1352,12 +1451,14 @@ hipError_t hipIpcOpenMemHandle(void **DevPtr, hipIpcMemHandle_t Handle, UNIMPLEMENTED(hipErrorNotSupported); CHIP_CATCH } + hipError_t hipIpcCloseMemHandle(void *DevPtr) { CHIP_TRY CHIPInitialize(); UNIMPLEMENTED(hipErrorNotSupported); CHIP_CATCH } + hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *Handle, void *DevPtr) { CHIP_TRY CHIPInitialize(); @@ -2260,9 +2361,6 @@ hipError_t hipEventDestroy(hipEvent_t Event) { CHIPEvent *ChipEvent = static_cast(Event); ChipEvent->decreaseRefCount("hipEventDestroy"); - if (ChipEvent->getCHIPRefc() != 0) { - logError("hipEventDestroy was called but remaining refcount is not 0"); - } RETURN(hipSuccess); CHIP_CATCH diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index ea3c4bcd1..69e0895c3 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -53,6 +53,18 @@ void CHIPGraphNode::DFS(std::vector CurrPath, return; } +void CHIPGraphNode::checkDependencies(size_t numDependencies, + CHIPGraphNode **pDependencies) { + if (numDependencies > 0 && pDependencies == nullptr) { + 
CHIPERR_LOG_AND_THROW("numDependencies > 0 && pDependencies == nullptr", + hipErrorInvalidValue); + } + if (numDependencies == 0 && pDependencies != nullptr) { + CHIPERR_LOG_AND_THROW("numDependencies == 0 && pDependencies != nullptr", + hipErrorInvalidValue); + } +} + CHIPGraph::CHIPGraph(const CHIPGraph &OriginalGraph) { /** * Create another Graph using the copy constructor. @@ -141,7 +153,7 @@ CHIPGraphNodeKernel::CHIPGraphNodeKernel(const hipKernelNodeParams *TheParams) Params_.kernelParams = TheParams->kernelParams; Params_.sharedMemBytes = TheParams->sharedMemBytes; auto Dev = Backend->getActiveDevice(); - CHIPKernel *Kernel_ = Dev->findKernel(HostPtr(Params_.func)); + Kernel_ = Dev->findKernel(HostPtr(Params_.func)); if (!Kernel_) CHIPERR_LOG_AND_THROW("Could not find requested kernel", hipErrorInvalidDeviceFunction); @@ -162,7 +174,7 @@ CHIPGraphNodeKernel::CHIPGraphNodeKernel(const void *HostFunction, dim3 GridDim, Params_.kernelParams = Args; Params_.sharedMemBytes = SharedMem; auto Dev = Backend->getActiveDevice(); - CHIPKernel *Kernel_ = Dev->findKernel(HostPtr(Params_.func)); + Kernel_ = Dev->findKernel(HostPtr(Params_.func)); if (!Kernel_) CHIPERR_LOG_AND_THROW("Could not find requested kernel", hipErrorInvalidDeviceFunction); @@ -192,36 +204,52 @@ void CHIPGraph::removeNode(CHIPGraphNode *Node) { } } -void CHIPGraphExec::launch(CHIPQueue *Queue) { - logDebug("{} CHIPGraphExec::launch({})", (void *)this, (void *)Queue); +bool CHIPGraphExec::tryLaunchNative(CHIPQueue *Queue) { bool UsedNativeGraph = false; - Queue->getContext(); - if (NativeGraph && NativeGraph->isFinalized()) { - // launch existing native graph - UsedNativeGraph = Queue->enqueueNativeGraph(NativeGraph.get()); - if (UsedNativeGraph) - Queue->finish(); - } else if (!NativeGraph) { - // construct native graph - bool FailedToAddNode = false; + if (NativeGraph) { + if (NativeGraph->isFinalized()) { + logDebug("NativeGraph: launching existing graph"); + } else { + logDebug("NativeGraph: 
constructed but failed to finalize"); + return false; + } + } else { + logDebug("NativeGraph: trying to construct"); NativeGraph.reset(Queue->createNativeGraph()); + if (!NativeGraph) + return false; + for (auto &Node : OriginalGraph_->getNodes()) { if (!NativeGraph->addNode(Node)) { - FailedToAddNode = true; - break; + logError("NativeGraph: failed to add node of type: {}", + Node->getType()); + return false; } } - if (!FailedToAddNode) { - UsedNativeGraph = NativeGraph->finalize() && - Queue->enqueueNativeGraph(NativeGraph.get()); - if (UsedNativeGraph) - Queue->finish(); + + if (!NativeGraph->finalize()) { + logDebug("NativeGraph: failed to finalize"); + return false; } } + assert(NativeGraph->isFinalized()); + if (Queue->enqueueNativeGraph(NativeGraph.get())) { + logDebug("NativeGraph: launched"); + Queue->finish(); + return true; + } else { + return false; + } +} + +void CHIPGraphExec::launch(CHIPQueue *Queue) { + logDebug("{} CHIPGraphExec::launch({})", (void *)this, (void *)Queue); + bool UsedNativeGraph = tryLaunchNative(Queue); + if (!UsedNativeGraph) { - // NativeGraph constructed but failed to finalize. 
- // Use the original code path + logDebug("NativeGraph: failed to construct/finalize/launch, using the " + "original code path"); compile(); auto ExecQueueCopy = ExecQueues_; while (ExecQueueCopy.size()) { @@ -255,7 +283,7 @@ void unchainUnnecessaryDeps(std::vector Path, } logDebug("unchainUnnecessaryDeps({}, {})", PathStr, LongerPathStr); - for (int i = 0; i < SubPath.size(); i++) { + for (size_t i = 0; i < SubPath.size(); i++) { if (SubPath[i] != Path[i]) { SubPath[i - 1]->removeDependency(SubPath[i]); break; @@ -401,7 +429,7 @@ void CHIPGraphNodeHost::execute(CHIPQueue *Queue) const { void CHIPGraphExec::ExtractSubGraphs_() { auto Nodes = CompiledGraph_.getNodes(); - for (int i = 0; i < Nodes.size(); i++) { + for (size_t i = 0; i < Nodes.size(); i++) { auto Node = Nodes[i]; if (Node->getType() == hipGraphNodeTypeGraph) { auto SubGraphNode = static_cast(Node); diff --git a/src/CHIPGraph.hh b/src/CHIPGraph.hh index 0b3124dcb..4b789ccb8 100644 --- a/src/CHIPGraph.hh +++ b/src/CHIPGraph.hh @@ -57,6 +57,8 @@ protected: CHIPGraphNode(hipGraphNodeType Type) : Type_(Type) {} + void checkDependencies(size_t numDependencies, CHIPGraphNode **pDependencies); + public: std::string Msg; // TODO Graphs cleanup CHIPGraphNode(const CHIPGraphNode &Other) @@ -119,6 +121,10 @@ public: * @param TheNode */ void addDependency(CHIPGraphNode *TheNode) { + if (TheNode == nullptr) { + CHIPERR_LOG_AND_THROW("addDependency called with nullptr", + hipErrorInvalidValue); + } logDebug("{} addDependency() <{} depends on {}>", (void *)this, Msg, TheNode->Msg); Dependencies_.push_back(TheNode); @@ -133,6 +139,10 @@ public: * @param TheNode */ void removeDependency(CHIPGraphNode *TheNode) { + if (TheNode == nullptr) { + CHIPERR_LOG_AND_THROW("removeDependency called with nullptr", + hipErrorInvalidValue); + } logDebug("{} removeDependency() <{} depends on {}>", (void *)this, Msg, TheNode->Msg); auto FoundNode = @@ -140,7 +150,7 @@ public: if (FoundNode != Dependencies_.end()) { 
Dependencies_.erase(FoundNode); } else { - CHIPERR_LOG_AND_THROW("Failed to find", hipErrorTbd); + CHIPERR_LOG_AND_THROW("Failed to find", hipErrorInvalidValue); } } @@ -152,9 +162,9 @@ public: * @param Dependencies * @param Count */ - void addDependencies(CHIPGraphNode **Dependencies, int Count) { - - for (int i = 0; i < Count; i++) { + void addDependencies(CHIPGraphNode **Dependencies, size_t Count) { + checkDependencies(Count, Dependencies); + for (size_t i = 0; i < Count; i++) { addDependency(Dependencies[i]); } } @@ -179,8 +189,9 @@ public: * * @param TheNode */ - void removeDependencies(CHIPGraphNode **Dependencies, int Count) { - for (int i = 0; i < Count; i++) { + void removeDependencies(CHIPGraphNode **Dependencies, size_t Count) { + checkDependencies(Count, Dependencies); + for (size_t i = 0; i < Count; i++) { removeDependency(Dependencies[i]); } } @@ -315,8 +326,8 @@ public: Src_(Other.Src_), Count_(Other.Count_), Kind_(Other.Kind_) {} CHIPGraphNodeMemcpy(hipMemcpy3DParms Params) - : CHIPGraphNode(hipGraphNodeTypeMemcpy), Params_(Params), Src_(nullptr), - Dst_(nullptr), Count_(0), Kind_(hipMemcpyKind::hipMemcpyDefault) {} + : CHIPGraphNode(hipGraphNodeTypeMemcpy), Params_(Params), Dst_(nullptr), + Src_(nullptr), Count_(0), Kind_(hipMemcpyKind::hipMemcpyDefault) {} CHIPGraphNodeMemcpy(const hipMemcpy3DParms *Params) : CHIPGraphNode(hipGraphNodeTypeMemcpy) { setParams(Params); @@ -651,7 +662,8 @@ public: std::vector getRootNodes(); CHIPGraphNode *getClonedNodeFromOriginal(CHIPGraphNode *OriginalNode) { if (!CloneMap_.count(OriginalNode)) { - CHIPERR_LOG_AND_THROW("Failed to find the node in clone", hipErrorTbd); + CHIPERR_LOG_AND_THROW("Failed to find the node in clone", + hipErrorInvalidValue); } else { return CloneMap_[OriginalNode]; } @@ -757,6 +769,7 @@ public: ~CHIPGraphExec() {} void launch(CHIPQueue *Queue); + bool tryLaunchNative(CHIPQueue *Queue); CHIPGraph *getOriginalGraphPtr() const { return OriginalGraph_; } }; diff --git 
a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index 9548d8354..5b75f48fc 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -226,7 +226,7 @@ void CHIPEventLevel0::reset() { LOCK(EventMtx); // CHIPEvent::TrackCalled_ TrackCalled_ = false; EventStatus_ = EVENT_STATUS_INIT; - *Refc_ = 1; + Refc_ = 1; #ifndef NDEBUG markDeleted(false); #endif diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 480d83cf4..0f7f9f574 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -234,12 +234,12 @@ annotateSvmPointers(const CHIPContextOpenCL &Ctx, cl_kernel KernelAPIHandle) { std::vector SvmAnnotationList; std::unique_ptr>> SvmKeepAlives; LOCK(Ctx.ContextMtx); // CHIPContextOpenCL::SvmMemory - auto NumSvmAllocations = Ctx.SvmMemory.getNumAllocations(); + auto NumSvmAllocations = Ctx.getRegion().getNumAllocations(); if (NumSvmAllocations) { SvmAnnotationList.reserve(NumSvmAllocations); SvmKeepAlives.reset(new std::vector>()); SvmKeepAlives->reserve(NumSvmAllocations); - for (std::shared_ptr Ptr : Ctx.SvmMemory.getSvmPointers()) { + for (std::shared_ptr Ptr : Ctx.getRegion().getSvmPointers()) { SvmAnnotationList.push_back(Ptr.get()); SvmKeepAlives->push_back(Ptr); } @@ -765,10 +765,9 @@ CHIPKernelOpenCL *CHIPKernelOpenCL::clone() { // NOTE: clCloneKernel is not used here due to its experience on // Intel (GPU) OpenCL which crashed if clSetKernelArgSVMPointer() was // called on the original cl_kernel. 
- auto Cloned = clCreateKernel(Module->get()->get(), Name_.c_str(), &Err); + auto Kernel = cl::Kernel(Module->get(), Name_.c_str(), &Err); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd); - return new CHIPKernelOpenCL(cl::Kernel(Cloned, false), Device, Name_, - getFuncInfo(), Module); + return new CHIPKernelOpenCL(Kernel, Device, Name_, getFuncInfo(), Module); } hipError_t CHIPKernelOpenCL::getAttributes(hipFuncAttributes *Attr) { @@ -851,6 +850,7 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, SupportsCommandBuffers = DevExts.find("cl_khr_command_buffer") != std::string::npos; if (SupportsCommandBuffers) { + logDebug("Device supports cl_khr_command_buffer"); Exts.clCreateCommandBufferKHR = (clCreateCommandBufferKHR_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clCreateCommandBufferKHR"); @@ -887,6 +887,7 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, SupportsCommandBuffersSVM = DevExts.find("cl_pocl_command_buffer_svm") != std::string::npos; if (SupportsCommandBuffersSVM) { + logDebug("Device supports cl_pocl_command_buffer_svm"); Exts.clCommandSVMMemcpyPOCL = (clCommandSVMMemcpyPOCL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clCommandSVMMemcpyPOCL"); @@ -905,6 +906,7 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, SupportsCommandBuffersHost = DevExts.find("cl_pocl_command_buffer_host_exec") != std::string::npos; if (SupportsCommandBuffersHost) { + logDebug("Device supports cl_pocl_command_buffer_host_exec"); Exts.clCommandHostFuncPOCL = (clCommandHostFuncPOCL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clCommandHostFuncPOCL"); @@ -1329,8 +1331,11 @@ CHIPGraphNative *CHIPQueueOpenCL::createNativeGraph() { int err = CL_SUCCESS; cl_command_buffer_khr Res = Ctx->exts()->clCreateCommandBufferKHR(1, &CQ, 0, &err); - if (Res == nullptr || err != CL_SUCCESS) + if (Res == nullptr || err != CL_SUCCESS) { + logError("clCreateCommandBufferKHR FAILED 
with status {}", + resultToString(err)); return nullptr; + } return new CHIPGraphNativeOpenCL(Res, CQ, Ctx->exts()); } @@ -1346,7 +1351,7 @@ CHIPEvent *CHIPQueueOpenCL::enqueueNativeGraph(CHIPGraphNative *NativeGraph) { if (NativeGraph == nullptr) return nullptr; cl_command_queue CQ = ClQueue.get(); - cl_event TmpEv; + cl_event TmpEv = nullptr; int Status = Ctx->exts()->clEnqueueCommandBufferKHR(1, &CQ, G->get(), 0, nullptr, &TmpEv); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); @@ -1435,14 +1440,19 @@ bool CHIPGraphNativeOpenCL::finalize() { if (Status == CL_SUCCESS) { Finalized = true; return true; + } else { + logError("clFinalizeCommandBufferKHR FAILED with status {}", + resultToString(Status)); + return false; } - return false; } CHIPGraphNativeOpenCL::~CHIPGraphNativeOpenCL() { if (Handle == nullptr) return; int Err = Exts->clReleaseCommandBufferKHR(Handle); + logError("clReleaseCommandBufferKHR FAILED with status {}", + resultToString(Err)); assert(Err == CL_SUCCESS); } @@ -1475,7 +1485,7 @@ bool CHIPGraphNativeOpenCL::addKernelNode( assert(Exts->clCommandNDRangeKernelKHR); Status = Exts->clCommandNDRangeKernelKHR( - Handle, CmdQ, Properties, + Handle, nullptr, Properties, CLK->get().get(), // cl_kernel WorkDim, // cl_uint work_dim nullptr, // const size_t* global_work_offset, @@ -1504,7 +1514,7 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( // Intel GPU OpenCL driver seems to do also so for clEnqueueSVMMemcpy, which // makes/ it pass, but Intel CPU OpenCL returns CL_​MEM_​COPY_​OVERLAP // like it should. To unify the behavior, let's convert the special case to - // a maker here, so we can return an event. + // a marker here, so we can return an event. 
Node->getParams(Dst, Src, Size, Kind); Params = Node->getParams(); @@ -1549,21 +1559,21 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( size_t dst_slice_pitch = dst_row_pitch * Params.dstPtr.ysize; Status = Exts->clCommandSVMMemcpyRectPOCL( - Handle, CmdQ, Dst, Src, dst_origin, src_origin, region, dst_row_pitch, - dst_slice_pitch, src_row_pitch, src_slice_pitch, SyncPointDeps.size(), - SyncPointDeps.data(), SyncPoint, nullptr); + Handle, nullptr, Dst, Src, dst_origin, src_origin, region, + dst_row_pitch, dst_slice_pitch, src_row_pitch, src_slice_pitch, + SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); } else { // 1D copy if (!Exts->clCommandSVMMemcpyPOCL) return false; if (Dst == Src) { Status = Exts->clCommandBarrierWithWaitListKHR( - Handle, CmdQ, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, - nullptr); + Handle, nullptr, SyncPointDeps.size(), SyncPointDeps.data(), + SyncPoint, nullptr); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); } else { Status = Exts->clCommandSVMMemcpyPOCL( - Handle, CmdQ, Dst, Src, Size, SyncPointDeps.size(), + Handle, nullptr, Dst, Src, Size, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); } } @@ -1593,7 +1603,7 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( return false; int Status = Exts->clCommandSVMMemcpyPOCL( - Handle, CmdQ, Dst, (const char *)Src + Offset, SizeBytes, + Handle, nullptr, Dst, (const char *)Src + Offset, SizeBytes, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); return Status == CL_SUCCESS; @@ -1620,8 +1630,8 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( return false; int Status = Exts->clCommandSVMMemcpyPOCL( - Handle, CmdQ, (char *)Dst + Offset, Src, SizeBytes, SyncPointDeps.size(), - SyncPointDeps.data(), SyncPoint, nullptr); + Handle, nullptr, (char *)Dst + Offset, Src, SizeBytes, + SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); return Status == CL_SUCCESS; } @@ -1637,7 +1647,7 @@ bool CHIPGraphNativeOpenCL::addMemsetNode( 
int Status; size_t Region[3] = {Params.width, Params.height, 1}; Status = Exts->clCommandSVMMemfillRectPOCL( - Handle, CmdQ, Params.dst, + Handle, nullptr, Params.dst, nullptr, // origin Region, // region Params.pitch, // row pitch @@ -1661,7 +1671,7 @@ bool CHIPGraphNativeOpenCL::addHostNode( int Status; Status = Exts->clCommandHostFuncPOCL( - Handle, CmdQ, Params.fn, Params.userData, SyncPointDeps.size(), + Handle, nullptr, Params.fn, Params.userData, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); return Status == CL_SUCCESS; } @@ -1679,8 +1689,8 @@ bool CHIPGraphNativeOpenCL::addEventRecordNode( int Status; // TODO BROKEN - Status = - Exts->clCommandSignalEventPOCL(Handle, CmdQ, nullptr, SyncPoint, nullptr); + Status = Exts->clCommandSignalEventPOCL(Handle, nullptr, nullptr, SyncPoint, + nullptr); return Status == CL_SUCCESS; } @@ -1695,7 +1705,7 @@ bool CHIPGraphNativeOpenCL::addEventWaitNode( CHIPEventOpenCL *CLE = static_cast(E); int Status; - Status = Exts->clCommandWaitForEventPOCL(Handle, CmdQ, CLE->get().get(), + Status = Exts->clCommandWaitForEventPOCL(Handle, nullptr, CLE->get().get(), SyncPoint, nullptr); return Status == CL_SUCCESS; } diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index 6b0bb6012..0e330571f 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -117,6 +117,7 @@ protected: public: CHIPModuleOpenCL(const SPVModule &SrcMod); + cl::Program &get() { return Program_; } virtual ~CHIPModuleOpenCL() {} virtual void compile(CHIPDevice *ChipDevice) override; }; @@ -193,6 +194,7 @@ public: CHIPHostAllocFlags Flags = CHIPHostAllocFlags()) override; bool isAllocatedPtrMappedToVM(void *Ptr) override { return false; } // TODO + const SVMemoryRegion &getRegion() const { return SvmMemory; } virtual void freeImpl(void *Ptr) override; cl::Context &get() { return ClContext; } bool supportsCommandBuffers() { return SupportsCommandBuffers; } From 
689cd1fb7ae11e1b0564901db3e54ab1e777d60d Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 10 May 2023 15:22:24 +0300 Subject: [PATCH 08/35] OpenCL backend: add support for cl_intel_unified_shared_memory Previously, the OpenCL device was checked for fine-grained SVM support, and if it was unavailable, the backend would assume only coarse-grained support and insert SVMMap & SVMUnmap for each buffer used by a kernel. Since Intel OpenCL and PoCL both support the USM extension now, but don't support fine-grained SVM, this commit helps to avoid the overhead of the unnecessary Map & Unmap commands. --- src/backend/OpenCL/CHIPBackendOpenCL.cc | 69 +++++++++++++++---------- src/backend/OpenCL/CHIPBackendOpenCL.hh | 26 +++++++--- src/backend/OpenCL/SVMemoryRegion.cc | 51 +++++++++++++++--- 3 files changed, 107 insertions(+), 39 deletions(-) diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 0f7f9f574..9a4419bfd 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -367,17 +367,6 @@ CHIPDeviceOpenCL::CHIPDeviceOpenCL(CHIPContextOpenCL *ChipCtx, cl::Device DevIn, : CHIPDevice(ChipCtx, Idx), ClDevice(DevIn) { logTrace("CHIPDeviceOpenCL initialized via OpenCL device pointer and context " "pointer"); - cl_device_svm_capabilities DeviceSVMCapabilities; - auto Status = - DevIn.getInfo(CL_DEVICE_SVM_CAPABILITIES, &DeviceSVMCapabilities); - CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); - this->SupportsFineGrainSVM = - DeviceSVMCapabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER; - if (this->SupportsFineGrainSVM) { - logTrace("Device supports fine grain SVM"); - } else { - logTrace("Device does not support fine grain SVM"); - } } CHIPDeviceOpenCL *CHIPDeviceOpenCL::create(cl::Device ClDevice, @@ -827,13 +816,8 @@ CHIPKernelOpenCL::CHIPKernelOpenCL(cl::Kernel ClKernel, CHIPDeviceOpenCL *Dev, // CHIPContextOpenCL 
//************************************************************************* -bool CHIPContextOpenCL::allDevicesSupportFineGrainSVM() { - bool allFineGrainSVM = true; - if (!static_cast(this->ChipDevice_) - ->supportsFineGrainSVM()) { - allFineGrainSVM = false; - } - return allFineGrainSVM; +bool CHIPContextOpenCL::allDevicesSupportFineGrainSVMorUSM() { + return SupportsFineGrainSVM || SupportsIntelUSM; } void CHIPContextOpenCL::freeImpl(void *Ptr) { @@ -919,8 +903,39 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, } #endif + SupportsIntelUSM = + DevExts.find("cl_intel_unified_shared_memory") != std::string::npos; + if (SupportsIntelUSM) { + logDebug("Device supports Intel USM"); + Exts.USM.clSharedMemAllocINTEL = + (clSharedMemAllocINTEL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clSharedMemAllocINTEL"); + Exts.USM.clDeviceMemAllocINTEL = + (clDeviceMemAllocINTEL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clDeviceMemAllocINTEL"); + Exts.USM.clHostMemAllocINTEL = + (clHostMemAllocINTEL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clHostMemAllocINTEL"); + Exts.USM.clMemFreeINTEL = + (clMemFreeINTEL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clMemFreeINTEL"); + } else { + logDebug("Device does not support Intel USM"); + } + + cl_device_svm_capabilities DeviceSVMCapabilities; + int Err = Dev.getInfo(CL_DEVICE_SVM_CAPABILITIES, &DeviceSVMCapabilities); + CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd); + SupportsFineGrainSVM = + DeviceSVMCapabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER; + if (SupportsFineGrainSVM) { + logTrace("Device supports fine grain SVM"); + } else { + logTrace("Device does not support fine grain SVM"); + } + ClContext = CtxIn; - SvmMemory.init(CtxIn); + SvmMemory.init(CtxIn, Dev, Exts.USM, SupportsFineGrainSVM, SupportsIntelUSM); } void *CHIPContextOpenCL::allocateImpl(size_t Size, size_t Alignment, @@ -929,7 +944,7 @@ void 
*CHIPContextOpenCL::allocateImpl(size_t Size, size_t Alignment, void *Retval; LOCK(ContextMtx); // CHIPContextOpenCL::SvmMemory - Retval = SvmMemory.allocate(Size); + Retval = SvmMemory.allocate(Size, Alignment, MemType); return Retval; } @@ -960,9 +975,10 @@ void CL_CALLBACK pfn_notify(cl_event Event, cl_int CommandExecStatus, void CHIPQueueOpenCL::MemMap(const AllocationInfo *AllocInfo, CHIPQueue::MEM_MAP_TYPE Type) { - if (static_cast(this->getDevice()) - ->supportsFineGrainSVM()) { - logDebug("Device supports fine grain SVM. Skipping MemMap/Unmap"); + CHIPContextOpenCL *C = static_cast(ChipContext_); + if (C->allDevicesSupportFineGrainSVMorUSM()) { + logDebug("Device supports fine grain SVM or USM. Skipping MemMap/Unmap"); + return; } cl_int Status; // TODO why does this code use blocking = true ?? @@ -986,9 +1002,10 @@ void CHIPQueueOpenCL::MemMap(const AllocationInfo *AllocInfo, } void CHIPQueueOpenCL::MemUnmap(const AllocationInfo *AllocInfo) { - if (static_cast(this->getDevice()) - ->supportsFineGrainSVM()) { - logDebug("Device supports fine grain SVM. Skipping MemMap/Unmap"); + CHIPContextOpenCL *C = static_cast(ChipContext_); + if (C->allDevicesSupportFineGrainSVMorUSM()) { + logDebug("Device supports fine grain SVM or USM. 
Skipping MemMap/Unmap"); + return; } logDebug("CHIPQueueOpenCL::MemUnmap"); diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index 0e330571f..07e15ae39 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -122,20 +122,32 @@ public: virtual void compile(CHIPDevice *ChipDevice) override; }; +typedef struct { + clSharedMemAllocINTEL_fn clSharedMemAllocINTEL; + clDeviceMemAllocINTEL_fn clDeviceMemAllocINTEL; + clHostMemAllocINTEL_fn clHostMemAllocINTEL; + clMemFreeINTEL_fn clMemFreeINTEL; +} CHIPContextUSMExts; + class SVMemoryRegion { - enum SVM_ALLOC_GRANULARITY { COARSE_GRAIN, FINE_GRAIN }; // ContextMutex should be enough std::map, size_t, PointerCmp> SvmAllocations_; cl::Context Context_; + cl::Device Device_; + + CHIPContextUSMExts USM; + bool SupportsFineGrain; + bool SupportsIntelUSM; public: using const_svm_alloc_iterator = ConstMapKeyIterator< std::map, size_t, PointerCmp>>; - void init(cl::Context C) { Context_ = C; } + void init(cl::Context C, cl::Device D, CHIPContextUSMExts U, bool FineGrain, + bool IntelUSM); SVMemoryRegion &operator=(SVMemoryRegion &&Rhs); - void *allocate(size_t Size, SVM_ALLOC_GRANULARITY Granularity = COARSE_GRAIN); + void *allocate(size_t Size, size_t Alignment, hipMemoryType MemType); bool free(void *P); bool hasPointer(const void *Ptr); bool pointerSize(void *Ptr, size_t *Size); @@ -175,6 +187,8 @@ typedef struct { clCommandSignalEventPOCL_fn clCommandSignalEventPOCL; #endif + CHIPContextUSMExts USM; + } CHIPContextClExts; class CHIPContextOpenCL : public CHIPContext { @@ -183,11 +197,13 @@ private: bool SupportsCommandBuffers; bool SupportsCommandBuffersSVM; bool SupportsCommandBuffersHost; + bool SupportsIntelUSM; + bool SupportsFineGrainSVM; CHIPContextClExts Exts; SVMemoryRegion SvmMemory; public: - bool allDevicesSupportFineGrainSVM(); + bool allDevicesSupportFineGrainSVMorUSM(); CHIPContextOpenCL(cl::Context ClContext, cl::Device 
Dev, cl::Platform Plat); virtual ~CHIPContextOpenCL() {} void *allocateImpl(size_t Size, size_t Alignment, hipMemoryType MemType, @@ -205,7 +221,6 @@ public: class CHIPDeviceOpenCL : public CHIPDevice { private: - bool SupportsFineGrainSVM = false; CHIPDeviceOpenCL(CHIPContextOpenCL *ChipContext, cl::Device ClDevice, int Idx); cl::Device ClDevice; @@ -214,7 +229,6 @@ public: static CHIPDeviceOpenCL *create(cl::Device ClDevice, CHIPContextOpenCL *ChipContext, int Idx); cl::Device &get() { return ClDevice; } - bool supportsFineGrainSVM() { return SupportsFineGrainSVM; } virtual void populateDevicePropertiesImpl() override; // unused virtual void resetImpl() override{}; diff --git a/src/backend/OpenCL/SVMemoryRegion.cc b/src/backend/OpenCL/SVMemoryRegion.cc index 74d2aee0c..aef4f0157 100644 --- a/src/backend/OpenCL/SVMemoryRegion.cc +++ b/src/backend/OpenCL/SVMemoryRegion.cc @@ -24,33 +24,70 @@ #define SVM_ALIGNMENT 128 +void SVMemoryRegion::init(cl::Context C, cl::Device D, CHIPContextUSMExts U, + bool FineGrain, bool IntelUSM) { + Device_ = D; + Context_ = C; + USM = U; + SupportsFineGrain = FineGrain; + SupportsIntelUSM = IntelUSM; +} + SVMemoryRegion &SVMemoryRegion::operator=(SVMemoryRegion &&Rhs) { SvmAllocations_ = std::move(Rhs.SvmAllocations_); Context_ = std::move(Rhs.Context_); + Device_ = std::move(Rhs.Device_); + USM = std::move(Rhs.USM); + SupportsFineGrain = Rhs.SupportsFineGrain; + SupportsIntelUSM = Rhs.SupportsIntelUSM; return *this; } -void *SVMemoryRegion::allocate(size_t Size, SVM_ALLOC_GRANULARITY Granularity) { +void *SVMemoryRegion::allocate(size_t Size, size_t Alignment, + hipMemoryType MemType) { // 0 passed for the alignment will use the default alignment which is equal to // the largest data type supported. 
void *Ptr; - if (Granularity == COARSE_GRAIN) { - Ptr = ::clSVMAlloc(Context_(), CL_MEM_READ_WRITE, Size, 0); - } else { + int Err; + if (SupportsIntelUSM) { + switch (MemType) { + case hipMemoryTypeHost: + Ptr = USM.clHostMemAllocINTEL(Context_(), NULL, Size, Alignment, &Err); + break; + case hipMemoryTypeDevice: + Ptr = USM.clDeviceMemAllocINTEL(Context_(), Device_(), NULL, Size, + Alignment, &Err); + break; + case hipMemoryTypeManaged: + case hipMemoryTypeUnified: + default: + Ptr = USM.clSharedMemAllocINTEL(Context_(), Device_(), NULL, Size, + Alignment, &Err); + break; + } + } else if (SupportsFineGrain) { Ptr = ::clSVMAlloc( Context_(), CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, Size, 0); + } else { + Ptr = ::clSVMAlloc(Context_(), CL_MEM_READ_WRITE, Size, 0); } + if (Ptr) { - auto Deleter = [Ctx = this->Context_](void *PtrToFree) -> void { + auto Deleter = [Ctx = this->Context_, SupportsUSM = this->SupportsIntelUSM, + clMemFreeINTEL = + this->USM.clMemFreeINTEL](void *PtrToFree) -> void { logTrace("clSVMFree on: {}\n", PtrToFree); - clSVMFree(Ctx(), PtrToFree); + if (SupportsUSM) + clMemFreeINTEL(Ctx(), PtrToFree); + else + clSVMFree(Ctx(), PtrToFree); }; auto SPtr = std::shared_ptr(Ptr, Deleter); + logTrace("Memory allocated: {} / {}\n", Ptr, Size); SvmAllocations_.emplace(SPtr, Size); } else CHIPERR_LOG_AND_THROW("clSVMAlloc failed", hipErrorMemoryAllocation); - logTrace("clSVMAlloc allocated: {} / {}\n", Ptr, Size); return Ptr; } From 377f98dc00cae7cb550232390079bc44f127bd93 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 10 May 2023 15:14:13 +0300 Subject: [PATCH 09/35] src/CHIPBindings_spt.cc: add missing return from function --- src/CHIPBindings_spt.cc | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/CHIPBindings_spt.cc b/src/CHIPBindings_spt.cc index 9d3020946..ffb9d0b8a 100644 --- a/src/CHIPBindings_spt.cc +++ b/src/CHIPBindings_spt.cc @@ -39,7 +39,7 @@ hipError_t 
hipMemcpy_spt(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind) { hipMemcpyAsync(dst, src, sizeBytes, kind, hipStreamPerThread); - hipStreamSynchronize(hipStreamPerThread); + return hipStreamSynchronize(hipStreamPerThread); } hipError_t hipMemcpyToSymbol_spt(const void *symbol, const void *src, @@ -47,7 +47,7 @@ hipError_t hipMemcpyToSymbol_spt(const void *symbol, const void *src, hipMemcpyKind kind) { hipMemcpyToSymbolAsync(symbol, src, sizeBytes, offset, kind, hipStreamPerThread); - hipStreamSynchronize(hipStreamPerThread); + return hipStreamSynchronize(hipStreamPerThread); } hipError_t hipMemcpyFromSymbol_spt(void *dst, const void *symbol, @@ -55,7 +55,7 @@ hipError_t hipMemcpyFromSymbol_spt(void *dst, const void *symbol, hipMemcpyKind kind) { hipMemcpyFromSymbolAsync(dst, symbol, sizeBytes, offset, kind, hipStreamPerThread); - hipStreamSynchronize(hipStreamPerThread); + return hipStreamSynchronize(hipStreamPerThread); } hipError_t hipMemcpy2D_spt(void *dst, size_t dpitch, const void *src, @@ -63,7 +63,7 @@ hipError_t hipMemcpy2D_spt(void *dst, size_t dpitch, const void *src, hipMemcpyKind kind) { hipMemcpy2DAsync(dst, dpitch, src, spitch, width, height, kind, hipStreamPerThread); - hipStreamSynchronize(hipStreamPerThread); + return hipStreamSynchronize(hipStreamPerThread); } hipError_t hipMemcpy2DToArray_spt(hipArray *dst, size_t wOffset, size_t hOffset, @@ -71,7 +71,7 @@ hipError_t hipMemcpy2DToArray_spt(hipArray *dst, size_t wOffset, size_t hOffset, size_t height, hipMemcpyKind kind) { hipMemcpy2DToArrayAsync(dst, wOffset, hOffset, src, spitch, width, height, kind, hipStreamPerThread); - hipStreamSynchronize(hipStreamPerThread); + return hipStreamSynchronize(hipStreamPerThread); } hipError_t hipMemcpy2DFromArray_spt(void *dst, size_t dpitch, @@ -80,66 +80,66 @@ hipError_t hipMemcpy2DFromArray_spt(void *dst, size_t dpitch, hipMemcpyKind kind) { hipMemcpy2DFromArrayAsync(dst, dpitch, src, wOffset, hOffset, width, height, kind, 
hipStreamPerThread); - hipStreamSynchronize(hipStreamPerThread); + return hipStreamSynchronize(hipStreamPerThread); } hipError_t hipMemcpy3D_spt(const struct hipMemcpy3DParms *p) { hipMemcpy3DAsync(p, hipStreamPerThread); - hipStreamSynchronize(hipStreamPerThread); + return hipStreamSynchronize(hipStreamPerThread); } hipError_t hipMemset_spt(void *dst, int value, size_t sizeBytes) { hipMemsetAsync(dst, value, sizeBytes, hipStreamPerThread); - hipStreamSynchronize(hipStreamPerThread); + return hipStreamSynchronize(hipStreamPerThread); } hipError_t hipMemset2D_spt(void *dst, size_t pitch, int value, size_t width, size_t height) { hipMemset2DAsync(dst, pitch, value, width, height, hipStreamPerThread); - hipStreamSynchronize(hipStreamPerThread); + return hipStreamSynchronize(hipStreamPerThread); } hipError_t hipMemset3D_spt(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { hipMemset3DAsync(pitchedDevPtr, value, extent, hipStreamPerThread); - hipStreamSynchronize(hipStreamPerThread); + return hipStreamSynchronize(hipStreamPerThread); } hipError_t hipMemcpyAsync_spt(void *dst, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { auto Queue = stream ? stream : hipStreamPerThread; - hipMemcpyAsync(dst, src, sizeBytes, kind, Queue); + return hipMemcpyAsync(dst, src, sizeBytes, kind, Queue); } hipError_t hipStreamQuery_spt(hipStream_t stream) { auto Queue = stream ? stream : hipStreamPerThread; - hipStreamQuery(Queue); + return hipStreamQuery(Queue); } hipError_t hipStreamSynchronize_spt(hipStream_t stream) { auto Queue = stream ? stream : hipStreamPerThread; - hipStreamSynchronize(Queue); + return hipStreamSynchronize(Queue); } hipError_t hipStreamGetPriority_spt(hipStream_t stream, int *priority) { auto Queue = stream ? 
stream : hipStreamPerThread; - hipStreamGetPriority(Queue, priority); + return hipStreamGetPriority(Queue, priority); } hipError_t hipStreamWaitEvent_spt(hipStream_t stream, hipEvent_t event, unsigned int flags) { auto Queue = stream ? stream : hipStreamPerThread; - hipStreamWaitEvent(Queue, event, flags); + return hipStreamWaitEvent(Queue, event, flags); } hipError_t hipEventRecord_spt(hipEvent_t Event, hipStream_t Stream) { auto Queue = Stream ? Stream : hipStreamPerThread; - hipEventRecord(Event, Queue); + return hipEventRecord(Event, Queue); } hipError_t hipStreamGetFlags_spt(hipStream_t stream, unsigned int *flags) { auto Queue = stream ? stream : hipStreamPerThread; - hipStreamGetFlags(Queue, flags); + return hipStreamGetFlags(Queue, flags); } hipError_t hipLaunchCooperativeKernel_spt(const void *f, dim3 gridDim, @@ -155,8 +155,8 @@ hipError_t hipLaunchKernel_spt(const void *function_address, dim3 numBlocks, dim3 dimBlocks, void **args, size_t sharedMemBytes, hipStream_t stream) { auto Queue = stream ? 
stream : hipStreamPerThread; - hipLaunchKernel(function_address, numBlocks, dimBlocks, args, sharedMemBytes, - Queue); + return hipLaunchKernel(function_address, numBlocks, dimBlocks, args, + sharedMemBytes, Queue); } #ifdef __cplusplus } From 09df3c89dc52bf26b4b9013fe8423b82b4450f69 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Thu, 4 May 2023 13:52:53 +0300 Subject: [PATCH 10/35] samples/hip-cuda: disable printfs on each kernel iteration --- samples/hip-cuda/BinomialOption/BinomialOption.cpp | 2 +- samples/hip-cuda/BitonicSort/BitonicSort.cpp | 2 +- samples/hip-cuda/DCT/DCT.cpp | 2 +- samples/hip-cuda/FastWalshTransform/FastWalshTransform.cpp | 2 +- samples/hip-cuda/FloydWarshall/FloydWarshall.cpp | 2 +- samples/hip-cuda/dwtHaar1D/dwtHaar1D.cpp | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/hip-cuda/BinomialOption/BinomialOption.cpp b/samples/hip-cuda/BinomialOption/BinomialOption.cpp index f4fe3da8d..0aa7b1303 100644 --- a/samples/hip-cuda/BinomialOption/BinomialOption.cpp +++ b/samples/hip-cuda/BinomialOption/BinomialOption.cpp @@ -365,7 +365,7 @@ BinomialOption::runKernels() hipEventElapsedTime(&eventMs, start, stop); - printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); +// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); hipMemcpy(output, dout, samplesPerVectorWidth * sizeof(float4), hipMemcpyDeviceToHost); diff --git a/samples/hip-cuda/BitonicSort/BitonicSort.cpp b/samples/hip-cuda/BitonicSort/BitonicSort.cpp index 81f7b4beb..814f60d8a 100644 --- a/samples/hip-cuda/BitonicSort/BitonicSort.cpp +++ b/samples/hip-cuda/BitonicSort/BitonicSort.cpp @@ -349,7 +349,7 @@ BitonicSort::runKernels(void) hipEventElapsedTime(&eventMs, start, stop); - printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); +// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); } } hipMemcpy(input, din,length * sizeof(unsigned int), hipMemcpyDeviceToHost); diff --git a/samples/hip-cuda/DCT/DCT.cpp 
b/samples/hip-cuda/DCT/DCT.cpp index ecc77cff8..2d92f6879 100644 --- a/samples/hip-cuda/DCT/DCT.cpp +++ b/samples/hip-cuda/DCT/DCT.cpp @@ -405,7 +405,7 @@ DCT::runKernels(void) hipEventElapsedTime(&eventMs, start, stop); - printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); +// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); hipMemcpy(output, dout,sizeof(float) * width * height, hipMemcpyDeviceToHost); diff --git a/samples/hip-cuda/FastWalshTransform/FastWalshTransform.cpp b/samples/hip-cuda/FastWalshTransform/FastWalshTransform.cpp index c202fdc0c..039ee9ec5 100644 --- a/samples/hip-cuda/FastWalshTransform/FastWalshTransform.cpp +++ b/samples/hip-cuda/FastWalshTransform/FastWalshTransform.cpp @@ -217,7 +217,7 @@ FastWalshTransform::runKernels(void) hipEventElapsedTime(&eventMs, start, stop); - printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); +// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); } hipMemcpy(output, din, length * sizeof(float), hipMemcpyDeviceToHost); diff --git a/samples/hip-cuda/FloydWarshall/FloydWarshall.cpp b/samples/hip-cuda/FloydWarshall/FloydWarshall.cpp index 477776d39..3a0341abb 100644 --- a/samples/hip-cuda/FloydWarshall/FloydWarshall.cpp +++ b/samples/hip-cuda/FloydWarshall/FloydWarshall.cpp @@ -381,7 +381,7 @@ FloydWarshall::runKernels(void) hipEventElapsedTime(&eventMs, start, stop); - printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); + //printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); } diff --git a/samples/hip-cuda/dwtHaar1D/dwtHaar1D.cpp b/samples/hip-cuda/dwtHaar1D/dwtHaar1D.cpp index 4e6706c63..aa2eb7399 100644 --- a/samples/hip-cuda/dwtHaar1D/dwtHaar1D.cpp +++ b/samples/hip-cuda/dwtHaar1D/dwtHaar1D.cpp @@ -423,7 +423,7 @@ int DwtHaar1D::runDwtHaar1DKernel() hipEventElapsedTime(&eventMs, start, stop); - printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); +// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", 
eventMs); hipMemcpy(dOutData, dout, signalLength * sizeof(float), hipMemcpyDeviceToHost); hipMemcpy(dPartialOutData, dpart, signalLength * sizeof(float), hipMemcpyDeviceToHost); From b7c0212b3e074dac8d5139df142de81566cd5bc9 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Fri, 5 May 2023 09:27:48 +0300 Subject: [PATCH 11/35] samples/hip-cuda: convert tests to report also Best iteration time original code reports Average iteration time --- .../BinomialOption/BinomialOption.cpp | 53 ++++++++--------- samples/hip-cuda/BitonicSort/BitonicSort.cpp | 32 ++++++---- samples/hip-cuda/DCT/DCT.cpp | 25 ++++++-- .../FastWalshTransform/FastWalshTransform.cpp | 47 ++++++++------- .../hip-cuda/FloydWarshall/FloydWarshall.cpp | 52 +++++++++------- samples/hip-cuda/dwtHaar1D/dwtHaar1D.cpp | 59 +++++++++---------- 6 files changed, 149 insertions(+), 119 deletions(-) diff --git a/samples/hip-cuda/BinomialOption/BinomialOption.cpp b/samples/hip-cuda/BinomialOption/BinomialOption.cpp index 0aa7b1303..9d32af82c 100644 --- a/samples/hip-cuda/BinomialOption/BinomialOption.cpp +++ b/samples/hip-cuda/BinomialOption/BinomialOption.cpp @@ -53,6 +53,7 @@ class BinomialOption { double setupTime; /**< Time taken to setup resources and building kernel */ double kernelTime; /**< Time taken to run kernel and read result back */ + float bestIterTimeMS; int numSamples; /**< No. of samples*/ unsigned int samplesPerVectorWidth; /**< No. of samples per vector width */ unsigned int numSteps; /**< No. 
of time steps*/ @@ -75,18 +76,14 @@ class BinomialOption * Initialize member variables */ BinomialOption() - : setupTime(0), - kernelTime(0), - randArray(NULL), - output(NULL), - refOutput(NULL), - iterations(1) - { - numSamples = 256; - numSteps = 254; - sampleArgs = new HIPCommandArgs() ; - sampleTimer = new SDKTimer(); - sampleArgs->sampleVerStr = SAMPLE_VERSION; + : setupTime(0), kernelTime(0), + bestIterTimeMS(std::numeric_limits::max()), + randArray(NULL), output(NULL), refOutput(NULL), iterations(1) { + numSamples = 256; + numSteps = 254; + sampleArgs = new HIPCommandArgs(); + sampleTimer = new SDKTimer(); + sampleArgs->sampleVerStr = SAMPLE_VERSION; } inline long long get_time() @@ -347,7 +344,7 @@ BinomialOption::runKernels() hipEventCreate(&start); hipEventCreate(&stop); - float eventMs = 1.0f; + float eventMs = 1000000.0f; unsigned int localThreads = {numSteps + 1}; @@ -360,14 +357,15 @@ BinomialOption::runKernels() 0, 0, numSteps ,(float4*)randBuffer ,(float4*)outBuffer); + hipMemcpy(output, dout, samplesPerVectorWidth * sizeof(float4), + hipMemcpyDeviceToHost); + hipEventRecord(stop, NULL); hipEventSynchronize(stop); hipEventElapsedTime(&eventMs, start, stop); - -// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); - - hipMemcpy(output, dout, samplesPerVectorWidth * sizeof(float4), hipMemcpyDeviceToHost); + if (eventMs < bestIterTimeMS) + bestIterTimeMS = eventMs; return SDK_SUCCESS; } @@ -539,8 +537,8 @@ int BinomialOption::run() if(!sampleArgs->quiet) { - printArray("input", randArray, numSamples, 1); - printArray("Output", output, numSamples, 1); + printArray("input", randArray, numSamples, 1); + printArray("Output", output, numSamples, 1); } return SDK_SUCCESS; @@ -589,23 +587,20 @@ void BinomialOption::printStats() { if(sampleArgs->timing) { - std::string strArray[4] = - { - "Option Samples", - "Time(sec)", - "Transfer+kernel(sec)" , - "Options/sec" - }; + std::string strArray[5] = {"Option Samples", "Time(sec)", + 
"Transfer+kernel(sec)", + "Best Iter. time (msec)", "Options/sec"}; sampleTimer->totalTime = setupTime + kernelTime; - std::string stats[4]; + std::string stats[5]; stats[0] = toString(numSamples, std::dec); stats[1] = toString(sampleTimer->totalTime, std::dec); stats[2] = toString(kernelTime, std::dec); - stats[3] = toString(numSamples / sampleTimer->totalTime, std::dec); + stats[3] = toString(bestIterTimeMS, std::dec); + stats[4] = toString(numSamples / sampleTimer->totalTime, std::dec); - printStatistics(strArray, stats, 4); + printStatistics(strArray, stats, 5); } } diff --git a/samples/hip-cuda/BitonicSort/BitonicSort.cpp b/samples/hip-cuda/BitonicSort/BitonicSort.cpp index 814f60d8a..eadd86104 100644 --- a/samples/hip-cuda/BitonicSort/BitonicSort.cpp +++ b/samples/hip-cuda/BitonicSort/BitonicSort.cpp @@ -44,6 +44,7 @@ class BitonicSort double totalKernelTime; /**< Time for kernel execution */ double totalProgramTime; /**< Time for program execution */ double referenceKernelTime; /**< Time for reference implementation */ + float bestIterTimeMS; unsigned int sortFlag; /**< Flag to indicate sorting order */ std::string sortOrder; /**< Argument to indicate sorting order */ unsigned int *input; /**< Input array */ @@ -69,6 +70,7 @@ class BitonicSort length = 32768; setupTime = 0; totalKernelTime = 0; + bestIterTimeMS = std::numeric_limits::max(); iterations = 1; sampleArgs = new HIPCommandArgs() ; sampleTimer = new SDKTimer(); @@ -270,7 +272,7 @@ BitonicSort::runKernels(void) hipEventCreate(&start); hipEventCreate(&stop); - float eventMs = 1.0f; + float eventMs = 1000000.0f; unsigned int numStages = 0; unsigned int temp; @@ -331,12 +333,12 @@ BitonicSort::runKernels(void) sortFlag = 0; } + hipEventRecord(start, NULL); for(stage = 0; stage < numStages; ++stage) { for(passOfStage = 0; passOfStage < stage + 1; ++passOfStage) { - hipEventRecord(start, NULL); hipLaunchKernelGGL(bitonicSort, dim3(globalThreads/localThreads), @@ -344,15 +346,18 @@ 
BitonicSort::runKernels(void) 0, 0, inputBuffer ,stage, passOfStage ,sortFlag); - hipEventRecord(stop, NULL); - hipEventSynchronize(stop); - - hipEventElapsedTime(&eventMs, start, stop); - // printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); } } - hipMemcpy(input, din,length * sizeof(unsigned int), hipMemcpyDeviceToHost); + hipMemcpy(input, din, length * sizeof(unsigned int), hipMemcpyDeviceToHost); + + hipEventRecord(stop, NULL); + hipEventSynchronize(stop); + + hipEventElapsedTime(&eventMs, start, stop); + if (eventMs < bestIterTimeMS) + bestIterTimeMS = eventMs; + return SDK_SUCCESS; } @@ -586,17 +591,20 @@ void BitonicSort::printStats() { if(sampleArgs->timing) { - std::string strArray[4] = {"Elements", "Setup Time (sec)", "Avg. Kernel Time (sec)", "Elements/sec"}; - std::string stats[4]; + std::string strArray[5] = {"Elements", "Setup Time (sec)", + "Avg. Kernel Time (sec)", + "Best Iter. Time (msec)", "Elements/sec"}; + std::string stats[5]; sampleTimer->totalTime = ( totalKernelTime/ iterations ); stats[0] = toString(length, std::dec); stats[1] = toString(setupTime, std::dec); stats[2] = toString(sampleTimer->totalTime, std::dec); - stats[3] = toString(( length/sampleTimer->totalTime ), std::dec); + stats[3] = toString(bestIterTimeMS, std::dec); + stats[4] = toString((length / sampleTimer->totalTime), std::dec); - printStatistics(strArray, stats, 4); + printStatistics(strArray, stats, 5); } } int BitonicSort::cleanup() diff --git a/samples/hip-cuda/DCT/DCT.cpp b/samples/hip-cuda/DCT/DCT.cpp index 2d92f6879..4779bc351 100644 --- a/samples/hip-cuda/DCT/DCT.cpp +++ b/samples/hip-cuda/DCT/DCT.cpp @@ -75,6 +75,7 @@ class DCT double totalKernelTime; /**< Time for kernel execution */ double totalProgramTime; /**< Time for program execution */ double referenceKernelTime; /**< Time for reference implementation */ + float bestIterTimeMS; int width; /**< Width of the input array */ int height; /**< height of the input array */ float *input; /**< Input 
array */ @@ -112,6 +113,7 @@ class DCT inverse = 0; setupTime = 0; totalKernelTime = 0; + bestIterTimeMS = std::numeric_limits::max(); iterations = 1; sampleArgs = new HIPCommandArgs() ; sampleTimer = new SDKTimer(); @@ -385,7 +387,7 @@ int DCT::runKernels(void) { hipEvent_t start, stop; - float eventMs = 1.0f; + float eventMs = 1000000.0f; hipEventCreate(&start); hipEventCreate(&stop); @@ -399,15 +401,18 @@ DCT::runKernels(void) 0, 0, outputBuffer ,inputBuffer ,dctBuffer, dct_transBuffer, width, blockWidth, inverse ); + hipMemcpy(output, dout, sizeof(float) * width * height, + hipMemcpyDeviceToHost); + hipEventRecord(stop, NULL); hipEventSynchronize(stop); - hipEventElapsedTime(&eventMs, start, stop); + if (eventMs < bestIterTimeMS) + bestIterTimeMS = eventMs; // printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); - hipMemcpy(output, dout,sizeof(float) * width * height, hipMemcpyDeviceToHost); return SDK_SUCCESS; } @@ -656,8 +661,14 @@ void DCT::printStats() { if(sampleArgs->timing) { - std::string strArray[4] = {"Width", "Height", "Time(sec)", "[Transfer+Kernel]Time(sec)"}; - std::string stats[4]; + std::string strArray[5] = { + "Width", + "Height", + "Time(sec)", + "[Transfer+Kernel]Time(sec)", + "Best Iter. 
time (msec)", + }; + std::string stats[5]; sampleTimer->totalTime = setupTime + totalKernelTime; @@ -665,10 +676,12 @@ void DCT::printStats() stats[1] = toString(height , std::dec); stats[2] = toString(sampleTimer->totalTime, std::dec); stats[3] = toString(totalKernelTime, std::dec); + stats[4] = toString(bestIterTimeMS, std::dec); - printStatistics(strArray, stats, 4); + printStatistics(strArray, stats, 5); } } + int DCT::cleanup() { diff --git a/samples/hip-cuda/FastWalshTransform/FastWalshTransform.cpp b/samples/hip-cuda/FastWalshTransform/FastWalshTransform.cpp index 039ee9ec5..06e8034c3 100644 --- a/samples/hip-cuda/FastWalshTransform/FastWalshTransform.cpp +++ b/samples/hip-cuda/FastWalshTransform/FastWalshTransform.cpp @@ -44,6 +44,7 @@ class FastWalshTransform double setupTime; /**< Time for setting up OpenCL */ double totalKernelTime; /**< Time for kernel execution */ double totalProgramTime; /**< Time for program execution */ + float bestIterTimeMS; double referenceKernelTime; /**< Time for reference implementation */ int length; /**< Length of the input array */ float *input; /**< Input array */ @@ -70,6 +71,7 @@ class FastWalshTransform verificationInput = NULL; setupTime = 0; totalKernelTime = 0; + bestIterTimeMS = std::numeric_limits::max(); iterations = 1; sampleArgs = new HIPCommandArgs() ; sampleTimer = new SDKTimer(); @@ -191,7 +193,10 @@ FastWalshTransform::runKernels(void) hipEventCreate(&start); hipEventCreate(&stop); - float eventMs = 1.0f; + float eventMs = 10000000.0f; + + // Record the start event + hipEventRecord(start, NULL); float *din; hipHostGetDevicePointer((void**)&din, inputBuffer,0); @@ -201,28 +206,24 @@ FastWalshTransform::runKernels(void) int globalThreads = length / 2; int localThreads = 256; - for(int step = 1; step < length; step <<= 1) - { - // Record the start event - hipEventRecord(start, NULL); + for(int step = 1; step < length; step <<= 1) { + hipLaunchKernelGGL(fastWalshTransform, + dim3(globalThreads/localThreads), + 
dim3(localThreads), + 0, 0, + inputBuffer ,step); + } - hipLaunchKernelGGL(fastWalshTransform, - dim3(globalThreads/localThreads), - dim3(localThreads), - 0, 0, - inputBuffer ,step); + hipMemcpy(output, din, length * sizeof(float), hipMemcpyDeviceToHost); hipEventRecord(stop, NULL); hipEventSynchronize(stop); hipEventElapsedTime(&eventMs, start, stop); + if (eventMs < bestIterTimeMS) + bestIterTimeMS = eventMs; -// printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); - } - - hipMemcpy(output, din, length * sizeof(float), hipMemcpyDeviceToHost); - - return SDK_SUCCESS; + return SDK_SUCCESS; } /* @@ -408,16 +409,22 @@ FastWalshTransform::printStats() { if(sampleArgs->timing) { - std::string strArray[3] = {"Length", "Time(sec)", "[Transfer+Kernel]Time(sec)"}; - std::string stats[3]; + std::string strArray[4] = { + "Length", + "Time(sec)", + "[Transfer+Kernel]Time(sec)", + "Best Iter. time (msec)", + }; + std::string stats[4]; - sampleTimer->totalTime = setupTime + totalKernelTime ; + sampleTimer->totalTime = setupTime + totalKernelTime; stats[0] = toString(length, std::dec); stats[1] = toString(sampleTimer->totalTime, std::dec); stats[2] = toString(totalKernelTime, std::dec); + stats[3] = toString(bestIterTimeMS, std::dec); - printStatistics(strArray, stats, 3); + printStatistics(strArray, stats, 4); } } int diff --git a/samples/hip-cuda/FloydWarshall/FloydWarshall.cpp b/samples/hip-cuda/FloydWarshall/FloydWarshall.cpp index 3a0341abb..440219f7c 100644 --- a/samples/hip-cuda/FloydWarshall/FloydWarshall.cpp +++ b/samples/hip-cuda/FloydWarshall/FloydWarshall.cpp @@ -50,6 +50,7 @@ class FloydWarshall double setupTime; /**< Time for setting up Open*/ double totalKernelTime; /**< Time for kernel execution */ double totalProgramTime; /**< Time for program execution */ + float bestIterTimeMS; double referenceKernelTime; /**< Time for reference implementation */ unsigned int numNodes; /**< Number of nodes in the graph */ unsigned int *pathDistanceMatrix; /**< 
path distance array */ @@ -82,6 +83,7 @@ class FloydWarshall verificationPathMatrix = NULL; setupTime = 0; totalKernelTime = 0; + bestIterTimeMS = std::numeric_limits::max(); iterations = 1; blockSize = 16; sampleArgs = new HIPCommandArgs() ; @@ -354,40 +356,40 @@ FloydWarshall::runKernels(void) float *din, *di; - hipHostGetDevicePointer((void**)&din, pathDistanceBuffer,0); - hipHostGetDevicePointer((void**)&di, pathBuffer,0); - - hipMemcpy(din, pathDistanceMatrix, sizeof(unsigned int) * numNodes * numNodes, hipMemcpyHostToDevice); - hipEvent_t start, stop; hipEventCreate(&start); hipEventCreate(&stop); - float eventMs = 1.0f; + float eventMs = 1000000.0f; - for(unsigned int i = 0; i < numPasses; i += 1) - { // Record the start event hipEventRecord(start, NULL); - hipLaunchKernelGGL(floydWarshallPass, - dim3(globalThreads[0]/localThreads[0],globalThreads[1]/localThreads[1]), - dim3(localThreads[0],localThreads[1]), - 0, 0, - pathDistanceBuffer,pathBuffer,numNodes ,i); + hipHostGetDevicePointer((void **)&din, pathDistanceBuffer, 0); + hipHostGetDevicePointer((void **)&di, pathBuffer, 0); - hipEventRecord(stop, NULL); - hipEventSynchronize(stop); - - hipEventElapsedTime(&eventMs, start, stop); + hipMemcpy(din, pathDistanceMatrix, + sizeof(unsigned int) * numNodes * numNodes, + hipMemcpyHostToDevice); - //printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); + for (unsigned int i = 0; i < numPasses; i += 1) { + hipLaunchKernelGGL(floydWarshallPass, + dim3(globalThreads[0] / localThreads[0], + globalThreads[1] / localThreads[1]), + dim3(localThreads[0], localThreads[1]), 0, 0, + pathDistanceBuffer, pathBuffer, numNodes, i); } - hipMemcpy(pathDistanceMatrix, din,numNodes * numNodes * sizeof(unsigned int), hipMemcpyDeviceToHost); hipMemcpy(pathMatrix, di,numNodes * numNodes * sizeof(unsigned int), hipMemcpyDeviceToHost); + hipEventRecord(stop, NULL); + hipEventSynchronize(stop); + + hipEventElapsedTime(&eventMs, start, stop); + if (eventMs < bestIterTimeMS) + 
bestIterTimeMS = eventMs; + return SDK_SUCCESS; } @@ -602,16 +604,22 @@ void FloydWarshall::printStats() { if(sampleArgs->timing) { - std::string strArray[3] = {"Nodes", "Time(sec)", "[Transfer+Kernel]Time(sec)"}; - std::string stats[3]; + std::string strArray[4] = { + "Nodes", + "Time(sec)", + "[Transfer+Kernel]Time(sec)", + "Best Iter. time (msec)", + }; + std::string stats[4]; sampleTimer->totalTime = setupTime + totalKernelTime; stats[0] = toString(numNodes, std::dec); stats[1] = toString(sampleTimer->totalTime, std::dec); stats[2] = toString(totalKernelTime, std::dec); + stats[3] = toString(bestIterTimeMS, std::dec); - printStatistics(strArray, stats, 3); + printStatistics(strArray, stats, 4); } } diff --git a/samples/hip-cuda/dwtHaar1D/dwtHaar1D.cpp b/samples/hip-cuda/dwtHaar1D/dwtHaar1D.cpp index aa2eb7399..fd7d689f9 100644 --- a/samples/hip-cuda/dwtHaar1D/dwtHaar1D.cpp +++ b/samples/hip-cuda/dwtHaar1D/dwtHaar1D.cpp @@ -50,6 +50,7 @@ class DwtHaar1D float *hOutData; /**< output data calculated on host */ double setupTime; /**< time taken to setup resources and building kernel */ double kernelTime; /**< time taken to run kernel and read result back */ + float bestIterTimeMS; float* inDataBuf; /**< memory buffer for input data */ float* dOutDataBuf; /**< memory buffer for output data */ float* dPartialOutDataBuf; /**< memory buffer for paritial decomposed signal */ @@ -69,19 +70,13 @@ class DwtHaar1D * @param name name of sample (string) */ DwtHaar1D() - : - signalLength(SIGNAL_LENGTH), - setupTime(0), - kernelTime(0), - inData(NULL), - dOutData(NULL), - dPartialOutData(NULL), - hOutData(NULL), - iterations(1) - { - sampleArgs = new HIPCommandArgs() ; - sampleTimer = new SDKTimer(); - sampleArgs->sampleVerStr = SAMPLE_VERSION; + : signalLength(SIGNAL_LENGTH), setupTime(0), kernelTime(0), + bestIterTimeMS(std::numeric_limits::max()), inData(NULL), + dOutData(NULL), dPartialOutData(NULL), hOutData(NULL), + iterations(1) { + sampleArgs = new HIPCommandArgs(); + 
sampleTimer = new SDKTimer(); + sampleArgs->sampleVerStr = SAMPLE_VERSION; } inline long long get_time() @@ -403,26 +398,12 @@ int DwtHaar1D::runDwtHaar1DKernel() hipMemcpy(din, inData, sizeof(float) * curSignalLength, hipMemcpyHostToDevice); - hipEvent_t start, stop; - - hipEventCreate(&start); - hipEventCreate(&stop); - float eventMs = 1.0f; - - // Record the start event - hipEventRecord(start, NULL); - hipLaunchKernelGGL(dwtHaar1D, dim3(globalThreads/localThreads), dim3(localThreads), 0, 0, inDataBuf ,dOutDataBuf ,dPartialOutDataBuf, totalLevels, curSignalLength,levelsDone, maxLevelsOnDevice); - hipEventRecord(stop, NULL); - hipEventSynchronize(stop); - - hipEventElapsedTime(&eventMs, start, stop); - // printf ("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); hipMemcpy(dOutData, dout, signalLength * sizeof(float), hipMemcpyDeviceToHost); @@ -453,6 +434,15 @@ DwtHaar1D::runKernels(void) float* temp = (float*)malloc(signalLength * sizeof(float)); memcpy(temp, inData, signalLength * sizeof(float)); + hipEvent_t start, stop; + + hipEventCreate(&start); + hipEventCreate(&stop); + float eventMs = 10000000.0f; + + // Record the start event + hipEventRecord(start, NULL); + levelsDone = 0; int one = 1; while((unsigned int)levelsDone < actualLevels) @@ -493,6 +483,12 @@ DwtHaar1D::runKernels(void) } + hipEventRecord(stop, NULL); + hipEventSynchronize(stop); + + hipEventElapsedTime(&eventMs, start, stop); + if (eventMs < bestIterTimeMS) + bestIterTimeMS = eventMs; memcpy(inData, temp, signalLength * sizeof(float)); free(temp); @@ -642,15 +638,18 @@ void DwtHaar1D::printStats() { if(sampleArgs->timing) { - std::string strArray[3] = {"SignalLength", "Time(sec)", "[Transfer+Kernel]Time(sec)"}; + std::string strArray[4] = {"SignalLength", "Time(sec)", + "[Transfer+Kernel]Time(sec)", + "Best Iter. 
time (msec)"}; sampleTimer->totalTime = setupTime + kernelTime; - std::string stats[3]; + std::string stats[4]; stats[0] = toString(signalLength, std::dec); stats[1] = toString(sampleTimer->totalTime, std::dec); stats[2] = toString(kernelTime, std::dec); + stats[3] = toString(bestIterTimeMS, std::dec); - printStatistics(strArray, stats, 3); + printStatistics(strArray, stats, 4); } } From a6e27507b58e9b9587d5f3db703ab85a2cfd7809 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 10 May 2023 11:03:09 +0300 Subject: [PATCH 12/35] update unit tests with failing/passing Graph tests --- cmake/UnitTests.cmake | 677 +++++++++++++++++------------------------- 1 file changed, 265 insertions(+), 412 deletions(-) diff --git a/cmake/UnitTests.cmake b/cmake/UnitTests.cmake index 39e185304..8dbeb3367 100644 --- a/cmake/UnitTests.cmake +++ b/cmake/UnitTests.cmake @@ -534,72 +534,72 @@ list(APPEND IGPU_OPENCL_FAILED_TESTS "hipStreamSemantics") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "deviceMallocCompile") # Unimplemented list(APPEND IGPU_OPENCL_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "cuda-qrng") # Subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # Subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND 
IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND 
IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # subprocess aborted +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # subprocess aborted +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # subprocess aborted +list(APPEND IGPU_OPENCL_FAILED_TESTS 
"Unit_hipGraph_SimpleGraphWithKernel") # subprocess aborted +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # (SEGFAULT) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # subprocess aborted +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # (Failed) +list(APPEND 
IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # (SEGFAULT) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # (SEGFAULT) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # (Timeout) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_Manual") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) 
+list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # (SEGFAULT) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # (Failed) +list(APPEND 
IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # (Failed) +list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # (Failed) list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT @@ -631,35 +631,6 @@ list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") 
# Failed @@ -672,12 +643,6 @@ list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Fail list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - int") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - float") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - double") # Failed @@ -897,7 +862,6 @@ list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamAddCallback_StrmSyncTiming") list(APPEND IGPU_OPENCL_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "stream") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "hipKernelLaunchIsNonBlocking") # Timeout -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # Subprocess aborted list(APPEND IGPU_OPENCL_FAILED_TESTS "syncthreadsExitedThreads") # Timeout # Timeout or out-of-resources error in the CI which emulates double FPs. 
list(APPEND IGPU_OPENCL_FAILED_TESTS "TestStlFunctionsDouble") @@ -911,74 +875,72 @@ list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-asyncAPI") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-matrixMul") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-qrng") # Subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # Timeout -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # Timeout -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # Subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND 
DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Timeout -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed 
-list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # subprocess aborted +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # subprocess aborted +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # subprocess aborted +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # subprocess aborted +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # 
(Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # (SEGFAULT) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # subprocess aborted +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # (SEGFAULT) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # (SEGFAULT) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") 
# (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # (Timeout) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_Manual") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphKernelNodeGetSetParams_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # (SEGFAULT) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # (Failed) +list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # (Failed) list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT @@ -1010,35 +972,6 @@ list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") 
# Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -1051,12 +984,6 @@ list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Fail list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND 
DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipMemcpy2DToArray_PinnedMemSameGPU") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipMemcpy2DToArrayAsync_PinnedHostMemSameGpu") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipMemcpy2D_H2D-D2D-D2H - int") # Failed @@ -1314,7 +1241,6 @@ list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipMemsetFunctional_PartialSet_3D") # list(APPEND DGPU_OPENCL_FAILED_TESTS "hipMultiThreadAddCallback") # Subprocess aborted list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipMemset3DAsync_ConcurrencyMthread") # Timeout list(APPEND DGPU_OPENCL_FAILED_TESTS "hipKernelLaunchIsNonBlocking") # Timeout -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # Subprocess aborted list(APPEND DGPU_OPENCL_FAILED_TESTS "syncthreadsExitedThreads") # Timeout # dGPU Level Zero Unit Test Failures @@ -1330,74 +1256,72 @@ list(APPEND DGPU_LEVEL0_FAILED_TESTS "cuda-clock") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "cuda-simpleTemplates") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "cuda-dwtHaar1D") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # Timeout -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # Timeout -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND 
DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # Subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # Subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS 
"Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS 
"Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # 
Failed +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # subprocess aborted +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # subprocess aborted +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # subprocess aborted +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # subprocess aborted +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS 
"Unit_hipGraphDestroyNode_DestroyDependencyNode") # (SEGFAULT) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # subprocess aborted +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # (SEGFAULT) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # (SEGFAULT) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # (Timeout) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_Manual") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS 
"Unit_hipGraphMemsetNodeSetParams_InvalidParams") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # (SEGFAULT) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # (Failed) +list(APPEND 
DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # (Failed) +list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # (Failed) list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT @@ -1430,34 +1354,6 @@ list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Timeout -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS 
"Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND 
DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -1470,12 +1366,6 @@ list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Fail list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - int") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - float") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - double") # Failed @@ -1697,7 +1587,6 @@ list(APPEND DGPU_LEVEL0_FAILED_TESTS "hipDynamicShared") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipMemFaultStackAllocation_Check") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "hip_sycl_interop") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "hipKernelLaunchIsNonBlocking") # Timeout -list(APPEND DGPU_LEVEL0_FAILED_TESTS 
"Unit_hipGraphMemcpyNodeSetParams_Functional") # Subprocess aborted list(APPEND DGPU_LEVEL0_FAILED_TESTS "syncthreadsExitedThreads") # Timeout # iGPU Level Zero Unit Test Failures @@ -1709,73 +1598,74 @@ list(APPEND IGPU_LEVEL0_FAILED_TESTS "cuda-clock") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "cuda-simpleTemplates") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "cuda-dwtHaar1D") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # Subprocess aborted -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # Subprocess aborted -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT 
-list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # 
SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Timeout -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS 
"Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # (SEGFAULT) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # subprocess aborted +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # subprocess aborted +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # subprocess aborted +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # subprocess aborted +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # (Failed) +list(APPEND 
IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # (SEGFAULT) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # subprocess aborted +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # (SEGFAULT) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # (SEGFAULT) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # (Failed) 
+list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # (Timeout) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_Manual") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # (Timeout) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # 
(Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # (SEGFAULT) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # (Failed) +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # (Failed) list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT +list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_InterStrmEventSync_defaultflag") # SEGFAULT @@ -1805,34 +1695,6 @@ list(APPEND 
IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Timeout -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT 
-list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -1845,12 +1707,6 @@ list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Fail list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS 
"Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - int") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - float") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - double") # Failed @@ -2069,12 +1925,9 @@ list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipPeekAtLastError_Positive_Basic") # list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipPeekAtLastError_Positive_Threaded") # Subprocess aborted list(APPEND IGPU_LEVEL0_FAILED_TESTS "hipDynamicShared") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "hipDynamicShared2") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # Timeout -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # Timeout list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipMemFaultStackAllocation_Check") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "hip_sycl_interop") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "hipKernelLaunchIsNonBlocking") # Timeout -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # Subprocess aborted list(APPEND IGPU_LEVEL0_FAILED_TESTS "syncthreadsExitedThreads") # Timeout list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipMemset3DAsync_ConcurrencyMthread") # Flaky. An event related issue. 
From 10b2fa1b901194f083b6897c23af3dcbc978b1bc Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Thu, 11 May 2023 21:12:03 +0300 Subject: [PATCH 13/35] update test_lists again, move graphs to FAILING_FOR_ALL --- cmake/UnitTests.cmake | 539 ++++++------------------------------------ 1 file changed, 70 insertions(+), 469 deletions(-) diff --git a/cmake/UnitTests.cmake b/cmake/UnitTests.cmake index 8dbeb3367..39e230b6c 100644 --- a/cmake/UnitTests.cmake +++ b/cmake/UnitTests.cmake @@ -150,6 +150,74 @@ list(APPEND FAILING_FOR_ALL "Unit_deviceFunctions_CompileTest_atomicXor_system_u list(APPEND FAILING_FOR_ALL "hipStreamSemantics") # SEGFAULT - likely due to main thread exiting without calling join list(APPEND FAILING_FOR_ALL "Unit_hipMultiStream_multimeDevice") # SEGFAULT - likely due to multiple GPU support +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddDependencies_NegTest") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddEventRecordNode_MultipleRun") # subprocess aborted +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddEventWaitNode_MultipleRun") # subprocess aborted +list(APPEND FAILING_FOR_ALL "Unit_hipGraph_BasicFunctional") # subprocess aborted +list(APPEND FAILING_FOR_ALL "Unit_hipGraph_SimpleGraphWithKernel") # subprocess aborted +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddMemcpyNode_BasicFunctional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # (Failed) +list(APPEND FAILING_FOR_ALL 
"Unit_hipGraphExecHostNodeSetParams_BasicFunc") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemsetNodeSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphDestroyNode_DestroyDependencyNode") # (SEGFAULT) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphGetNodes_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphGetNodes_CapturedStream") # subprocess aborted +list(APPEND FAILING_FOR_ALL "Unit_hipGraphGetRootNodes_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphGetRootNodes_CapturedStream") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddMemcpyNode1D_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # (SEGFAULT) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddChildGraphNode_SingleChildNode") # (SEGFAULT) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphGetEdges_Functionality") # (Timeout) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRemoveDependencies_Func_Manual") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # (Failed) +list(APPEND FAILING_FOR_ALL 
"Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecUpdate_Negative_TypeChange") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecUpdate_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT +list(APPEND FAILING_FOR_ALL "Unit_hipGraphKernelNodeGetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphKernelNodeSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphKernelNodeGetSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecKernelNodeSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphLaunch_Negative") # (SEGFAULT) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphNodeGetDependentNodes_Functional") # (Failed) +list(APPEND 
FAILING_FOR_ALL "Unit_hipGraphNodeGetDependencies_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRetainUserObject_Functional_1") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRetainUserObject_Functional_2") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRetainUserObject_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphReleaseUserObject_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRetainUserObject_Negative_Basic") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRetainUserObject_Negative_Null_Object") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted + # CPU OpenCL Unit Test Failures list(APPEND CPU_OPENCL_FAILED_TESTS "hipStreamSemantics") # SEGFAULT list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipTextureFetch_vector") # LLVM-16 Failures @@ -165,72 +233,6 @@ list(APPEND CPU_OPENCL_FAILED_TESTS "unroll") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "hipConstantTestDeviceSymbol") # Subprocess aborted list(APPEND CPU_OPENCL_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND CPU_OPENCL_FAILED_TESTS "cuda-qrng") # Subprocess aborted -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # Subprocess aborted 
-list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphDestroyNode_DestroyDependencyNode") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Subprocess aborted -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND 
CPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed @@ -261,35 +263,6 @@ list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # F list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT 
-list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -302,12 +275,6 @@ list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Faile list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed 
-list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - int") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - float") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - double") # Failed @@ -525,7 +492,6 @@ list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipEvent") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipMallocPitch_KernelLaunch - int") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipMallocPitch_KernelLaunch - float") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipMallocPitch_KernelLaunch - double") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "syncthreadsExitedThreads") # Timeout list(APPEND CPU_OPENCL_FAILED_TESTS "hipMultiThreadAddCallback") # SEGFAULT @@ -534,73 +500,6 @@ list(APPEND IGPU_OPENCL_FAILED_TESTS "hipStreamSemantics") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "deviceMallocCompile") # Unimplemented list(APPEND IGPU_OPENCL_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "cuda-qrng") # Subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # subprocess aborted 
-list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # (SEGFAULT) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphGetRootNodes_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # (SEGFAULT) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # (SEGFAULT) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # (Timeout) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_Manual") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # (SEGFAULT) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphRetainUserObject_Negative") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # (Failed) -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed @@ -875,73 +774,6 @@ list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-asyncAPI") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-matrixMul") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-qrng") # Subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # 
(Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # (SEGFAULT) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # (SEGFAULT) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # (SEGFAULT) -list(APPEND DGPU_OPENCL_FAILED_TESTS 
"Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # (Timeout) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_Manual") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Functional") # (Failed) -list(APPEND 
DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # (SEGFAULT) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # (Failed) -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS 
"Unit_hipStreamBeginCapture_Basic") # Failed @@ -1256,73 +1088,6 @@ list(APPEND DGPU_LEVEL0_FAILED_TESTS "cuda-clock") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "cuda-simpleTemplates") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "cuda-dwtHaar1D") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # (Failed) -list(APPEND 
DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # (SEGFAULT) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # (SEGFAULT) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # (SEGFAULT) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # (Timeout) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_Manual") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS 
"Unit_hipGraphExecUpdate_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # (SEGFAULT) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # (Failed) -list(APPEND 
DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # (Failed) -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed @@ -1598,74 +1363,7 @@ list(APPEND IGPU_LEVEL0_FAILED_TESTS "cuda-clock") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "cuda-simpleTemplates") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "cuda-dwtHaar1D") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # (SEGFAULT) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_MultipleRun") # subprocess aborted -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_MultipleRun") # subprocess aborted -list(APPEND IGPU_LEVEL0_FAILED_TESTS 
"Unit_hipGraph_BasicFunctional") # subprocess aborted -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_SimpleGraphWithKernel") # subprocess aborted -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # (SEGFAULT) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # subprocess aborted -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # (Failed) -list(APPEND 
IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # (SEGFAULT) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # (SEGFAULT) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # (Timeout) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_Manual") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # (Failed) -list(APPEND 
IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # (Timeout) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # (SEGFAULT) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # (Failed) -list(APPEND 
IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # (Failed) -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # (Failed) list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_InterStrmEventSync_defaultflag") # SEGFAULT @@ -1966,70 +1664,6 @@ list(APPEND CPU_POCL_FAILED_TESTS "cuda-scan") # Failed list(APPEND CPU_POCL_FAILED_TESTS "cuda-sortnet") # Failed list(APPEND CPU_POCL_FAILED_TESTS "cuda-FDTD3d") # Failed list(APPEND CPU_POCL_FAILED_TESTS "cuda-sobolqrng") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # Failed -list(APPEND 
CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND 
CPU_POCL_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND CPU_POCL_FAILED_TESTS 
"Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed @@ -2060,35 +1694,6 @@ list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # Fai list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Timeout -list(APPEND CPU_POCL_FAILED_TESTS 
"Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS 
"Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -2101,12 +1706,6 @@ list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - int") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - float") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - double") # Failed @@ -2327,6 +1926,8 @@ list(APPEND CPU_POCL_FAILED_TESTS 
"Unit_hipDeviceSynchronize_Functional") # Fail list(APPEND CPU_POCL_FAILED_TESTS "syncthreadsExitedThreads") # Timeout list(APPEND CPU_POCL_FAILED_TESTS "hip_sycl_interop") # #terminate called after throwing an instance of 'sycl::_V1::runtime_error' what(): No device of requested type available list(APPEND CPU_POCL_FAILED_TESTS "hip_sycl_interop_no_buffers") # #terminate called after throwing an instance of 'sycl::_V1::runtime_error' what(): No device of requested type available +list(APPEND CPU_POCL_FAILED_TESTS "graphMatrixMultiply") # SEGFAULT inside kernel + # broken tests, they all try to write outside allocated memory; # valgrind + pocl shows: # From 781d7a90b9294ee47f7480c4045ccf9aae363cc7 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Thu, 11 May 2023 22:40:41 +0300 Subject: [PATCH 14/35] fix a problem with UserEvents released too early --- src/backend/OpenCL/CHIPBackendOpenCL.cc | 11 ++++++----- src/backend/OpenCL/CHIPBackendOpenCL.hh | 19 +++++++++++++++++-- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 9a4419bfd..b3144bbec 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -967,7 +967,9 @@ void CL_CALLBACK pfn_notify(cl_event Event, cl_int CommandExecStatus, return; Cbo->Callback(Cbo->Stream, Cbo->Status, Cbo->UserData); if (Cbo->CallbackFinishEvent != nullptr) { - static_cast(Cbo->CallbackFinishEvent->get()).setStatus(CL_COMPLETE); + logTrace("Cbo->CallbackFinishEvent: {}", (void *)Cbo->CallbackFinishEvent); + cl_event Ev = Cbo->CallbackFinishEvent->get().get(); + clSetUserEventStatus(Ev, CL_COMPLETE); Cbo->CallbackFinishEvent->decreaseRefCount("Notified finished."); } delete Cbo; @@ -1016,7 +1018,6 @@ void CHIPQueueOpenCL::MemUnmap(const AllocationInfo *AllocInfo) { void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, void *UserData) { - logTrace("CHIPQueueOpenCL::addCallback()"); 
cl::Context &ClContext_ = ((CHIPContextOpenCL *)ChipContext_)->get(); cl_int Err; @@ -1025,7 +1026,7 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, (CHIPEventOpenCL *)Backend->createCHIPEvent(ChipContext_); cl::UserEvent HoldBackClEvent(ClContext_, &Err); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd); - HoldBackEvent->reset(HoldBackClEvent()); + HoldBackEvent->reset(std::move(HoldBackClEvent)); std::vector WaitForEvents{HoldBackEvent}; auto LastEvent = getLastEvent(); @@ -1046,7 +1047,7 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, (CHIPEventOpenCL *)Backend->createCHIPEvent(ChipContext_); cl::UserEvent CallbackClEvent(ClContext_, &Err); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd); - CallbackEvent->reset(CallbackClEvent()); + CallbackEvent->reset(std::move(CallbackClEvent)); // Make the succeeding commands wait for the user event which will be // set CL_COMPLETE by the callback trampoline function pfn_notify after @@ -1067,7 +1068,7 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, ClQueue.flush(); // Now the CB can start executing in the background: - HoldBackClEvent.setStatus(CL_COMPLETE); + HoldBackEvent->getAsUserEv().setStatus(CL_COMPLETE); return; }; diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index 07e15ae39..fa00504b4 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -104,8 +104,23 @@ public: virtual bool updateFinishStatus(bool ThrowErrorIfNotReady = true) override; cl::Event &get() { return ClEvent; } - void reset(cl::Event &&Ev) { ClEvent = Ev; } - void reset(cl_event Ev) { ClEvent = Ev; } + cl::UserEvent &getAsUserEv() { return static_cast(ClEvent); } + void reset(cl::UserEvent &&Ev) { + ClEvent = Ev; + logTrace("UserEvent {} Moved into {} || NOW: {}", (void *)Ev.get(), + (void *)this, (void *)ClEvent.get()); + } + + void reset(cl::Event &&Ev) { + ClEvent = Ev; + 
logTrace("Event {} Moved into {} || NOW: {}", (void *)Ev.get(), + (void *)this, (void *)ClEvent.get()); + } + void reset(cl_event Ev) { + ClEvent = Ev; + logTrace("Event {} Moved into {} ||| NOW: {}", (void *)Ev, (void *)this, + (void *)ClEvent.get()); + } // for elapsedTime uint64_t getFinishTime(); From 7264667cd02da743da631a1eff893a9622b22de4 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Fri, 12 May 2023 14:11:18 +0300 Subject: [PATCH 15/35] disable USM memory allocation types other than Shared enabling Device/Host USM types make some Texture tests fail --- src/backend/OpenCL/SVMemoryRegion.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/backend/OpenCL/SVMemoryRegion.cc b/src/backend/OpenCL/SVMemoryRegion.cc index aef4f0157..6b06e64b6 100644 --- a/src/backend/OpenCL/SVMemoryRegion.cc +++ b/src/backend/OpenCL/SVMemoryRegion.cc @@ -51,6 +51,10 @@ void *SVMemoryRegion::allocate(size_t Size, size_t Alignment, int Err; if (SupportsIntelUSM) { switch (MemType) { + + // TODO: investigate. Uncommenting this code makes + // a bunch of Unit_hipTexture tests fail with segfault. 
+ /* case hipMemoryTypeHost: Ptr = USM.clHostMemAllocINTEL(Context_(), NULL, Size, Alignment, &Err); break; @@ -60,6 +64,7 @@ void *SVMemoryRegion::allocate(size_t Size, size_t Alignment, break; case hipMemoryTypeManaged: case hipMemoryTypeUnified: + */ default: Ptr = USM.clSharedMemAllocINTEL(Context_(), Device_(), NULL, Size, Alignment, &Err); From f298848c30e2fc0954edf662dce00873f5fefbca Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Sun, 14 May 2023 11:16:15 +0300 Subject: [PATCH 16/35] disable USM support by default, can be enabled with CMake option --- CHIPSPVConfig.hh.in | 2 ++ CMakeLists.txt | 2 +- src/backend/OpenCL/CHIPBackendOpenCL.cc | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHIPSPVConfig.hh.in b/CHIPSPVConfig.hh.in index 10e48ec6e..4af52871e 100644 --- a/CHIPSPVConfig.hh.in +++ b/CHIPSPVConfig.hh.in @@ -47,4 +47,6 @@ #cmakedefine CHIP_DEFAULT_WARP_SIZE @DEFAULT_WARP_SIZE@ +#cmakedefine INTEL_USM_SUPPORT + #endif diff --git a/CMakeLists.txt b/CMakeLists.txt index 643869964..6f86f76a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,7 +190,7 @@ option(USE_EXTERNAL_HIP_TESTS "Use Catch2 tests from the hip-tests submodule" OF option(USE_OCML_ROUNDED_OPS "Use OCML implementations for devicelib functions with explicit rounding mode such as __dadd_rd. Otherwise, rounding mode will be ignored" OFF) option(CHIP_ENABLE_NON_COMPLIANT_DEVICELIB_CODE "Enable non-compliant devicelib code such as calling LLVM builtins from inside kernel code. Enables certain unsigned long devicelib func variants" OFF) option(CHIP_FAST_MATH "Use native_ OpenCL functions which are fast but their precision is implementation defined" OFF) - +option(INTEL_USM_SUPPORT "enable support for cl_intel_unified_shared_memory in the OpenCL backend" OFF) # Warpsize would optimally be a device-specific, queried and made # effective at runtime. However, we need to fix the warpsize since SPIR-Vs need # to be portable across multiple devices. 
It should be more portable to diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index b3144bbec..2be53f19e 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -903,8 +903,12 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, } #endif +#ifdef INTEL_USM_SUPPORT SupportsIntelUSM = DevExts.find("cl_intel_unified_shared_memory") != std::string::npos; +#else + SupportsIntelUSM = false; +#endif if (SupportsIntelUSM) { logDebug("Device supports Intel USM"); Exts.USM.clSharedMemAllocINTEL = From a9f09e73c053134edeb6358cbf81ff9a18d3cef7 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 12:44:43 +0300 Subject: [PATCH 17/35] Update src/CHIPBackend.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/CHIPBackend.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index 8208ed3b9..791a95a4d 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -465,9 +465,8 @@ void *CHIPArgSpillBuffer::allocate(const SPVFuncInfo::Arg &Arg) { void CHIPExecItem::copyArgs(void **Args) { // args need to be set up again ArgsSetup = false; - for (size_t i = 0; i < getNumArgs(); i++) { + for (size_t i = 0; i < getNumArgs(); i++) Args_.push_back(Args[i]); - } } CHIPExecItem::CHIPExecItem(dim3 GridDim, dim3 BlockDim, size_t SharedMem, From 7ee740fba9c99268faf9095ebb7ee454f4573fa9 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 12:46:06 +0300 Subject: [PATCH 18/35] Update src/CHIPBindings.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/CHIPBindings.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 4550cb9e2..0eafb60f3 100644 --- a/src/CHIPBindings.cc +++ 
b/src/CHIPBindings.cc @@ -381,10 +381,9 @@ hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t *pRootNodes, RETURN(hipErrorInvalidValue); size_t ToCopy = pNumRootNodes ? *pNumRootNodes : Nodes.size(); memcpy(pRootNodes, Nodes.data(), ToCopy * sizeof(CHIPGraphNode *)); - } else { + } else // numNodes && pRootNodes == nullptr *pNumRootNodes = Nodes.size(); - } RETURN(hipSuccess); CHIP_CATCH From 79edc5288430b8fd0a566e23b5146ff8f5bee1b2 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 12:47:29 +0300 Subject: [PATCH 19/35] Style fix src/CHIPGraph.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/CHIPGraph.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index 69e0895c3..f2ab92ad9 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -255,9 +255,8 @@ void CHIPGraphExec::launch(CHIPQueue *Queue) { while (ExecQueueCopy.size()) { auto Nodes = ExecQueueCopy.front(); std::string NodesInThisLevel = ""; - for (auto Node : Nodes) { + for (auto Node : Nodes) NodesInThisLevel += Node->Msg + " "; - } logDebug("Executing nodes: {}", NodesInThisLevel); for (auto Node : Nodes) { logDebug("Executing {}", Node->Msg); From b84015790927e332338a9923d172eb8ae8c04c39 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 12:48:10 +0300 Subject: [PATCH 20/35] Style fix src/backend/OpenCL/CHIPBackendOpenCL.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/backend/OpenCL/CHIPBackendOpenCL.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 2be53f19e..4e666594e 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -583,9 +583,8 @@ size_t CHIPEventOpenCL::getCHIPRefc() { if (Err 
!= CL_SUCCESS) { logError("failed to get Reference count from OpenCL event"); return 0; - } else { - return RefC; } + return RefC; } bool CHIPEventOpenCL::wait() { From ba7433089f78a41897e064a93cfc12257fc725a5 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 12:48:55 +0300 Subject: [PATCH 21/35] Style fix src/CHIPGraph.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/CHIPGraph.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index f2ab92ad9..86f8eec72 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -238,9 +238,8 @@ bool CHIPGraphExec::tryLaunchNative(CHIPQueue *Queue) { logDebug("NativeGraph: launched"); Queue->finish(); return true; - } else { - return false; } + return false; } void CHIPGraphExec::launch(CHIPQueue *Queue) { From 0aec729442070c97883a92a15704eb6f4e5bb5f4 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:18:50 +0300 Subject: [PATCH 22/35] Style fix src/backend/OpenCL/CHIPBackendOpenCL.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/backend/OpenCL/CHIPBackendOpenCL.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 4e666594e..38298210a 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -922,9 +922,8 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, Exts.USM.clMemFreeINTEL = (clMemFreeINTEL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clMemFreeINTEL"); - } else { + } else logDebug("Device does not support Intel USM"); - } cl_device_svm_capabilities DeviceSVMCapabilities; int Err = Dev.getInfo(CL_DEVICE_SVM_CAPABILITIES, &DeviceSVMCapabilities); From e806f1911bea4a7f532eda3e94cb5f3d31b9fb00 
Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:19:15 +0300 Subject: [PATCH 23/35] Style fix src/backend/OpenCL/CHIPBackendOpenCL.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/backend/OpenCL/CHIPBackendOpenCL.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 38298210a..bc0b42cac 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -930,11 +930,10 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd); SupportsFineGrainSVM = DeviceSVMCapabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER; - if (SupportsFineGrainSVM) { + if (SupportsFineGrainSVM) logTrace("Device supports fine grain SVM"); - } else { + else logTrace("Device does not support fine grain SVM"); - } ClContext = CtxIn; SvmMemory.init(CtxIn, Dev, Exts.USM, SupportsFineGrainSVM, SupportsIntelUSM); From 2f9f86e3b5f3e6010cecee0c890cfdd17af3ab05 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:19:45 +0300 Subject: [PATCH 24/35] Style fix src/backend/OpenCL/SVMemoryRegion.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/backend/OpenCL/SVMemoryRegion.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/backend/OpenCL/SVMemoryRegion.cc b/src/backend/OpenCL/SVMemoryRegion.cc index 6b06e64b6..cc76b8a23 100644 --- a/src/backend/OpenCL/SVMemoryRegion.cc +++ b/src/backend/OpenCL/SVMemoryRegion.cc @@ -70,12 +70,11 @@ void *SVMemoryRegion::allocate(size_t Size, size_t Alignment, Alignment, &Err); break; } - } else if (SupportsFineGrain) { + else if (SupportsFineGrain) Ptr = ::clSVMAlloc( Context_(), CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, Size, 0); 
- } else { + else Ptr = ::clSVMAlloc(Context_(), CL_MEM_READ_WRITE, Size, 0); - } if (Ptr) { auto Deleter = [Ctx = this->Context_, SupportsUSM = this->SupportsIntelUSM, From e6aa2070ca822debe77163d5d8a13e0b7eea66af Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:20:37 +0300 Subject: [PATCH 25/35] Avoid unnecessary copy in Kernel->getName() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/backend/OpenCL/CHIPBackendOpenCL.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index fa00504b4..39cfd8059 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -352,7 +352,7 @@ public: virtual ~CHIPKernelOpenCL() {} SPVFuncInfo *getFuncInfo() const { return FuncInfo_; } - std::string getName() { return Name_; } + const std::string &getName() const { return Name_; } cl::Kernel &get() { return OclKernel_; } CHIPKernelOpenCL *clone(); From fb5d7e2578059b0c83cf084debcb241851c2602d Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:21:02 +0300 Subject: [PATCH 26/35] add const to bool methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/backend/OpenCL/CHIPBackendOpenCL.hh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index 39cfd8059..3d644a9a9 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -228,10 +228,10 @@ public: const SVMemoryRegion &getRegion() const { return SvmMemory; } virtual void freeImpl(void *Ptr) override; cl::Context &get() { return ClContext; } - bool supportsCommandBuffers() { return SupportsCommandBuffers; } - bool supportsCommandBuffersSVM() { return 
SupportsCommandBuffersSVM; } - bool supportsCommandBuffersHost() { return SupportsCommandBuffersHost; } - const CHIPContextClExts *exts() { return &Exts; } + bool supportsCommandBuffers() const { return SupportsCommandBuffers; } + bool supportsCommandBuffersSVM() const { return SupportsCommandBuffersSVM; } + bool supportsCommandBuffersHost() const { return SupportsCommandBuffersHost; } + const CHIPContextClExts *exts() const { return &Exts; } }; class CHIPDeviceOpenCL : public CHIPDevice { From 29651da863e0387aa39d3760618f317bd7eca68b Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:22:08 +0300 Subject: [PATCH 27/35] Style fix src/backend/OpenCL/CHIPBackendOpenCL.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/backend/OpenCL/CHIPBackendOpenCL.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index bc0b42cac..db2db9c4f 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -1590,11 +1590,10 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( Handle, nullptr, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); - } else { + } else Status = Exts->clCommandSVMMemcpyPOCL( Handle, nullptr, Dst, Src, Size, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); - } } return Status == CL_SUCCESS; From 282a90c004f93e3ee9c2a821624cdc1d2fc52b69 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:22:52 +0300 Subject: [PATCH 28/35] Style fix src/backend/OpenCL/CHIPBackendOpenCL.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/backend/OpenCL/CHIPBackendOpenCL.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git 
a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index db2db9c4f..728a4dcda 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -1459,11 +1459,10 @@ bool CHIPGraphNativeOpenCL::finalize() { if (Status == CL_SUCCESS) { Finalized = true; return true; - } else { - logError("clFinalizeCommandBufferKHR FAILED with status {}", - resultToString(Status)); - return false; } + logError("clFinalizeCommandBufferKHR FAILED with status {}", + resultToString(Status)); + return false; } CHIPGraphNativeOpenCL::~CHIPGraphNativeOpenCL() { From 1e1fafcbf336a115b9a348b1175f823033867852 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:25:46 +0300 Subject: [PATCH 29/35] Style fix src/CHIPGraph.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/CHIPGraph.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index 86f8eec72..3149b182b 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -207,9 +207,9 @@ void CHIPGraph::removeNode(CHIPGraphNode *Node) { bool CHIPGraphExec::tryLaunchNative(CHIPQueue *Queue) { bool UsedNativeGraph = false; if (NativeGraph) { - if (NativeGraph->isFinalized()) { + if (NativeGraph->isFinalized()) logDebug("NativeGraph: launching existing graph"); - } else { + else { logDebug("NativeGraph: constructed but failed to finalize"); return false; } From 84ab96d116c6c6f7d889665dece12bfc517b7250 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:26:06 +0300 Subject: [PATCH 30/35] Style fix src/CHIPBindings.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/CHIPBindings.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 0eafb60f3..6e702ed5a 100644 --- 
a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -289,9 +289,7 @@ hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t *from, const hipGraphNode_t *to, size_t numDependencies) { CHIP_TRY - if (!graph) - RETURN(hipErrorInvalidValue); - if (!from || !to) + if (!graph || !from || !to) RETURN(hipErrorInvalidValue); CHIPInitialize(); CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); From d922f7587c609708c4cbacf54a99874696e232ba Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:41:32 +0300 Subject: [PATCH 31/35] Style fix src/CHIPGraph.cc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/CHIPGraph.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index 3149b182b..9088428cf 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -55,14 +55,12 @@ void CHIPGraphNode::DFS(std::vector CurrPath, void CHIPGraphNode::checkDependencies(size_t numDependencies, CHIPGraphNode **pDependencies) { - if (numDependencies > 0 && pDependencies == nullptr) { + if (numDependencies > 0 && pDependencies == nullptr) CHIPERR_LOG_AND_THROW("numDependencies > 0 && pDependencies == nullptr", hipErrorInvalidValue); - } - if (numDependencies == 0 && pDependencies != nullptr) { + if (numDependencies == 0 && pDependencies != nullptr) CHIPERR_LOG_AND_THROW("numDependencies == 0 && pDependencies != nullptr", hipErrorInvalidValue); - } } CHIPGraph::CHIPGraph(const CHIPGraph &OriginalGraph) { From 79a6e38f1a42f92a52adc9fbc56a49f03987442b Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:43:32 +0300 Subject: [PATCH 32/35] Style fixes --- src/CHIPBindings.cc | 5 +---- src/CHIPGraph.cc | 3 +-- src/backend/OpenCL/CHIPBackendOpenCL.cc | 14 +++++++------- src/backend/OpenCL/CHIPBackendOpenCL.hh | 18 +++++++++--------- src/backend/OpenCL/SVMemoryRegion.cc | 2 +- 5 files changed, 19 insertions(+), 23 
deletions(-) diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 6e702ed5a..31b0b0d39 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -350,17 +350,14 @@ hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, CHIP_TRY if (!graph || !numNodes) RETURN(hipErrorInvalidValue); - // if (!nodes && !numNodes) - // RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Nodes = GRAPH(graph)->getNodes(); if (nodes) { - if (numNodes && (*numNodes > Nodes.size())) + if (*numNodes > Nodes.size()) RETURN(hipErrorInvalidValue); size_t ToCopy = numNodes ? *numNodes : Nodes.size(); memcpy(nodes, Nodes.data(), ToCopy * sizeof(CHIPGraphNode *)); } else { - // numNodes && nodes == nullptr *numNodes = Nodes.size(); } RETURN(hipSuccess); diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index 9088428cf..4a100cd95 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -217,13 +217,12 @@ bool CHIPGraphExec::tryLaunchNative(CHIPQueue *Queue) { if (!NativeGraph) return false; - for (auto &Node : OriginalGraph_->getNodes()) { + for (auto &Node : OriginalGraph_->getNodes()) if (!NativeGraph->addNode(Node)) { logError("NativeGraph: failed to add node of type: {}", Node->getType()); return false; } - } if (!NativeGraph->finalize()) { logDebug("NativeGraph: failed to finalize"); diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 728a4dcda..885ee1908 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -524,7 +524,7 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { CHIPEventOpenCL::CHIPEventOpenCL(CHIPContextOpenCL *ChipContext, cl_event ClEvent, CHIPEventFlags Flags, bool UserEvent) - : CHIPEvent((CHIPContext *)(ChipContext), Flags), ClEvent(ClEvent) { + : CHIPEvent((CHIPContext *)(ChipContext), Flags), ClEvent_(ClEvent) { UserEvent_ = UserEvent; } @@ -535,7 +535,7 @@ CHIPEventOpenCL::CHIPEventOpenCL(CHIPContextOpenCL *ChipContext, uint64_t 
CHIPEventOpenCL::getFinishTime() { int Status = CL_SUCCESS; uint64_t Ret; - Ret = ClEvent.getProfilingInfo(&Status); + Ret = ClEvent_.getProfilingInfo(&Status); if (Status != CL_SUCCESS) { logError("Failed to query event for profiling info."); @@ -562,7 +562,7 @@ void CHIPEventOpenCL::recordStream(CHIPQueue *ChipQueue) { CHIPEventOpenCL *Marker = (CHIPEventOpenCL *)ChipQueue->enqueueMarkerImpl(); // see operator=() on cl::Event // should automatically release ClEvent if it already contains valid handle - ClEvent = Marker->ClEvent; + ClEvent_ = Marker->ClEvent_; Msg = "recordStreamMarker"; EventStatus_ = EVENT_STATUS_RECORDING; delete Marker; @@ -579,7 +579,7 @@ void CHIPEventOpenCL::recordStream(CHIPQueue *ChipQueue) { size_t CHIPEventOpenCL::getCHIPRefc() { int Err = CL_SUCCESS; - size_t RefC = ClEvent.getInfo(&Err); + size_t RefC = ClEvent_.getInfo(&Err); if (Err != CL_SUCCESS) { logError("failed to get Reference count from OpenCL event"); return 0; @@ -595,20 +595,20 @@ bool CHIPEventOpenCL::wait() { return false; } - auto Status = ClEvent.wait(); + auto Status = ClEvent_.wait(); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); return true; } bool CHIPEventOpenCL::updateFinishStatus(bool ThrowErrorIfNotReady) { logTrace("CHIPEventOpenCL::updateFinishStatus()"); - if (ThrowErrorIfNotReady && ClEvent.get() == nullptr) + if (ThrowErrorIfNotReady && ClEvent_.get() == nullptr) CHIPERR_LOG_AND_THROW("OpenCL has not been initialized cl_event is null", hipErrorNotReady); int Status = CL_SUCCESS; int UpdatedStatus = - ClEvent.getInfo(&Status); + ClEvent_.getInfo(&Status); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); if (ThrowErrorIfNotReady && UpdatedStatus != CL_COMPLETE) { CHIPERR_LOG_AND_THROW("Event not yet ready", hipErrorNotReady); diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index 3d644a9a9..b8b6c50cd 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ 
b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -85,7 +85,7 @@ public: class CHIPEventOpenCL : public CHIPEvent { private: - cl::Event ClEvent; + cl::Event ClEvent_; public: CHIPEventOpenCL(CHIPContextOpenCL *ChipContext, cl_event ClEvent, @@ -103,23 +103,23 @@ public: virtual void hostSignal() override{}; virtual bool updateFinishStatus(bool ThrowErrorIfNotReady = true) override; - cl::Event &get() { return ClEvent; } - cl::UserEvent &getAsUserEv() { return static_cast(ClEvent); } + cl::Event &get() { return ClEvent_; } + cl::UserEvent &getAsUserEv() { return static_cast(ClEvent_); } void reset(cl::UserEvent &&Ev) { - ClEvent = Ev; + ClEvent_ = Ev; logTrace("UserEvent {} Moved into {} || NOW: {}", (void *)Ev.get(), - (void *)this, (void *)ClEvent.get()); + (void *)this, (void *)ClEvent_.get()); } void reset(cl::Event &&Ev) { - ClEvent = Ev; + ClEvent_ = Ev; logTrace("Event {} Moved into {} || NOW: {}", (void *)Ev.get(), - (void *)this, (void *)ClEvent.get()); + (void *)this, (void *)ClEvent_.get()); } void reset(cl_event Ev) { - ClEvent = Ev; + ClEvent_ = Ev; logTrace("Event {} Moved into {} ||| NOW: {}", (void *)Ev, (void *)this, - (void *)ClEvent.get()); + (void *)ClEvent_.get()); } // for elapsedTime diff --git a/src/backend/OpenCL/SVMemoryRegion.cc b/src/backend/OpenCL/SVMemoryRegion.cc index cc76b8a23..1d4f76117 100644 --- a/src/backend/OpenCL/SVMemoryRegion.cc +++ b/src/backend/OpenCL/SVMemoryRegion.cc @@ -70,7 +70,7 @@ void *SVMemoryRegion::allocate(size_t Size, size_t Alignment, Alignment, &Err); break; } - else if (SupportsFineGrain) + } else if (SupportsFineGrain) Ptr = ::clSVMAlloc( Context_(), CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, Size, 0); else From d7a0e71dd374fabf28bc5cf71e74bdbd9a65de26 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 13:59:41 +0300 Subject: [PATCH 33/35] More fixes, remove unused code / comments --- src/backend/OpenCL/CHIPBackendOpenCL.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) 
diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 885ee1908..de509e4b3 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -1598,7 +1598,6 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( return Status == CL_SUCCESS; } -// DONE bool CHIPGraphNativeOpenCL::addMemcpyNode( CHIPGraphNodeMemcpyFromSymbol *Node, std::vector &SyncPointDeps, @@ -1626,7 +1625,6 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( return Status == CL_SUCCESS; } -// DONE bool CHIPGraphNativeOpenCL::addMemcpyNode( CHIPGraphNodeMemcpyToSymbol *Node, std::vector &SyncPointDeps, @@ -1652,7 +1650,6 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( return Status == CL_SUCCESS; } -// DONE bool CHIPGraphNativeOpenCL::addMemsetNode( CHIPGraphNodeMemset *Node, std::vector &SyncPointDeps, cl_sync_point_khr *SyncPoint) { @@ -1677,7 +1674,6 @@ bool CHIPGraphNativeOpenCL::addMemsetNode( #ifdef cl_pocl_command_buffer_host_exec -// DONE bool CHIPGraphNativeOpenCL::addHostNode( CHIPGraphNodeHost *Node, std::vector &SyncPointDeps, cl_sync_point_khr *SyncPoint) { @@ -1693,7 +1689,6 @@ bool CHIPGraphNativeOpenCL::addHostNode( return Status == CL_SUCCESS; } -// TODO output cl_event bool CHIPGraphNativeOpenCL::addEventRecordNode( CHIPGraphNodeEventRecord *Node, std::vector &SyncPointDeps, @@ -1701,17 +1696,19 @@ bool CHIPGraphNativeOpenCL::addEventRecordNode( if (!Exts->clCommandSignalEventPOCL) return false; + return false; + // unfinished +#if 0 CHIPEvent *E = Node->getEvent(); CHIPEventOpenCL *CLE = static_cast(E); int Status; - // TODO BROKEN Status = Exts->clCommandSignalEventPOCL(Handle, nullptr, nullptr, SyncPoint, nullptr); return Status == CL_SUCCESS; +#endif } -// DONE bool CHIPGraphNativeOpenCL::addEventWaitNode( CHIPGraphNodeWaitEvent *Node, std::vector &SyncPointDeps, cl_sync_point_khr *SyncPoint) { From 582d4ff8fb0e7858ebeb09e8ffebcba33fae6803 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 
14:13:00 +0300 Subject: [PATCH 34/35] Rename getRegion -> getSVMRegion, rename private variables --- src/backend/OpenCL/CHIPBackendOpenCL.cc | 214 ++++++++++++------------ src/backend/OpenCL/CHIPBackendOpenCL.hh | 71 ++++---- src/backend/OpenCL/SVMemoryRegion.cc | 8 +- 3 files changed, 146 insertions(+), 147 deletions(-) diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index de509e4b3..8939fcb65 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -234,12 +234,12 @@ annotateSvmPointers(const CHIPContextOpenCL &Ctx, cl_kernel KernelAPIHandle) { std::vector SvmAnnotationList; std::unique_ptr>> SvmKeepAlives; LOCK(Ctx.ContextMtx); // CHIPContextOpenCL::SvmMemory - auto NumSvmAllocations = Ctx.getRegion().getNumAllocations(); + auto NumSvmAllocations = Ctx.getSVMRegion().getNumAllocations(); if (NumSvmAllocations) { SvmAnnotationList.reserve(NumSvmAllocations); SvmKeepAlives.reset(new std::vector>()); SvmKeepAlives->reserve(NumSvmAllocations); - for (std::shared_ptr Ptr : Ctx.getRegion().getSvmPointers()) { + for (std::shared_ptr Ptr : Ctx.getSVMRegion().getSvmPointers()) { SvmAnnotationList.push_back(Ptr.get()); SvmKeepAlives->push_back(Ptr); } @@ -364,7 +364,7 @@ CHIPDeviceOpenCL::createTexture(const hipResourceDesc *ResDesc, CHIPDeviceOpenCL::CHIPDeviceOpenCL(CHIPContextOpenCL *ChipCtx, cl::Device DevIn, int Idx) - : CHIPDevice(ChipCtx, Idx), ClDevice(DevIn) { + : CHIPDevice(ChipCtx, Idx), ClDevice_(DevIn) { logTrace("CHIPDeviceOpenCL initialized via OpenCL device pointer and context " "pointer"); } @@ -382,21 +382,21 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { cl_int Err; std::string Temp; - this->MaxMallocSize_ = ClDevice.getInfo(); - Temp = ClDevice.getInfo(); + this->MaxMallocSize_ = ClDevice_.getInfo(); + Temp = ClDevice_.getInfo(); strncpy(HipDeviceProps_.name, Temp.c_str(), 255); HipDeviceProps_.name[255] = 0; HipDeviceProps_.totalGlobalMem = - 
ClDevice.getInfo(&Err); + ClDevice_.getInfo(&Err); HipDeviceProps_.sharedMemPerBlock = - ClDevice.getInfo(&Err); + ClDevice_.getInfo(&Err); HipDeviceProps_.maxThreadsPerBlock = - ClDevice.getInfo(&Err); + ClDevice_.getInfo(&Err); - std::vector Wi = ClDevice.getInfo(); + std::vector Wi = ClDevice_.getInfo(); HipDeviceProps_.maxThreadsDim[0] = Wi[0]; HipDeviceProps_.maxThreadsDim[1] = Wi[1]; @@ -404,23 +404,23 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { // Maximum configured clock frequency of the device in MHz. HipDeviceProps_.clockRate = - 1000 * ClDevice.getInfo(); + 1000 * ClDevice_.getInfo(); HipDeviceProps_.multiProcessorCount = - ClDevice.getInfo(); + ClDevice_.getInfo(); HipDeviceProps_.l2CacheSize = - ClDevice.getInfo(); + ClDevice_.getInfo(); // not actually correct HipDeviceProps_.totalConstMem = - ClDevice.getInfo(); + ClDevice_.getInfo(); // totally made up HipDeviceProps_.regsPerBlock = 64; HipDeviceProps_.warpSize = CHIP_DEFAULT_WARP_SIZE; // Try to check that we support the default warp size. 
- std::vector Sg = ClDevice.getInfo(); + std::vector Sg = ClDevice_.getInfo(); if (std::find(Sg.begin(), Sg.end(), CHIP_DEFAULT_WARP_SIZE) == Sg.end()) { logWarn( "The device might not support subgroup size {}, warp-size sensitive " @@ -440,7 +440,7 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { HipDeviceProps_.computeMode = 0; HipDeviceProps_.arch = {}; - Temp = ClDevice.getInfo(); + Temp = ClDevice_.getInfo(); if (Temp.find("cl_khr_global_int32_base_atomics") != std::string::npos) HipDeviceProps_.arch.hasGlobalInt32Atomics = 1; else @@ -501,11 +501,11 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { HipDeviceProps_.pageableMemoryAccess = 0; HipDeviceProps_.pageableMemoryAccessUsesHostPageTables = 0; - auto Max1D2DWidth = ClDevice.getInfo(); - auto Max2DHeight = ClDevice.getInfo(); - auto Max3DWidth = ClDevice.getInfo(); - auto Max3DHeight = ClDevice.getInfo(); - auto Max3DDepth = ClDevice.getInfo(); + auto Max1D2DWidth = ClDevice_.getInfo(); + auto Max2DHeight = ClDevice_.getInfo(); + auto Max3DWidth = ClDevice_.getInfo(); + auto Max3DHeight = ClDevice_.getInfo(); + auto Max3DDepth = ClDevice_.getInfo(); // Clamp texture dimensions to [0, INT_MAX] because the return value // of hipDeviceGetAttribute() is int type. @@ -726,7 +726,7 @@ void CHIPModuleOpenCL::compile(CHIPDevice *ChipDev) { addKernel(ChipKernel); } - Program_ = Program; + ClProgram_ = Program; } CHIPQueue *CHIPDeviceOpenCL::createQueue(CHIPQueueFlags Flags, int Priority) { @@ -753,9 +753,9 @@ CHIPKernelOpenCL *CHIPKernelOpenCL::clone() { // NOTE: clCloneKernel is not used here due to its experience on // Intel (GPU) OpenCL which crashed if clSetKernelArgSVMPointer() was // called on the original cl_kernel. 
- auto Kernel = cl::Kernel(Module->get(), Name_.c_str(), &Err); + auto Kernel = cl::Kernel(Module_->get(), Name_.c_str(), &Err); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd); - return new CHIPKernelOpenCL(Kernel, Device, Name_, getFuncInfo(), Module); + return new CHIPKernelOpenCL(Kernel, Device_, Name_, getFuncInfo(), Module_); } hipError_t CHIPKernelOpenCL::getAttributes(hipFuncAttributes *Attr) { @@ -780,27 +780,27 @@ hipError_t CHIPKernelOpenCL::getAttributes(hipFuncAttributes *Attr) { CHIPKernelOpenCL::CHIPKernelOpenCL(cl::Kernel ClKernel, CHIPDeviceOpenCL *Dev, std::string HostFName, SPVFuncInfo *FuncInfo, CHIPModuleOpenCL *Parent) - : CHIPKernel(HostFName, FuncInfo), Module(Parent), Device(Dev) { + : CHIPKernel(HostFName, FuncInfo), Module_(Parent), Device_(Dev) { - OclKernel_ = ClKernel; + ClKernel_ = ClKernel; int Err = 0; // TODO attributes - cl_uint NumArgs = OclKernel_.getInfo(&Err); + cl_uint NumArgs = ClKernel_.getInfo(&Err); CHIPERR_CHECK_LOG_AND_THROW(Err, CL_SUCCESS, hipErrorTbd, "Failed to get num args for kernel"); assert(FuncInfo_->getNumKernelArgs() == NumArgs); MaxWorkGroupSize_ = - OclKernel_.getWorkGroupInfo(Device->get()); + ClKernel_.getWorkGroupInfo(Device_->get()); StaticLocalSize_ = - OclKernel_.getWorkGroupInfo(Device->get()); + ClKernel_.getWorkGroupInfo(Device_->get()); MaxDynamicLocalSize_ = - (size_t)Device->getAttr(hipDeviceAttributeMaxSharedMemoryPerBlock) - + (size_t)Device_->getAttr(hipDeviceAttributeMaxSharedMemoryPerBlock) - StaticLocalSize_; PrivateSize_ = - OclKernel_.getWorkGroupInfo(Device->get()); + ClKernel_.getWorkGroupInfo(Device_->get()); - Name_ = OclKernel_.getInfo(); + Name_ = ClKernel_.getInfo(); if (NumArgs > 0) { logTrace("Kernel {} numArgs: {} \n", Name_, NumArgs); @@ -821,7 +821,7 @@ bool CHIPContextOpenCL::allDevicesSupportFineGrainSVMorUSM() { void CHIPContextOpenCL::freeImpl(void *Ptr) { LOCK(ContextMtx); // CHIPContextOpenCL::SvmMemory - SvmMemory.free(Ptr); + SvmMemory_.free(Ptr); } 
CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, @@ -829,40 +829,40 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, logTrace("CHIPContextOpenCL Initialized via OpenCL Context pointer."); std::string DevExts = Dev.getInfo(); - std::memset(&Exts, 0, sizeof(Exts)); + std::memset(&Exts_, 0, sizeof(Exts_)); SupportsCommandBuffers = DevExts.find("cl_khr_command_buffer") != std::string::npos; if (SupportsCommandBuffers) { logDebug("Device supports cl_khr_command_buffer"); - Exts.clCreateCommandBufferKHR = + Exts_.clCreateCommandBufferKHR = (clCreateCommandBufferKHR_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clCreateCommandBufferKHR"); - Exts.clCommandCopyBufferKHR = + Exts_.clCommandCopyBufferKHR = (clCommandCopyBufferKHR_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clCommandCopyBufferKHR"); - Exts.clCommandCopyBufferRectKHR = (clCommandCopyBufferRectKHR_fn):: + Exts_.clCommandCopyBufferRectKHR = (clCommandCopyBufferRectKHR_fn):: clGetExtensionFunctionAddressForPlatform(Plat(), "clCommandCopyBufferRectKHR"); - Exts.clCommandFillBufferKHR = + Exts_.clCommandFillBufferKHR = (clCommandFillBufferKHR_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clCommandFillBufferKHR"); - Exts.clCommandNDRangeKernelKHR = (clCommandNDRangeKernelKHR_fn):: + Exts_.clCommandNDRangeKernelKHR = (clCommandNDRangeKernelKHR_fn):: clGetExtensionFunctionAddressForPlatform(Plat(), "clCommandNDRangeKernelKHR"); - Exts.clCommandBarrierWithWaitListKHR = + Exts_.clCommandBarrierWithWaitListKHR = (clCommandBarrierWithWaitListKHR_fn):: clGetExtensionFunctionAddressForPlatform( Plat(), "clCommandBarrierWithWaitListKHR"); - Exts.clFinalizeCommandBufferKHR = (clFinalizeCommandBufferKHR_fn):: + Exts_.clFinalizeCommandBufferKHR = (clFinalizeCommandBufferKHR_fn):: clGetExtensionFunctionAddressForPlatform(Plat(), "clFinalizeCommandBufferKHR"); - Exts.clEnqueueCommandBufferKHR = (clEnqueueCommandBufferKHR_fn):: + 
Exts_.clEnqueueCommandBufferKHR = (clEnqueueCommandBufferKHR_fn):: clGetExtensionFunctionAddressForPlatform(Plat(), "clEnqueueCommandBufferKHR"); - Exts.clReleaseCommandBufferKHR = (clReleaseCommandBufferKHR_fn):: + Exts_.clReleaseCommandBufferKHR = (clReleaseCommandBufferKHR_fn):: clGetExtensionFunctionAddressForPlatform(Plat(), "clReleaseCommandBufferKHR"); - Exts.clGetCommandBufferInfoKHR = (clGetCommandBufferInfoKHR_fn):: + Exts_.clGetCommandBufferInfoKHR = (clGetCommandBufferInfoKHR_fn):: clGetExtensionFunctionAddressForPlatform(Plat(), "clGetCommandBufferInfoKHR"); } @@ -871,16 +871,16 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, DevExts.find("cl_pocl_command_buffer_svm") != std::string::npos; if (SupportsCommandBuffersSVM) { logDebug("Device supports cl_pocl_command_buffer_svm"); - Exts.clCommandSVMMemcpyPOCL = + Exts_.clCommandSVMMemcpyPOCL = (clCommandSVMMemcpyPOCL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clCommandSVMMemcpyPOCL"); - Exts.clCommandSVMMemcpyRectPOCL = (clCommandSVMMemcpyRectPOCL_fn):: + Exts_.clCommandSVMMemcpyRectPOCL = (clCommandSVMMemcpyRectPOCL_fn):: clGetExtensionFunctionAddressForPlatform(Plat(), "clCommandSVMMemcpyRectPOCL"); - Exts.clCommandSVMMemfillPOCL = + Exts_.clCommandSVMMemfillPOCL = (clCommandSVMMemfillPOCL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clCommandSVMMemfillPOCL"); - Exts.clCommandSVMMemfillRectPOCL = (clCommandSVMMemfillRectPOCL_fn):: + Exts_.clCommandSVMMemfillRectPOCL = (clCommandSVMMemfillRectPOCL_fn):: clGetExtensionFunctionAddressForPlatform(Plat(), "clCommandSVMMemfillRectPOCL"); } @@ -890,13 +890,13 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, DevExts.find("cl_pocl_command_buffer_host_exec") != std::string::npos; if (SupportsCommandBuffersHost) { logDebug("Device supports cl_pocl_command_buffer_host_exec"); - Exts.clCommandHostFuncPOCL = + Exts_.clCommandHostFuncPOCL = 
(clCommandHostFuncPOCL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clCommandHostFuncPOCL"); - Exts.clCommandWaitForEventPOCL = (clCommandWaitForEventPOCL_fn):: + Exts_.clCommandWaitForEventPOCL = (clCommandWaitForEventPOCL_fn):: clGetExtensionFunctionAddressForPlatform(Plat(), "clCommandWaitForEventPOCL"); - Exts.clCommandSignalEventPOCL = + Exts_.clCommandSignalEventPOCL = (clCommandSignalEventPOCL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clCommandSignalEventPOCL"); } @@ -910,16 +910,16 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, #endif if (SupportsIntelUSM) { logDebug("Device supports Intel USM"); - Exts.USM.clSharedMemAllocINTEL = + Exts_.USM.clSharedMemAllocINTEL = (clSharedMemAllocINTEL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clSharedMemAllocINTEL"); - Exts.USM.clDeviceMemAllocINTEL = + Exts_.USM.clDeviceMemAllocINTEL = (clDeviceMemAllocINTEL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clDeviceMemAllocINTEL"); - Exts.USM.clHostMemAllocINTEL = + Exts_.USM.clHostMemAllocINTEL = (clHostMemAllocINTEL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clHostMemAllocINTEL"); - Exts.USM.clMemFreeINTEL = + Exts_.USM.clMemFreeINTEL = (clMemFreeINTEL_fn)::clGetExtensionFunctionAddressForPlatform( Plat(), "clMemFreeINTEL"); } else @@ -935,8 +935,8 @@ CHIPContextOpenCL::CHIPContextOpenCL(cl::Context CtxIn, cl::Device Dev, else logTrace("Device does not support fine grain SVM"); - ClContext = CtxIn; - SvmMemory.init(CtxIn, Dev, Exts.USM, SupportsFineGrainSVM, SupportsIntelUSM); + ClContext_ = CtxIn; + SvmMemory_.init(CtxIn, Dev, Exts_.USM, SupportsFineGrainSVM, SupportsIntelUSM); } void *CHIPContextOpenCL::allocateImpl(size_t Size, size_t Alignment, @@ -945,7 +945,7 @@ void *CHIPContextOpenCL::allocateImpl(size_t Size, size_t Alignment, void *Retval; LOCK(ContextMtx); // CHIPContextOpenCL::SvmMemory - Retval = SvmMemory.allocate(Size, Alignment, MemType); + Retval = 
SvmMemory_.allocate(Size, Alignment, MemType); return Retval; } @@ -987,15 +987,15 @@ void CHIPQueueOpenCL::MemMap(const AllocationInfo *AllocInfo, // TODO why does this code use blocking = true ?? if (Type == CHIPQueue::MEM_MAP_TYPE::HOST_READ) { logDebug("CHIPQueueOpenCL::MemMap HOST_READ"); - Status = ClQueue.enqueueMapSVM(AllocInfo->HostPtr, CL_TRUE, CL_MAP_READ, + Status = ClQueue_.enqueueMapSVM(AllocInfo->HostPtr, CL_TRUE, CL_MAP_READ, AllocInfo->Size); } else if (Type == CHIPQueue::MEM_MAP_TYPE::HOST_WRITE) { logDebug("CHIPQueueOpenCL::MemMap HOST_WRITE"); - Status = ClQueue.enqueueMapSVM(AllocInfo->HostPtr, CL_TRUE, CL_MAP_WRITE, + Status = ClQueue_.enqueueMapSVM(AllocInfo->HostPtr, CL_TRUE, CL_MAP_WRITE, AllocInfo->Size); } else if (Type == CHIPQueue::MEM_MAP_TYPE::HOST_READ_WRITE) { logDebug("CHIPQueueOpenCL::MemMap HOST_READ_WRITE"); - Status = ClQueue.enqueueMapSVM(AllocInfo->HostPtr, CL_TRUE, + Status = ClQueue_.enqueueMapSVM(AllocInfo->HostPtr, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, AllocInfo->Size); } else { @@ -1012,7 +1012,7 @@ void CHIPQueueOpenCL::MemUnmap(const AllocationInfo *AllocInfo) { } logDebug("CHIPQueueOpenCL::MemUnmap"); - auto Status = ClQueue.enqueueUnmapSVM(AllocInfo->HostPtr); + auto Status = ClQueue_.enqueueUnmapSVM(AllocInfo->HostPtr); assert(Status == CL_SUCCESS); } @@ -1066,7 +1066,7 @@ void CHIPQueueOpenCL::addCallback(hipStreamCallback_t Callback, CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); updateLastEvent(CallbackCompleted); - ClQueue.flush(); + ClQueue_.flush(); // Now the CB can start executing in the background: HoldBackEvent->getAsUserEv().setStatus(CL_COMPLETE); @@ -1078,7 +1078,7 @@ CHIPEvent *CHIPQueueOpenCL::enqueueMarkerImpl() { CHIPEventOpenCL *MarkerEvent = (CHIPEventOpenCL *)Backend->createCHIPEvent(ChipContext_); cl::Event RetEv; - auto Status = ClQueue.enqueueMarkerWithWaitList(nullptr, &RetEv); + auto Status = ClQueue_.enqueueMarkerWithWaitList(nullptr, &RetEv); 
CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); MarkerEvent->reset(std::move(RetEv)); MarkerEvent->Msg = "marker"; @@ -1123,7 +1123,7 @@ CHIPEvent *CHIPQueueOpenCL::launchImpl(CHIPExecItem *ExecItem) { annotateSvmPointers(*OclContext, Kernel->get().get()); cl::Event RetEv; - auto Status = ClQueue.enqueueNDRangeKernel(Kernel->get(), GlobalOffset, + auto Status = ClQueue_.enqueueNDRangeKernel(Kernel->get(), GlobalOffset, Global, Local, nullptr, &RetEv); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); @@ -1184,7 +1184,7 @@ CHIPQueueOpenCL::CHIPQueueOpenCL(CHIPDevice *ChipDevice, int Priority, logWarn("CHIPQueueOpenCL is ignoring Priority value"); if (Queue) - ClQueue = cl::CommandQueue(Queue); + ClQueue_ = cl::CommandQueue(Queue); else { cl::Context &ClContext_ = ((CHIPContextOpenCL *)ChipContext_)->get(); cl::Device &ClDevice_ = ((CHIPDeviceOpenCL *)ChipDevice_)->get(); @@ -1198,7 +1198,7 @@ CHIPQueueOpenCL::CHIPQueueOpenCL(CHIPDevice *ChipDevice, int Priority, const cl_command_queue Q = clCreateCommandQueueWithProperties( ClContext_.get(), ClDevice_.get(), QueueProperties, &Status); - ClQueue = cl::CommandQueue(Q); + ClQueue_ = cl::CommandQueue(Q); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorInitializationError); @@ -1224,14 +1224,14 @@ CHIPEvent *CHIPQueueOpenCL::memCopyAsyncImpl(void *Dst, const void *Src, // makes/ it pass, but Intel CPU OpenCL returns CL_​MEM_​COPY_​OVERLAP // like it should. To unify the behavior, let's convert the special case to // a maker here, so we can return an event. 
- auto Status = ClQueue.enqueueMarkerWithWaitList(nullptr, &RetEv); + auto Status = ClQueue_.enqueueMarkerWithWaitList(nullptr, &RetEv); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); } else { #ifdef DUBIOUS_LOCKS LOCK(Backend->DubiousLockOpenCL) #endif cl_event E = nullptr; - auto Status = ::clEnqueueSVMMemcpy(ClQueue.get(), CL_FALSE, Dst, Src, Size, + auto Status = ::clEnqueueSVMMemcpy(ClQueue_.get(), CL_FALSE, Dst, Src, Size, 0, nullptr, &E); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorRuntimeMemory); RetEv = E; @@ -1244,7 +1244,7 @@ void CHIPQueueOpenCL::finish() { #ifdef DUBIOUS_LOCKS LOCK(Backend->DubiousLockOpenCL) #endif - auto Status = ClQueue.finish(); + auto Status = ClQueue_.finish(); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); } @@ -1255,7 +1255,7 @@ CHIPEvent *CHIPQueueOpenCL::memFillAsyncImpl(void *Dst, size_t Size, (CHIPEventOpenCL *)Backend->createCHIPEvent(ChipContext_); logTrace("clSVMmemfill {} / {} B\n", Dst, Size); cl_event Ev = nullptr; - auto Status = ::clEnqueueSVMMemFill(ClQueue.get(), Dst, Pattern, PatternSize, + auto Status = ::clEnqueueSVMMemFill(ClQueue_.get(), Dst, Pattern, PatternSize, Size, 0, nullptr, &Ev); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorRuntimeMemory); cl::Event RetEv(Ev); @@ -1289,7 +1289,7 @@ hipError_t CHIPQueueOpenCL::getBackendHandles(uintptr_t *NativeInfo, *NumHandles = 4; // Get queue handler - NativeInfo[3] = (uintptr_t)ClQueue.get(); + NativeInfo[3] = (uintptr_t)ClQueue_.get(); // Get context handler cl::Context &Ctx = ((CHIPContextOpenCL *)ChipContext_)->get(); @@ -1327,9 +1327,9 @@ CHIPQueueOpenCL::enqueueBarrierImpl(std::vector *EventsToWaitFor) { auto Ee = (CHIPEventOpenCL *)E; Events.push_back(Ee->get()); } - Status = ClQueue.enqueueBarrierWithWaitList(&Events, &RetEv); + Status = ClQueue_.enqueueBarrierWithWaitList(&Events, &RetEv); } else { - Status = ClQueue.enqueueBarrierWithWaitList(nullptr, &RetEv); + Status = 
ClQueue_.enqueueBarrierWithWaitList(nullptr, &RetEv); } CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); @@ -1346,7 +1346,7 @@ CHIPGraphNative *CHIPQueueOpenCL::createNativeGraph() { if (!Ctx->supportsCommandBuffers()) return nullptr; - cl_command_queue CQ = ClQueue.get(); + cl_command_queue CQ = ClQueue_.get(); int err = CL_SUCCESS; cl_command_buffer_khr Res = Ctx->exts()->clCreateCommandBufferKHR(1, &CQ, 0, &err); @@ -1369,7 +1369,7 @@ CHIPEvent *CHIPQueueOpenCL::enqueueNativeGraph(CHIPGraphNative *NativeGraph) { return nullptr; if (NativeGraph == nullptr) return nullptr; - cl_command_queue CQ = ClQueue.get(); + cl_command_queue CQ = ClQueue_.get(); cl_event TmpEv = nullptr; int Status = Ctx->exts()->clEnqueueCommandBufferKHR(1, &CQ, G->get(), 0, nullptr, &TmpEv); @@ -1392,8 +1392,8 @@ bool CHIPGraphNativeOpenCL::addNode(CHIPGraphNode *NewNode) { const std::vector &Dependencies = NewNode->getDependencies(); std::vector SyncPointDeps; for (auto Node : Dependencies) { - auto Iter = SyncPointMap.find(Node); - if (Iter == SyncPointMap.end()) { + auto Iter = SyncPointMap_.find(Node); + if (Iter == SyncPointMap_.end()) { logError("Can't find SyncPoint for Node"); return false; } @@ -1450,12 +1450,12 @@ bool CHIPGraphNativeOpenCL::addNode(CHIPGraphNode *NewNode) { if (!Res) return false; - SyncPointMap.insert(std::make_pair(NewNode, NewSyncPoint)); + SyncPointMap_.insert(std::make_pair(NewNode, NewSyncPoint)); return true; } bool CHIPGraphNativeOpenCL::finalize() { - int Status = Exts->clFinalizeCommandBufferKHR(Handle); + int Status = Exts_->clFinalizeCommandBufferKHR(Handle_); if (Status == CL_SUCCESS) { Finalized = true; return true; @@ -1466,9 +1466,9 @@ bool CHIPGraphNativeOpenCL::finalize() { } CHIPGraphNativeOpenCL::~CHIPGraphNativeOpenCL() { - if (Handle == nullptr) + if (Handle_ == nullptr) return; - int Err = Exts->clReleaseCommandBufferKHR(Handle); + int Err = Exts_->clReleaseCommandBufferKHR(Handle_); logError("clReleaseCommandBufferKHR FAILED 
with status {}", resultToString(Err)); assert(Err == CL_SUCCESS); @@ -1501,9 +1501,9 @@ bool CHIPGraphNativeOpenCL::addKernelNode( Params.blockDim.z * Params.gridDim.z}; uint WorkDim = 3; - assert(Exts->clCommandNDRangeKernelKHR); - Status = Exts->clCommandNDRangeKernelKHR( - Handle, nullptr, Properties, + assert(Exts_->clCommandNDRangeKernelKHR); + Status = Exts_->clCommandNDRangeKernelKHR( + Handle_, nullptr, Properties, CLK->get().get(), // cl_kernel WorkDim, // cl_uint work_dim nullptr, // const size_t* global_work_offset, @@ -1537,7 +1537,7 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( Node->getParams(Dst, Src, Size, Kind); Params = Node->getParams(); if (Dst == nullptr || Src == nullptr) { - if (!Exts->clCommandSVMMemcpyRectPOCL) + if (!Exts_->clCommandSVMMemcpyRectPOCL) return false; // 3D copy // TODO handle arrays @@ -1576,22 +1576,22 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( size_t dst_row_pitch = Params.dstPtr.pitch; size_t dst_slice_pitch = dst_row_pitch * Params.dstPtr.ysize; - Status = Exts->clCommandSVMMemcpyRectPOCL( - Handle, nullptr, Dst, Src, dst_origin, src_origin, region, + Status = Exts_->clCommandSVMMemcpyRectPOCL( + Handle_, nullptr, Dst, Src, dst_origin, src_origin, region, dst_row_pitch, dst_slice_pitch, src_row_pitch, src_slice_pitch, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); } else { // 1D copy - if (!Exts->clCommandSVMMemcpyPOCL) + if (!Exts_->clCommandSVMMemcpyPOCL) return false; if (Dst == Src) { - Status = Exts->clCommandBarrierWithWaitListKHR( - Handle, nullptr, SyncPointDeps.size(), SyncPointDeps.data(), + Status = Exts_->clCommandBarrierWithWaitListKHR( + Handle_, nullptr, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); } else - Status = Exts->clCommandSVMMemcpyPOCL( - Handle, nullptr, Dst, Src, Size, SyncPointDeps.size(), + Status = Exts_->clCommandSVMMemcpyPOCL( + Handle_, nullptr, Dst, Src, Size, SyncPointDeps.size(), 
SyncPointDeps.data(), SyncPoint, nullptr); } @@ -1603,7 +1603,7 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( std::vector &SyncPointDeps, cl_sync_point_khr *SyncPoint) { - if (!Exts->clCommandSVMMemcpyPOCL) + if (!Exts_->clCommandSVMMemcpyPOCL) return false; void *Dst = nullptr; @@ -1618,8 +1618,8 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( if (Err != HIP_SUCCESS) return false; - int Status = Exts->clCommandSVMMemcpyPOCL( - Handle, nullptr, Dst, (const char *)Src + Offset, SizeBytes, + int Status = Exts_->clCommandSVMMemcpyPOCL( + Handle_, nullptr, Dst, (const char *)Src + Offset, SizeBytes, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); return Status == CL_SUCCESS; @@ -1629,7 +1629,7 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( CHIPGraphNodeMemcpyToSymbol *Node, std::vector &SyncPointDeps, cl_sync_point_khr *SyncPoint) { - if (!Exts->clCommandSVMMemcpyPOCL) + if (!Exts_->clCommandSVMMemcpyPOCL) return false; void *Dst = nullptr; @@ -1644,8 +1644,8 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( if (Err != HIP_SUCCESS) return false; - int Status = Exts->clCommandSVMMemcpyPOCL( - Handle, nullptr, (char *)Dst + Offset, Src, SizeBytes, + int Status = Exts_->clCommandSVMMemcpyPOCL( + Handle_, nullptr, (char *)Dst + Offset, Src, SizeBytes, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); return Status == CL_SUCCESS; } @@ -1653,15 +1653,15 @@ bool CHIPGraphNativeOpenCL::addMemcpyNode( bool CHIPGraphNativeOpenCL::addMemsetNode( CHIPGraphNodeMemset *Node, std::vector &SyncPointDeps, cl_sync_point_khr *SyncPoint) { - if (!Exts->clCommandSVMMemfillRectPOCL) + if (!Exts_->clCommandSVMMemfillRectPOCL) return false; hipMemsetParams Params = Node->getParams(); int Status; size_t Region[3] = {Params.width, Params.height, 1}; - Status = Exts->clCommandSVMMemfillRectPOCL( - Handle, nullptr, Params.dst, + Status = Exts_->clCommandSVMMemfillRectPOCL( + Handle_, nullptr, Params.dst, nullptr, // origin Region, // region Params.pitch, // row pitch @@ 
-1677,14 +1677,14 @@ bool CHIPGraphNativeOpenCL::addMemsetNode( bool CHIPGraphNativeOpenCL::addHostNode( CHIPGraphNodeHost *Node, std::vector &SyncPointDeps, cl_sync_point_khr *SyncPoint) { - if (!Exts->clCommandHostFuncPOCL) + if (!Exts_->clCommandHostFuncPOCL) return false; hipHostNodeParams Params = Node->getParams(); int Status; - Status = Exts->clCommandHostFuncPOCL( - Handle, nullptr, Params.fn, Params.userData, SyncPointDeps.size(), + Status = Exts_->clCommandHostFuncPOCL( + Handle_, nullptr, Params.fn, Params.userData, SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); return Status == CL_SUCCESS; } @@ -1693,7 +1693,7 @@ bool CHIPGraphNativeOpenCL::addEventRecordNode( CHIPGraphNodeEventRecord *Node, std::vector &SyncPointDeps, cl_sync_point_khr *SyncPoint) { - if (!Exts->clCommandSignalEventPOCL) + if (!Exts_->clCommandSignalEventPOCL) return false; return false; @@ -1712,14 +1712,14 @@ bool CHIPGraphNativeOpenCL::addEventRecordNode( bool CHIPGraphNativeOpenCL::addEventWaitNode( CHIPGraphNodeWaitEvent *Node, std::vector &SyncPointDeps, cl_sync_point_khr *SyncPoint) { - if (!Exts->clCommandWaitForEventPOCL) + if (!Exts_->clCommandWaitForEventPOCL) return false; CHIPEvent *E = Node->getEvent(); CHIPEventOpenCL *CLE = static_cast(E); int Status; - Status = Exts->clCommandWaitForEventPOCL(Handle, nullptr, CLE->get().get(), + Status = Exts_->clCommandWaitForEventPOCL(Handle_, nullptr, CLE->get().get(), SyncPoint, nullptr); return Status == CL_SUCCESS; } diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index b8b6c50cd..e7afec518 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -128,11 +128,11 @@ public: class CHIPModuleOpenCL : public CHIPModule { protected: - cl::Program Program_; + cl::Program ClProgram_; public: CHIPModuleOpenCL(const SPVModule &SrcMod); - cl::Program &get() { return Program_; } + cl::Program &get() { return ClProgram_; } virtual 
~CHIPModuleOpenCL() {} virtual void compile(CHIPDevice *ChipDevice) override; }; @@ -151,7 +151,7 @@ class SVMemoryRegion { cl::Context Context_; cl::Device Device_; - CHIPContextUSMExts USM; + CHIPContextUSMExts USMExts_; bool SupportsFineGrain; bool SupportsIntelUSM; @@ -208,14 +208,14 @@ typedef struct { class CHIPContextOpenCL : public CHIPContext { private: - cl::Context ClContext; + cl::Context ClContext_; bool SupportsCommandBuffers; bool SupportsCommandBuffersSVM; bool SupportsCommandBuffersHost; bool SupportsIntelUSM; bool SupportsFineGrainSVM; - CHIPContextClExts Exts; - SVMemoryRegion SvmMemory; + CHIPContextClExts Exts_; + SVMemoryRegion SvmMemory_; public: bool allDevicesSupportFineGrainSVMorUSM(); @@ -225,25 +225,25 @@ public: CHIPHostAllocFlags Flags = CHIPHostAllocFlags()) override; bool isAllocatedPtrMappedToVM(void *Ptr) override { return false; } // TODO - const SVMemoryRegion &getRegion() const { return SvmMemory; } + const SVMemoryRegion &getSVMRegion() const { return SvmMemory_; } virtual void freeImpl(void *Ptr) override; - cl::Context &get() { return ClContext; } + cl::Context &get() { return ClContext_; } bool supportsCommandBuffers() const { return SupportsCommandBuffers; } bool supportsCommandBuffersSVM() const { return SupportsCommandBuffersSVM; } bool supportsCommandBuffersHost() const { return SupportsCommandBuffersHost; } - const CHIPContextClExts *exts() const { return &Exts; } + const CHIPContextClExts *exts() const { return &Exts_; } }; class CHIPDeviceOpenCL : public CHIPDevice { private: CHIPDeviceOpenCL(CHIPContextOpenCL *ChipContext, cl::Device ClDevice, int Idx); - cl::Device ClDevice; + cl::Device ClDevice_; public: static CHIPDeviceOpenCL *create(cl::Device ClDevice, CHIPContextOpenCL *ChipContext, int Idx); - cl::Device &get() { return ClDevice; } + cl::Device &get() { return ClDevice_; } virtual void populateDevicePropertiesImpl() override; // unused virtual void resetImpl() override{}; @@ -269,15 +269,14 @@ public: class 
CHIPQueueOpenCL : public CHIPQueue { protected: // Any reason to make these private/protected? - cl::CommandQueue ClQueue; + cl::CommandQueue ClQueue_; /** * @brief Map memory to device. * - * All OpenCL allocations are done using SVM allocator. On systems with only - * coarse-grain SVM, we need to map the memory before performing any - * operations on the host. If the device supports fine-grain SVM, then no - * mapping will be done. + * On OpenCL systems with only coarse-grain SVM, we need to map the memory + * before performing any operations on the host. If the device supports + * fine-grain SVM or Intel USM, then no mapping will be done. * * @param AllocInfo AllocationInfo object to be mapped for the host * @param Type Type of mapping to be performed. Either READ or WRITE @@ -307,7 +306,7 @@ public: virtual void finish() override; virtual CHIPEvent *memCopyAsyncImpl(void *Dst, const void *Src, size_t Size) override; - cl::CommandQueue &get() { return ClQueue; } + cl::CommandQueue &get() { return ClQueue_; } virtual CHIPEvent *memFillAsyncImpl(void *Dst, size_t Size, const void *Pattern, size_t PatternSize) override; @@ -335,14 +334,14 @@ public: class CHIPKernelOpenCL : public CHIPKernel { private: std::string Name_; - cl::Kernel OclKernel_; + cl::Kernel ClKernel_; size_t MaxDynamicLocalSize_; size_t MaxWorkGroupSize_; size_t StaticLocalSize_; size_t PrivateSize_; - CHIPModuleOpenCL *Module; - CHIPDeviceOpenCL *Device; + CHIPModuleOpenCL *Module_; + CHIPDeviceOpenCL *Device_; public: CHIPKernelOpenCL(cl::Kernel ClKernel, CHIPDeviceOpenCL *Dev, @@ -353,11 +352,11 @@ public: SPVFuncInfo *getFuncInfo() const { return FuncInfo_; } const std::string &getName() const { return Name_; } - cl::Kernel &get() { return OclKernel_; } + cl::Kernel &get() { return ClKernel_; } CHIPKernelOpenCL *clone(); - CHIPModuleOpenCL *getModule() override { return Module; } - const CHIPModuleOpenCL *getModule() const override { return Module; } + CHIPModuleOpenCL *getModule() override { 
return Module_; } + const CHIPModuleOpenCL *getModule() const override { return Module_; } virtual hipError_t getAttributes(hipFuncAttributes *Attr) override; }; @@ -428,33 +427,33 @@ public: }; class CHIPTextureOpenCL : public CHIPTexture { - cl_mem Image; - cl_sampler Sampler; + cl_mem Image_; + cl_sampler Sampler_; public: CHIPTextureOpenCL() = delete; CHIPTextureOpenCL(const hipResourceDesc &ResDesc, cl_mem TheImage, cl_sampler TheSampler) - : CHIPTexture(ResDesc), Image(TheImage), Sampler(TheSampler) {} + : CHIPTexture(ResDesc), Image_(TheImage), Sampler_(TheSampler) {} virtual ~CHIPTextureOpenCL() { cl_int Status; - Status = clReleaseMemObject(Image); + Status = clReleaseMemObject(Image_); assert(Status == CL_SUCCESS && "Invalid image handler?"); - Status = clReleaseSampler(Sampler); + Status = clReleaseSampler(Sampler_); assert(Status == CL_SUCCESS && "Invalid sampler handler?"); (void)Status; } - cl_mem getImage() const { return Image; } - cl_sampler getSampler() const { return Sampler; } + cl_mem getImage() const { return Image_; } + cl_sampler getSampler() const { return Sampler_; } }; class CHIPGraphNativeOpenCL : public CHIPGraphNative { - cl_command_buffer_khr Handle; - cl_command_queue CmdQ; - std::map SyncPointMap; - const CHIPContextClExts *Exts; + cl_command_buffer_khr Handle_; + cl_command_queue CmdQ_; + std::map SyncPointMap_; + const CHIPContextClExts *Exts_; bool addKernelNode(CHIPGraphNodeKernel *Node, std::vector &SyncPointDeps, @@ -489,9 +488,9 @@ class CHIPGraphNativeOpenCL : public CHIPGraphNative { public: CHIPGraphNativeOpenCL(cl_command_buffer_khr H, cl_command_queue CQ, const CHIPContextClExts *E) - : Handle(H), CmdQ(CQ), Exts(E) {} + : Handle_(H), CmdQ_(CQ), Exts_(E) {} virtual ~CHIPGraphNativeOpenCL(); - cl_command_buffer_khr get() const { return Handle; } + cl_command_buffer_khr get() const { return Handle_; } virtual bool finalize() override; virtual bool addNode(CHIPGraphNode *NewNode) override; }; diff --git 
a/src/backend/OpenCL/SVMemoryRegion.cc b/src/backend/OpenCL/SVMemoryRegion.cc index 1d4f76117..0f13639bc 100644 --- a/src/backend/OpenCL/SVMemoryRegion.cc +++ b/src/backend/OpenCL/SVMemoryRegion.cc @@ -28,7 +28,7 @@ void SVMemoryRegion::init(cl::Context C, cl::Device D, CHIPContextUSMExts U, bool FineGrain, bool IntelUSM) { Device_ = D; Context_ = C; - USM = U; + USMExts_ = U; SupportsFineGrain = FineGrain; SupportsIntelUSM = IntelUSM; } @@ -37,7 +37,7 @@ SVMemoryRegion &SVMemoryRegion::operator=(SVMemoryRegion &&Rhs) { SvmAllocations_ = std::move(Rhs.SvmAllocations_); Context_ = std::move(Rhs.Context_); Device_ = std::move(Rhs.Device_); - USM = std::move(Rhs.USM); + USMExts_ = std::move(Rhs.USMExts_); SupportsFineGrain = Rhs.SupportsFineGrain; SupportsIntelUSM = Rhs.SupportsIntelUSM; return *this; @@ -66,7 +66,7 @@ void *SVMemoryRegion::allocate(size_t Size, size_t Alignment, case hipMemoryTypeUnified: */ default: - Ptr = USM.clSharedMemAllocINTEL(Context_(), Device_(), NULL, Size, + Ptr = USMExts_.clSharedMemAllocINTEL(Context_(), Device_(), NULL, Size, Alignment, &Err); break; } @@ -79,7 +79,7 @@ void *SVMemoryRegion::allocate(size_t Size, size_t Alignment, if (Ptr) { auto Deleter = [Ctx = this->Context_, SupportsUSM = this->SupportsIntelUSM, clMemFreeINTEL = - this->USM.clMemFreeINTEL](void *PtrToFree) -> void { + this->USMExts_.clMemFreeINTEL](void *PtrToFree) -> void { logTrace("clSVMFree on: {}\n", PtrToFree); if (SupportsUSM) clMemFreeINTEL(Ctx(), PtrToFree); From 0f4f3c892ad952a136bd1412492af9fbb55844cc Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 17 May 2023 15:59:26 +0300 Subject: [PATCH 35/35] Style fix src/CHIPGraph.hh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Henry Linjamäki --- src/CHIPGraph.hh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/CHIPGraph.hh b/src/CHIPGraph.hh index 4b789ccb8..469dd92fd 100644 --- a/src/CHIPGraph.hh +++ b/src/CHIPGraph.hh @@ 
-121,10 +121,9 @@ public: * @param TheNode */ void addDependency(CHIPGraphNode *TheNode) { - if (TheNode == nullptr) { + if (TheNode == nullptr) CHIPERR_LOG_AND_THROW("addDependency called with nullptr", hipErrorInvalidValue); - } logDebug("{} addDependency() <{} depends on {}>", (void *)this, Msg, TheNode->Msg); Dependencies_.push_back(TheNode);