diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 605571254..8d446b606 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -3,47 +3,58 @@ name: Presubmit permissions: contents: read -on: [push, pull_request] +# Controls when the action will run. +on: + push: + workflow_dispatch: + pull_request: +# These jobs are potentially parallelizeable jobs: build: - name: Build all specs + name: Build spec artifacts runs-on: ubuntu-latest + # Refer to the build container by its SHA instead of the name, to + # prevent caching problems when updating the image. + # container: khronosgroup/docker-images:asciidoctor-spec.20240702 + container: khronosgroup/docker-images@sha256:4aab96a03ef292439c9bd0f972adfa29cdf838d0909b1cb4ec2a6d7b2d14a37f steps: - uses: actions/checkout@v4 with: - fetch-depth: 0 submodules: recursive + # If fetch-depth: 0 is not specified, then + # git describe --tags --dirty + # below fails. + # This could also affect SPECREVISION in the Makefile. + fetch-depth: 0 - - name: Install required packages + # Ownerships in the working directory are odd. + # . is owned by UID 1001, while repo files are owned by root. + # This leads to many odd messages like + # fatal: detected dubious ownership in repository at '/__w/OpenCL-Docs/OpenCL-Docs' + # The 'git config' is a brute-force workaround. + - name: Git safe directory workaround run: | - sudo apt-get install -y libpango1.0-dev libwebp-dev ghostscript fonts-lyx jing libavalon-framework-java libbatik-java python3-pyparsing - sudo gem install asciidoctor -v 2.0.16 - sudo gem install coderay -v 1.1.1 - sudo gem install rouge -v 3.19.0 - sudo gem install ttfunk -v 1.7.0 - sudo gem install hexapdf -v 0.27.0 - sudo gem install asciidoctor-pdf -v 2.3.4 - sudo gem install asciidoctor-mathematical -v 0.3.5 - sudo pip install pyparsing - - - name: List git tag + git config --global --add safe.directory '*' + ls -lda . .. .git Makefile + + - name: Validate XML run: | - git describe --tags --dirty + make -C xml validate - name: Generate core specs (HTML and PDF) run: | - python3 makeSpec -clean -spec core OUTDIR=out.core -j 5 api c env ext cxx4opencl - - - name: Generate core + extension specs (HTML) + python3 makeSpec -clean -spec core OUTDIR=out.core -j 5 -O api c env ext cxx4opencl + + - name: Generate core + KHR extension specs (HTML) run: | - python3 makeSpec -clean -spec khr OUTDIR=out.khr -j 12 html + python3 makeSpec -clean -spec khr OUTDIR=out.khr -j -O html - - name: Generate reference pages + - name: Generate core + KHR + EXT extension specs (HTML) run: | - python3 makeSpec -spec khr OUTDIR=out.refpages -j 12 manhtmlpages + python3 makeSpec -clean -spec khr+ext OUTDIR=out.khr+ext -j -O html - - name: Validate XML + - name: Generate reference pages run: | - make -C xml validate + python3 makeSpec -spec khr OUTDIR=out.refpages -j -O manhtmlpages diff --git a/Makefile b/Makefile index 1540b8343..5dcaae6f9 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ EXTOPTIONS := $(foreach ext,$(EXTS),-extension $(ext)) QUIET ?= VERYQUIET ?= @ +PYTHON ?= python3 ASCIIDOCTOR ?= asciidoctor RM = rm -f RMRF = rm -rf @@ -72,8 +73,8 @@ SPECREVISION = $(shell echo `git describe --tags --dirty`) # This used to be a dependency in the spec html/pdf targets, # but that's likely to lead to merge conflicts. Just regenerate # when pushing a new spec for review to the sandbox. -SPECREMARK = from git branch: $(shell echo `git symbolic-ref --short HEAD`) \ - commit: $(shell echo `git log -1 --format="%H"`) +SPECREMARK = from git branch: $(shell echo `git symbolic-ref --short HEAD 2> /dev/null || echo Git branch not available`) \ + commit: $(shell echo `git log -1 --format="%H" 2> /dev/null || echo Git commit not available`) endif # The C++ for OpenCL document revision scheme is aligned with its release date. # Revision naming scheme is as follows: @@ -116,7 +117,7 @@ ADOCCOMMONOPTS = -a apispec="$(CURDIR)/api" \ -a cspec="$(CURDIR)/c" \ -a images="$(CURDIR)/images" \ $(ATTRIBOPTS) $(NOTEOPTS) $(VERBOSE) $(ADOCEXTS) -ADOCOPTS = -d book $(ADOCCOMMONOPTS) +ADOCOPTS = --failure-level ERROR -d book $(ADOCCOMMONOPTS) # Asciidoctor options to build refpages # @@ -511,9 +512,11 @@ $(MANHTMLDIR)/intro.html: $(REFPATH)/intro.txt $(MANCOPYRIGHT) REGISTRY = $(ROOTDIR)/xml APIXML = $(REGISTRY)/cl.xml +CFEATURES = c/features.txt GENSCRIPT = $(SCRIPTS)/gencl.py DICTSCRIPT = $(SCRIPTS)/gen_dictionaries.py VERSIONSCRIPT = $(SCRIPTS)/gen_version_notes.py +CFEATSCRIPT = $(SCRIPTS)/gen_c_feature_dictionary.py GENSCRIPTOPTS = $(VERSIONOPTIONS) $(EXTOPTIONS) $(GENSCRIPTEXTRA) -registry $(APIXML) GENSCRIPTEXTRA = @@ -539,12 +542,14 @@ extinc: $(METADEPEND) $(METADEPEND): $(APIXML) $(GENSCRIPT) $(QUIET)$(MKDIR) $(METAPATH) $(QUIET)$(PYTHON) $(GENSCRIPT) $(GENSCRIPTOPTS) -o $(METAPATH) extinc + $(QUIET)$(PYTHON) $(CFEATSCRIPT) -features $(CFEATURES) -o $(METAPATH)/c-feature-dictionary.asciidoc # This generates a single file containing asciidoc attributes for each # extension in the spec being built. attribs: $(ATTRIBFILE) $(ATTRIBFILE): + $(QUIET)$(MKDIR) $(dir $@) for attrib in $(EXTS) ; do \ echo ":$${attrib}:" ; \ done > $@ diff --git a/OpenCL_C.txt b/OpenCL_C.txt index 610b54e3c..c6a096a7d 100644 --- a/OpenCL_C.txt +++ b/OpenCL_C.txt @@ -5845,7 +5845,7 @@ the application. | `FLT_RADIX` | {CL_FLT_RADIX} | `FLT_MAX` | {CL_FLT_MAX} | `FLT_MIN` | {CL_FLT_MIN} -| `FLT_EPSILSON` | {CL_FLT_EPSILON} +| `FLT_EPSILON` | {CL_FLT_EPSILON} |==== The following macros shall expand to integer constant expressions whose @@ -5916,7 +5916,7 @@ the application. | `DBL_MIN_EXP` | {CL_DBL_MIN_EXP} | `DBL_MAX` | {CL_DBL_MAX} | `DBL_MIN` | {CL_DBL_MIN} -| `DBL_EPSILSON` | {CL_DBL_EPSILON} +| `DBL_EPSILON` | {CL_DBL_EPSILON} |==== The following constants are also available. @@ -5986,7 +5986,7 @@ the application. | `HALF_RADIX` | {CL_HALF_RADIX} | `HALF_MAX` | {CL_HALF_MAX} | `HALF_MIN` | {CL_HALF_MIN} -| `HALF_EPSILSON` | {CL_HALF_EPSILON} +| `HALF_EPSILON` | {CL_HALF_EPSILON} |==== The following constants are also available. @@ -17300,7 +17300,7 @@ used in the conversions described below. When approximate rounding is used instead of the preferred rounding, the result of the conversion must satisfy the bound given below. -`half` {rightarrow` {CL_UNORM_INT8} (8-bit unsigned integer) +`half` {rightarrow} {CL_UNORM_INT8} (8-bit unsigned integer) [none] * Let f~exact~ = *max*(`0`, *min*(`f * 255`, `255`)) @@ -17308,7 +17308,7 @@ result of the conversion must satisfy the bound given below. * Let f~approx~ = *convert_uchar_sat_*(`f * 255.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` {CL_UNORM_INT_101010} (10-bit unsigned integer) +`half` {rightarrow} {CL_UNORM_INT_101010} (10-bit unsigned integer) [none] * Let f~exact~ = *max*(`0`, *min*(`f * 1023`, `1023`)) @@ -17317,7 +17317,7 @@ result of the conversion must satisfy the bound given below. * Let f~approx~ = *convert_ushort_sat_*(`f * 1023.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` {CL_UNORM_INT16} (16-bit unsigned integer) +`half` {rightarrow} {CL_UNORM_INT16} (16-bit unsigned integer) [none] * Let f~exact~ = *max*(`0`, *min*(`f * 65535`, `65535`)) @@ -17326,7 +17326,7 @@ result of the conversion must satisfy the bound given below. 65535.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` {CL_SNORM_INT8} (8-bit signed integer) +`half` {rightarrow} {CL_SNORM_INT8} (8-bit signed integer) [none] * Let f~exact~ = *max*(`-128`, *min*(`f * 127`, `127`)) @@ -17334,7 +17334,7 @@ result of the conversion must satisfy the bound given below. * Let f~approx~ = *convert_char_sat_*(`f * 127.0f`) * *fabs*(f~exact~ - f~approx~) must be \<= 0.6 -`half` {rightarrow` {CL_SNORM_INT16} (16-bit signed integer) +`half` {rightarrow} {CL_SNORM_INT16} (16-bit signed integer) [none] * Let f~exact~ = *max*(`-32768`, *min*(`f * 32767`, `32767`)) diff --git a/api/appendix_e.asciidoc b/api/appendix_e.asciidoc index 9df39d32f..ba9534480 100644 --- a/api/appendix_e.asciidoc +++ b/api/appendix_e.asciidoc @@ -377,7 +377,7 @@ device: capabilities of a device. * {CL_DEVICE_PIPE_SUPPORT} to determine whether a device supports pipe memory objects. - * {CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE} to determine the + * {CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE} to determine the preferred work-group size multiple for a device. OpenCL 3.0 adds new queries to conveniently and precisely diff --git a/api/appendix_h.asciidoc b/api/appendix_h.asciidoc index 2a2d37598..d1cef63a1 100644 --- a/api/appendix_h.asciidoc +++ b/api/appendix_h.asciidoc @@ -334,7 +334,7 @@ When sRGB images are not supported: |*Behavior* | {clGetSupportedImageFormats} -| Will not return return any image formats with `image_channel_order` equal to an sRGB image channel order if no devices in _context_ support sRGB images. +| Will not return any image formats with `image_channel_order` equal to an sRGB image channel order if no devices in _context_ support sRGB images. |==== diff --git a/api/cl_ext_cxx_for_opencl.asciidoc b/api/cl_ext_cxx_for_opencl.asciidoc new file mode 100644 index 000000000..51ead70cb --- /dev/null +++ b/api/cl_ext_cxx_for_opencl.asciidoc @@ -0,0 +1,59 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_cxx_for_opencl.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-08-25 +*IP Status*:: + No known IP claims. +*Contributors*:: + - Kevin Petit, Arm Ltd. + + - Sven Van Haastregt, Arm Ltd. + + - Anastasia Stulova, Arm Ltd. + + - Marco Antognini, Arm Ltd. + + - Neil Hickey, Arm Ltd. + + - Alastair Murray, Codeplay + + +=== Description + +This extension adds support for building programs written using the C++ for +OpenCL kernel language documented in the *OpenCL-Docs* repository +(https://github.com/KhronosGroup/OpenCL-Docs) +with stable versions published in releases of the repository. + +This extension also enables applications to query the version of the language +supported by the device compiler. + +=== New Enums + + * {cl_device_info_TYPE} + ** {CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT} + +=== New build option + +This extension adds support for a new `CLC++` value to be passed to the +`-cl-std` build option accepted by {clBuildProgram} and {clCompileProgram}. + +=== Preprocessor Macros + +This extension defines a new language, instead of extending an existing +language. As such, there will be no preprocessor `#define` matching the +extension name string. Instead, dedicated preprocessor macros conveying +language version information are available as described in the C++ for +OpenCL Programming Language Documentation, section 2.2.2.2 "Predefined +macros". + +=== Conformance tests + +. Test that a program can successfully be compiled with `-cl-std=CLC++`. +. Test with a program compiled with `-cl-std=CLC++` that the value of the + +__OPENCL_CPP_VERSION__+ macro agrees with the version returned by + `CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT`. + +=== Version History + + * Revision 1.0.0, 2020-08-24 + ** Initial version. diff --git a/api/cl_ext_device_fission.asciidoc b/api/cl_ext_device_fission.asciidoc new file mode 100644 index 000000000..8b038ef68 --- /dev/null +++ b/api/cl_ext_device_fission.asciidoc @@ -0,0 +1,20 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_device_fission.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2024-07-17 +*IP Status*:: + No known IP claims. + +=== Description + +Precursor to the functionality described in <>. + +=== Version History + + * Revision 1.0.0, 2024-07-17 + ** First version. diff --git a/api/cl_ext_float_atomics.asciidoc b/api/cl_ext_float_atomics.asciidoc new file mode 100644 index 000000000..473feb40e --- /dev/null +++ b/api/cl_ext_float_atomics.asciidoc @@ -0,0 +1,21 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_float_atomics.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2020-08-12 +*IP Status*:: + No known IP claims. + +=== Description + +The latest published specification for this extension is available on +the https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_float_atomics.html[OpenCL registry]. + +=== Version History + + * Revision 1.0.0, 2020-08-12 + ** First version. diff --git a/api/cl_ext_image_from_buffer.asciidoc b/api/cl_ext_image_from_buffer.asciidoc new file mode 100644 index 000000000..6bd6fa9bd --- /dev/null +++ b/api/cl_ext_image_from_buffer.asciidoc @@ -0,0 +1,21 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_image_from_buffer.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2022-01-25 +*IP Status*:: + No known IP claims. + +=== Description + +The latest published specification for this extension is available on +the https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_image_from_buffer.html[OpenCL registry]. + +=== Version History + + * Revision 1.0.0, 2022-01-25 + ** First version. diff --git a/api/cl_ext_image_raw10_raw12.asciidoc b/api/cl_ext_image_raw10_raw12.asciidoc new file mode 100644 index 000000000..d7d36b55d --- /dev/null +++ b/api/cl_ext_image_raw10_raw12.asciidoc @@ -0,0 +1,21 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_image_raw10_raw12.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2023-05-03 +*IP Status*:: + No known IP claims. + +=== Description + +The latest published specification for this extension is available on +the https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_image_raw10_raw12.html[OpenCL registry]. + +=== Version History + + * Revision 1.0.0, 2023-05-03 + ** First version. diff --git a/api/cl_ext_image_requirements_info.asciidoc b/api/cl_ext_image_requirements_info.asciidoc new file mode 100644 index 000000000..6de780853 --- /dev/null +++ b/api/cl_ext_image_requirements_info.asciidoc @@ -0,0 +1,21 @@ +// Copyright 2018-2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_image_requirements_info.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2022-01-18 +*IP Status*:: + No known IP claims. + +=== Description + +The latest published specification for this extension is available on +the https://registry.khronos.org/OpenCL/extensions/ext/cl_ext_image_requirements_info.html[OpenCL registry]. + +=== Version History + + * Revision 0.5.0, 2022-01-18 + ** First version. diff --git a/api/cl_ext_migrate_memobject.asciidoc b/api/cl_ext_migrate_memobject.asciidoc new file mode 100644 index 000000000..b987f48f2 --- /dev/null +++ b/api/cl_ext_migrate_memobject.asciidoc @@ -0,0 +1,20 @@ +// Copyright 2024 The Khronos Group Inc. +// SPDX-License-Identifier: CC-BY-4.0 + +include::{generated}/meta/{refprefix}cl_ext_migrate_memobject.txt[] + +=== Other Extension Metadata + +*Last Modified Date*:: + 2024-07-17 +*IP Status*:: + No known IP claims. + +=== Description + +Precursor to {clEnqueueMigrateMemObjects}. + +=== Version History + + * Revision 1.0.0, 2024-07-17 + ** Initial version. diff --git a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc index 13ada6c22..486d01d12 100644 --- a/api/cl_khr_command_buffer_mutable_dispatch.asciidoc +++ b/api/cl_khr_command_buffer_mutable_dispatch.asciidoc @@ -6,7 +6,7 @@ include::{generated}/meta/{refprefix}cl_khr_command_buffer_mutable_dispatch.txt[ === Other Extension Metadata *Last Modified Date*:: - 2022-08-31 + 2024-06-19 *IP Status*:: No known IP claims. *Contributors*:: @@ -43,32 +43,15 @@ in a new command-buffer. === Interactions With Other Extensions -The {cl_command_buffer_structure_type_khr_TYPE} type has been added to this -extension for the purpose of allowing expansion of mutable functionality in -future extensions layered on top of -{cl_khr_command_buffer_mutable_dispatch_EXT}. -Any parameter that is a structure containing a `void* next` member *must* -have a value of `next` that is either `NULL`, or is a pointer to a valid -structure defined by {cl_khr_command_buffer_mutable_dispatch_EXT} or an -extension layered on top. -To be a valid structure in the pointer chain the first member of the -structure *must* be a {cl_command_buffer_structure_type_khr_TYPE} identifier -for the structure being iterated through, and the second member a `void* -next` pointer to the next structure in the chain. - -[NOTE] -==== -This approach is based on structure pointer chains in Vulkan, for more -details see the "`Valid Usage for Structure Pointer Chains`" section of the -Vulkan specification. -==== - -This is designed so that another extension layered on -{cl_khr_command_buffer_mutable_dispatch_EXT} could allow modification of -commands recorded to a command-buffer other than kernel execution commands. -As all command recording entry-points return a {cl_mutable_command_khr_TYPE} -handle, and aspects like which {cl_mem_TYPE} object a command uses could -also be updated between enqueues of the command-buffer. +The {clUpdateMutableCommandsKHR} entry-point has been designed for the purpose +of allowing expansion of mutable functionality in future extensions layered on +top of `cl_khr_command_buffer_mutable_dispatch`. + +A new extension can define its own structure type to specify the update +configuration it requires, with a matching +{cl_command_buffer_update_type_khr_TYPE} value. This new structure type can +then be passed to {clUpdateMutableCommandsKHR} where it is reinterpreted from a +void pointer using {cl_command_buffer_update_type_khr_TYPE}. === New Commands @@ -79,8 +62,7 @@ also be updated between enqueues of the command-buffer. * {cl_mutable_dispatch_fields_khr_TYPE} * {cl_mutable_command_info_khr_TYPE} - * {cl_command_buffer_structure_type_khr_TYPE} - * {cl_mutable_base_config_khr_TYPE} + * {cl_command_buffer_update_type_khr_TYPE} * {cl_mutable_dispatch_asserts_khr_TYPE} * {cl_mutable_dispatch_config_khr_TYPE} * {cl_mutable_dispatch_exec_info_khr_TYPE} @@ -115,8 +97,7 @@ also be updated between enqueues of the command-buffer. ** {CL_COMMAND_BUFFER_MUTABLE_KHR} * {cl_command_buffer_properties_khr_TYPE} ** {CL_COMMAND_BUFFER_MUTABLE_DISPATCH_ASSERTS_KHR} - * {cl_command_buffer_structure_type_khr_TYPE} - ** {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR} + * {cl_command_buffer_update_type_khr_TYPE} ** {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR} * New Error Codes ** {CL_INVALID_MUTABLE_COMMAND_KHR} @@ -274,8 +255,6 @@ kernel void vector_addition(global int* tile1, global int* tile2, cl_mutable_dispatch_arg_khr arg_2{2, sizeof(cl_mem), &output_buffer}; cl_mutable_dispatch_arg_khr args[] = {arg_0, arg_1, arg_2}; cl_mutable_dispatch_config_khr dispatch_config{ - CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, - nullptr, command_handle, 3 /* num_args */, 0 /* num_svm_arg */, @@ -287,12 +266,16 @@ kernel void vector_addition(global int* tile1, global int* tile2, nullptr /* global_work_offset */, nullptr /* global_work_size */, nullptr /* local_work_size */}; - cl_mutable_base_config_khr mutable_config{ - CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, - &dispatch_config}; // Update the command buffer with the mutable configuration - error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + cl_uint num_configs = 1; + cl_command_buffer_update_type_khr config_types[1] = { + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR + }; + const void* configs[1] = {&dispatch_config}; + error = clUpdateMutableCommandsKHR(command_buffer, num_configs, + config_types, configs); + CL_CHECK(error); } @@ -374,3 +357,6 @@ may be a introduced as a stand alone extension. * Revision 0.9.1, 2023-11-07 ** Add type {cl_mutable_dispatch_asserts_khr_TYPE} and its possible values (provisional). + * Revision 0.9.2, 2024-06-19 + ** Change {clUpdateMutableCommandsKHR} API to pass configs as an array rather + than linked list (provisional). diff --git a/api/cl_khr_egl_image.asciidoc b/api/cl_khr_egl_image.asciidoc index 42e03b71e..dcea8fd3e 100644 --- a/api/cl_khr_egl_image.asciidoc +++ b/api/cl_khr_egl_image.asciidoc @@ -13,7 +13,7 @@ include::{generated}/meta/{refprefix}cl_khr_egl_image.txt[] === Description {cl_khr_egl_image_EXT} provides a mechanism to creating OpenCL memory objects -from from EGLImages. +from EGLImages. === New Commands diff --git a/api/cl_khr_external_memory.asciidoc b/api/cl_khr_external_memory.asciidoc index cd572a8fb..3d61b564a 100644 --- a/api/cl_khr_external_memory.asciidoc +++ b/api/cl_khr_external_memory.asciidoc @@ -60,7 +60,7 @@ imported into OpenCL. * {cl_mem_properties_TYPE} ** {CL_MEM_DEVICE_HANDLE_LIST_KHR} ** {CL_MEM_DEVICE_HANDLE_LIST_END_KHR} - * Return values from from {clGetEventInfo} when _param_name_ is + * Return values from {clGetEventInfo} when _param_name_ is {cl_command_type_TYPE}: ** {CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR} ** {CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR} diff --git a/api/cl_khr_external_semaphore_sync_fd.asciidoc b/api/cl_khr_external_semaphore_sync_fd.asciidoc index f8203ce90..ae1bc7891 100644 --- a/api/cl_khr_external_semaphore_sync_fd.asciidoc +++ b/api/cl_khr_external_semaphore_sync_fd.asciidoc @@ -38,6 +38,7 @@ external semaphore using the APIs introduced by === New Commands * {clGetSemaphoreHandleForTypeKHR} + * {clReImportSemaphoreSyncFdKHR} === New Types @@ -58,3 +59,5 @@ external semaphore using the APIs introduced by ** Added re-import function call to {cl_khr_external_semaphore_sync_fd_EXT} * Revision 1.0.0, 2024-03-15 ** First non-provisional version. + * Revision 1.0.1, 2024-08-06 + ** Clarify what re-import properties are accepted by {clReImportSemaphoreSyncFdKHR}. diff --git a/api/cl_khr_priority_hints.asciidoc b/api/cl_khr_priority_hints.asciidoc index dfd29df15..988498b53 100644 --- a/api/cl_khr_priority_hints.asciidoc +++ b/api/cl_khr_priority_hints.asciidoc @@ -14,7 +14,7 @@ include::{generated}/meta/{refprefix}cl_khr_priority_hints.txt[] The {cl_khr_priority_hints_EXT} extension adds priority hints for OpenCL, but does not specify the scheduling behavior or minimum guarantees. -It is expected that the the user guides associated with each implementation +It is expected that the user guides associated with each implementation which supports this extension will describe the scheduling behavior guarantees. diff --git a/api/footnotes.asciidoc b/api/footnotes.asciidoc index cc407d98f..b5452357e 100644 --- a/api/footnotes.asciidoc +++ b/api/footnotes.asciidoc @@ -11,7 +11,7 @@ Note that this flag does not provide meaning for atomic memory operations, but o ] :fn-create-context-all-or-subset: pass:n[ \ -{clCreateContextfromType} may may create a context for all or a subset of the actual physical devices present in the platform that match _device_type_. \ +{clCreateContextfromType} may create a context for all or a subset of the actual physical devices present in the platform that match _device_type_. \ ] :fn-default-device-queue: pass:n[ \ diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index 57cc2c17d..38d33377f 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -198,10 +198,12 @@ A command submitted to a device will not launch until prerequisites that constrain the order of commands have been resolved. These prerequisites have three sources: - * They may arise from commands submitted to a command-queue that constrain - the order in which commands are launched. - For example, commands that follow a command-queue barrier will not - launch until all commands prior to the barrier are complete. + * The first source of prerequisites is implicit dependencies between commands + enqueued to the same command-queue which arise as follows: + ** Commands enqueued after a command-queue barrier have the preceding barrier command + as a prerequisite. + ** Commands enqueued in an in-order command-queue have the command enqueued + before them as a prerequisite. * The second source of prerequisites is dependencies between commands expressed through events. A command may include an optional list of events. @@ -677,7 +679,7 @@ The OpenCL execution model supports three types of kernels: * *OpenCL kernels* are managed by the OpenCL API as kernel objects associated with kernel functions within program objects. OpenCL program objects are created and built using OpenCL APIs. - The OpenCL API includes functions to query the kernel languages and + The OpenCL API includes functions to query the kernel languages and intermediate languages that may be used to create OpenCL program objects for a device. * *Native kernels* are accessed through a host function pointer. diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index 99a6653cd..2b1ae266d 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -395,13 +395,13 @@ include::{generated}/api/version-notes/CL_DEVICE_TYPE_DEFAULT.asciidoc[] {clGetDeviceIDs} or to create OpenCL contexts using {clCreateContextFromType}, and will never be returned in {CL_DEVICE_TYPE} for any OpenCL device. - The default OpenCL device must not be a {CL_DEVICE_TYPE_CUSTOM} device. + The default OpenCL device must not be a {CL_DEVICE_TYPE_CUSTOM} device + unless it is the only device in the platform. | {CL_DEVICE_TYPE_ALL_anchor} include::{generated}/api/version-notes/CL_DEVICE_TYPE_ALL.asciidoc[] - | All OpenCL devices available in the platform, except for - {CL_DEVICE_TYPE_CUSTOM} devices. + | All OpenCL devices in the platform. {CL_DEVICE_TYPE_ALL} is only used to query OpenCL devices using {clGetDeviceIDs} or to create OpenCL contexts using {clCreateContextFromType}, and will never be returned in {CL_DEVICE_TYPE} @@ -2083,6 +2083,15 @@ include::{generated}/api/version-notes/CL_DEVICE_TERMINATE_CAPABILITY_KHR.asciid {CL_DEVICE_TERMINATE_CAPABILITY_CONTEXT_KHR_anchor} - Indicates that context termination is supported. endif::cl_khr_terminate_context[] + +ifdef::cl_ext_cxx_for_opencl[] +| {CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT_anchor} + +include::{generated}/api/version-notes/CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT.asciidoc[] + | {cl_version_TYPE} + | Returns the version of the C++ for OpenCL language supported by the + device compiler. +endif::cl_ext_cxx_for_opencl[] |==== ifdef::cl_khr_integer_dot_product[] @@ -2630,6 +2639,7 @@ Otherwise it may return endif::cl_khr_d3d11_sharing[] +[[platform-device-partitioning]] == Partitioning a Device NOTE: Partitioning devices is <> version 1.2. diff --git a/api/opencl_runtime_layer.asciidoc b/api/opencl_runtime_layer.asciidoc index 257267e74..6bda5b9d8 100644 --- a/api/opencl_runtime_layer.asciidoc +++ b/api/opencl_runtime_layer.asciidoc @@ -628,8 +628,9 @@ returned in _errcode_ret_: in the <> table. * {CL_INVALID_BUFFER_SIZE} if _size_ is 0, or if _size_ is greater than {CL_DEVICE_MAX_MEM_ALLOC_SIZE} for all devices in _context_, or if - {CL_MEM_USE_HOST_PTR} is set in _flags_ and _host_ptr_ is a pointer returned by - {clSVMAlloc} and _size_ is greater than the size passed to {clSVMAlloc}. + {CL_MEM_USE_HOST_PTR} or {CL_MEM_COPY_HOST_PTR} is set in _flags_ and + _host_ptr_ is a pointer returned by {clSVMAlloc} and _size_ is greater than + the size passed to {clSVMAlloc}. * {CL_INVALID_HOST_PTR} if _host_ptr_ is `NULL` and {CL_MEM_USE_HOST_PTR} or {CL_MEM_COPY_HOST_PTR} are set in _flags_ or if _host_ptr_ is not `NULL` but {CL_MEM_COPY_HOST_PTR} or {CL_MEM_USE_HOST_PTR} are not set in _flags_. @@ -1836,7 +1837,7 @@ include::{generated}/api/version-notes/clCreateFromGLBuffer.asciidoc[] Only the {CL_MEM_READ_ONLY}, {CL_MEM_WRITE_ONLY} and {CL_MEM_READ_WRITE} flags specified in that table can be used. * _bufobj_ is the name of an OpenGL buffer object. - The data store of the OpenGL buffer object must have have been + The data store of the OpenGL buffer object must have been previously created by calling `glBufferData`, although its contents need not be initialized. The size of the data store will be used to determine the size of the @@ -2524,14 +2525,14 @@ The memory layout of this image format is described below: [width="60%",cols="<10%,<10%,<10%,<10%,<60%"] |==== -| R | G | B | A | ... | +| R | G | B | A | ... |==== with the corresponding byte offsets [width="60%",cols="<10%,<10%,<10%,<10%,<60%"] |==== -| 0 | 1 | 2 | 3 | ... | +| 0 | 1 | 2 | 3 | ... |==== Similar, if `image_channel_order` = {CL_RGBA} and `image_channel_data_type` = @@ -2539,14 +2540,14 @@ Similar, if `image_channel_order` = {CL_RGBA} and `image_channel_data_type` = [width="60%",cols="<10%,<10%,<10%,<10%,<60%"] |==== -| R | G | B | A | ... | +| R | G | B | A | ... |==== with the corresponding byte offsets [width="60%",cols="<10%,<10%,<10%,<10%,<60%"] |==== -| 0 | 2 | 4 | 6 | ... | +| 0 | 2 | 4 | 6 | ... |==== `image_channel_data_type` values of {CL_UNORM_SHORT_565}, {CL_UNORM_SHORT_555}, @@ -2645,7 +2646,7 @@ ifdef::cl_khr_external_memory[] If _image_slice_pitch_ is zero and the image is created from an external memory handle, then the image slice pitch is implementation-defined. endif::cl_khr_external_memory[] - The image slice pitch must be {geq} the image image row pitch {times} + The image slice pitch must be {geq} the image row pitch {times} _image_height_ for a 2D image array or a 3D image, must be {geq} the image row pitch for a 1D image array, and must be a multiple of the image row pitch. @@ -5427,6 +5428,23 @@ handle is used by an OpenCL command queued to a command-queue without being acquired. This is to guarantee that the state of the memory objects is up-to-date and they are accessible to OpenCL. + +The following restrictions shall apply - + * Each memory object must be acquired only once. Acquiring a memory object + multiple times without releasing it results in implementation-defined + behavior. + * The acquire must be performed on a command-queue associated with a device + that was one of the devices specified via {CL_MEM_DEVICE_HANDLE_LIST_KHR} + when the memory object was imported using {clCreateBufferWithProperties} or + {clCreateImageWithProperties}. If {CL_MEM_DEVICE_HANDLE_LIST_KHR} was not + specified, the acquire can be performed on a command-queue associated with + any device in the context. + * The memory object will be acquired for all devices specified + via {CL_MEM_DEVICE_HANDLE_LIST_KHR} when the memory object was imported + using {clCreateBufferWithProperties} or {clCreateImageWithProperties}. + If {CL_MEM_DEVICE_HANDLE_LIST_KHR} was not specified, the memory object + will be acquired for all devices in the context. + See <> for more details on how to use this API. @@ -5450,9 +5468,9 @@ Otherwise, it returns one of the following errors: ** if _command_queue_ is not a valid command-queue, or ** if device associated with _command_queue_ is not one of the devices specified by {CL_MEM_DEVICE_HANDLE_LIST_KHR} at the time of creating - one or more of _mem_objects_, or ** if one or more of _mem_objects_ - belong to a context that does not contain a device associated with - _command_queue_. + one or more of _mem_objects_, or + ** if one or more of _mem_objects_ belong to a context that does not + contain a device associated with _command_queue_. * {CL_INVALID_EVENT_WAIT_LIST} ** if _event_wait_list_ is `NULL` and _num_events_in_wait_list_ is not 0, or @@ -5503,6 +5521,23 @@ Applications must release the memory objects that are acquired using commands in the other API. This is to guarantee that the state of memory objects is up-to-date and they are accessible to the other API. + +The following restrictions shall apply - + * Each memory object must be released only once. Releasing a memory object + multiple times without acquiring it results in implementation-defined + behavior. + * The release must be performed on a command-queue associated with a device + that was one of the devices specified via {CL_MEM_DEVICE_HANDLE_LIST_KHR} + when the memory object was imported using {clCreateBufferWithProperties} or + {clCreateImageWithProperties}. If {CL_MEM_DEVICE_HANDLE_LIST_KHR} was not + specified, the release can be performed on a command-queue associated with + any device in the context. + * The memory object will be released for all devices specified via + {CL_MEM_DEVICE_HANDLE_LIST_KHR} when the memory object was imported + using {clCreateBufferWithProperties} or {clCreateImageWithProperties}. + If {CL_MEM_DEVICE_HANDLE_LIST_KHR} was not specified, the memory object + will be released for all devices in the context. + See "`Example with Acquire / Release`" provided in <> for more details on how to use this API. @@ -7063,7 +7098,7 @@ include::{generated}/api/version-notes/clEnqueueReleaseGLObjects.asciidoc[] to an element of the _event_wait_list_ array. ifdef::cl_khr_gl_event[] -If an OpenGL context is bound to the current thread, then then any OpenGL +If an OpenGL context is bound to the current thread, then any OpenGL commands which . affect or access the contents of the memory objects listed in the @@ -8372,7 +8407,7 @@ A program object encapsulates the following information: [open,refpage='clCreateProgramWithSource',desc='Creates a program object for a context, and loads source code specified by text strings into the program object.',type='protos'] -- -To creates a program object for a context and load source code into that +To create a program object for a context and load source code into that object, call the function include::{generated}/api/protos/clCreateProgramWithSource.txt[] @@ -8402,6 +8437,10 @@ header or implementation-defined source for custom devices that support an online compiler. OpenCL {cpp} is not supported as an online-compiled kernel language through this interface. +ifdef::cl_ext_cxx_for_opencl[] +If the {cl_ext_cxx_for_opencl_EXT} extension is supported, the source code +specified by _strings_ may also be a C++ for OpenCL program source or header. +endif::cl_ext_cxx_for_opencl[] // refError @@ -9494,6 +9533,13 @@ IMPORTANT: Debugging options are <> version 2.0. built-in functions that allow you to enqueue commands on a device (refer to OpenCL kernel languages specifications). +ifdef::cl_ext_cxx_for_opencl[] +==== C++ for OpenCL + +Applications may pass `-cl-std=CLC\++` to {clCompileProgram} or {clBuildProgram} +for programs created using {clCreateProgramFromSource} to request the program +be built as C++ for OpenCL. +endif::cl_ext_cxx_for_opencl[] [[linker-options]] === Linker Options @@ -12828,7 +12874,7 @@ _errcode_ret_ returns an appropriate error code. If _errcode_ret_ is `NULL`, no error code is returned. {clCreateSemaphoreWithPropertiesKHR} returns a valid semaphore object in an -un-signaled state and and _errcode_ret_ is set to {CL_SUCCESS} if the +un-signaled state and _errcode_ret_ is set to {CL_SUCCESS} if the function is executed successfully. Otherwise, it returns a `NULL` value with one of the following error values returned in _errcode_ret_: @@ -12876,6 +12922,10 @@ ifdef::cl_khr_external_semaphore[] [open,refpage='clGetSemaphoreHandleForTypeKHR',desc='Export external handle from a semaphore',type='protos'] -- + +Export operations have the same transference as the specified handle type's import operations. Additionally, exporting a semaphore payload to a handle with copy transference has the same side effects on the source semaphore's payload as executing a semaphore wait operation. + +Please refer to handle specific documentation for more details on transference requirements per handle type. To export an external handle from a semaphore, call the function include::{generated}/api/protos/clGetSemaphoreHandleForTypeKHR.txt[] @@ -12890,7 +12940,7 @@ include::{generated}/api/protos/clGetSemaphoreHandleForTypeKHR.txt[] * _handle_size_ specifies the size of memory pointed by _handle_ptr_. * _handle_ptr_ is a pointer to memory where the exported external handle is returned. - If _param_value_ is `NULL`, it is ignored. + If _handle_ptr_ is `NULL`, it is ignored. * _handle_size_ret_ returns the actual size in bytes for the external handle. If _handle_size_ret_ is `NULL`, it is ignored. @@ -12929,53 +12979,19 @@ Otherwise, it returns one of the following errors: === Importing Semaphore External Handles -Applications can import a semaphore payload into an existing semaphore using -an external semaphore handle. -The effects of the import operation will be either temporary or permanent, -as specified by the application. -If the import is temporary, the implementation must restore the semaphore to -its prior permanent state after submitting the next semaphore wait -operation. -Performing a subsequent temporary import on a semaphore before performing a -semaphore wait has no effect on this requirement; the next wait submitted on -the semaphore must still restore its last permanent state. -A permanent payload import behaves as if the target semaphore was destroyed, -and a new semaphore was created with the same handle but the imported -payload. -Because importing a semaphore payload temporarily or permanently detaches -the existing payload from a semaphore, similar usage restrictions to those -applied to {clReleaseSemaphoreKHR} are applied to any command that imports a -semaphore payload. -Which of these import types is used is referred to as the import operation's -permanence. -Each handle type supports either one or both types of permanence. - -The implementation must perform the import operation by either referencing -or copying the payload referred to by the specified external semaphore -handle, depending on the handle's type. -The import method used is referred to as the handle type's transference. -When using handle types with reference transference, importing a payload to -a semaphore adds the semaphore to the set of all semaphores sharing that -payload. -This set includes the semaphore from which the payload was exported. -Semaphore signaling and waiting operations performed on any semaphore in the -set must behave as if the set were a single semaphore. -Importing a payload using handle types with copy transference creates a -duplicate copy of the payload at the time of import, but makes no further -reference to it. -Semaphore signaling and waiting operations performed on the target of copy -imports must not affect any other semaphore or payload. - -Export operations have the same transference as the specified handle type's -import operations. -Additionally, exporting a semaphore payload to a handle with copy -transference has the same side effects on the source semaphore's payload as -executing a semaphore wait operation. -If the semaphore was using a temporarily imported payload, the semaphore's -prior permanent payload will be restored. - -Please refer to handle specific specifications for more details on -transference and permanence requirements specific to handle type. +Applications can import a semaphore payload by creating a semaphore from an external handle. The +implementation must perform the import operation by either referencing or copying the payload +referred to by the specified external semaphore handle, depending on the handle's type. When using +handle types with reference transference, importing a payload to a semaphore adds the semaphore to +the set of all semaphores sharing that payload. This set includes the semaphore from which the payload +was exported. Semaphore signaling and waiting operations performed on any semaphore in the set must +behave as if the set were a single semaphore. Importing a payload using handle types with copy +transference creates a duplicate copy of the payload at the time of import, but makes no further +reference to it. Semaphore signaling and waiting operations performed on the target of copy imports +must not affect any other semaphore or payload. + +Please refer to handle specific documentation for more details on transference requirements per +handle type. === Descriptions of External Semaphore Handle Types @@ -13044,17 +13060,16 @@ descriptor when exporting a {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} from a endif::cl_khr_external_semaphore_sync_fd[] -.Transference and Permanence Properties for File Descriptor Handles -[width="100%",cols="60%,<20%,<20%",options="header"] +.Transference Properties for File Descriptor Handles +[width="100%",cols="60%,<40%",options="header"] |==== -| Handle Type | Transference | Permanence +| Handle Type | Transference ifdef::cl_khr_external_semaphore_opaque_fd[] | {CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR_anchor} include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR.asciidoc[] | Reference - | Temporary, Permanent endif::cl_khr_external_semaphore_opaque_fd[] ifdef::cl_khr_external_semaphore_sync_fd[] @@ -13062,7 +13077,6 @@ ifdef::cl_khr_external_semaphore_sync_fd[] include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_SYNC_FD_KHR.asciidoc[] | Copy - | Temporary endif::cl_khr_external_semaphore_sync_fd[] |==== @@ -13074,14 +13088,17 @@ a successful import. ifdef::cl_khr_external_semaphore_sync_fd[] [open,refpage='clReImportSemaphoreSyncFdKHR',desc='Re-import sync fd handle into an existing semaphore',type='protos'] -- -To re-imported a handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} into an +To re-import a handle of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR} into an existing semaphore, call the function: include::{generated}/api/protos/clReImportSemaphoreSyncFdKHR.txt[] * _sema_object_ specifies a valid semaphore object with importable properties. - * _reimport_props_ must be `NULL`, and is reserved for future use. + * _reimport_props_ is an optional list of properties that affect the + re-import behavior. The list is terminated with the special property `0`. + If no properties are required, _reimport_props_ may be `NULL`. This extension + does not define any optional properties. * _fd_ specifies an external file descriptor handle to import Calling {clReImportSemaphoreSyncFdKHR} is equivalent to destroying @@ -13093,7 +13110,7 @@ of type {CL_SEMAPHORE_HANDLE_SYNC_FD_KHR}. // refError -{clGetSemaphoreHandleForTypeKHR} returns {CL_SUCCESS} if the semaphore +{clReImportSemaphoreSyncFdKHR} returns {CL_SUCCESS} if the semaphore handle is re-imported successfully. Otherwise, it returns one of the following errors: @@ -13148,26 +13165,26 @@ a semaphore from an external handle: endif::cl_khr_external_semaphore_win32[] -.Transference and Permanence Properties for NT Handle Types -[width="100%",cols="60%,<20%,<20%",options="header"] +.Transference Properties for NT Handle Types +[width="100%",cols="60%,<40%",options="header"] |==== -| Handle Type | Transference | Permanence +| Handle Type | Transference ifdef::cl_khr_external_semaphore_win32[] | {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR_anchor} include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR.asciidoc[] | Reference - | Temporary, Permanent | {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR_anchor} include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR.asciidoc[] | Reference - | Temporary, Permanent + +| {CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_NAMET_KHR_anchor} include::{generated}/api/version-notes/CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_NAME_KHR.asciidoc[] | Reference - | Temporary, Permanent + endif::cl_khr_external_semaphore_win32[] |==== @@ -14337,10 +14354,17 @@ include::{generated}/api/protos/clCommandBarrierWithWaitListKHR.txt[] include::{generated}/api/version-notes/clCommandBarrierWithWaitListKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. - * _command_queue_ specifies the command-queue the command will be recorded - to. - This parameter is unused, as only a single - command-queue is supported, and **must** be `NULL`. + * _command_queue_ specifies the command-queue the command will be recorded to. + {empty} + + If the {cl_khr_command_buffer_multi_device_EXT} extension is not supported, + only a single command-queue is supported, and _command_queue_ must be + `NULL`. +ifdef::cl_khr_command_buffer_multi_device[] + {empty} + + If the {cl_khr_command_buffer_multi_device_EXT} extension is supported and + _command_queue_ is `NULL`, then only one command-queue must have been set on + _command_buffer_ creation; otherwise, _command_queue_ must not be `NULL`. +endif::cl_khr_command_buffer_multi_device[] * _sync_point_wait_list_, _num_sync_points_in_wait_list_ specify synchronization-points that need to complete before this particular command can be executed. @@ -14388,11 +14412,20 @@ recorded after it do not execute until it completes. executed successfully. Otherwise, it returns one of the following errors: - * {CL_INVALID_COMMAND_QUEUE} if _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + {cl_khr_command_buffer_multi_device_EXT} extension is not supported and + _command_queue_ is not `NULL`. + * {CL_INVALID_COMMAND_QUEUE} if the + {cl_khr_command_buffer_multi_device_EXT} extension is supported; and + either _command_queue_ is `NULL` and _command_buffer_ was created with + more than one queue, or _command_queue_ is not `NULL` and not a + command-queue listed on _command_buffer_ creation. * {CL_INVALID_COMMAND_BUFFER_KHR} if _command_buffer_ is not a valid command-buffer. - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_ and - _command_buffer_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] * {CL_INVALID_OPERATION} if _command_buffer_ has been finalized. * {CL_INVALID_VALUE} if _mutable_handle_ is not `NULL`. * {CL_INVALID_SYNC_POINT_WAIT_LIST_KHR} if _sync_point_wait_list_ is @@ -14474,8 +14507,12 @@ Otherwise, it returns the errors defined by {clEnqueueCopyBuffer} except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_, + _src_buffer_, and _dst_buffer_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14579,8 +14616,12 @@ except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_buffer_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_, + _src_buffer_, and _dst_buffer_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14667,8 +14708,12 @@ except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_buffer_, and _dst_image_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_, + _src_buffer_, and _dst_image_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14761,8 +14806,12 @@ Otherwise, it returns the errors defined by {clEnqueueCopyImage} except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_image_, and _dst_image_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_, + _src_image_, and _dst_image_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14849,8 +14898,12 @@ except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, _src_image_, and _dst_buffer_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_, + _src_image_, and _dst_buffer_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -14944,8 +14997,12 @@ Otherwise, it returns the errors defined by {clEnqueueFillBuffer} except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _buffer_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_ and + _buffer_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -15037,8 +15094,12 @@ Otherwise, it returns the errors defined by {clEnqueueFillImage} except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _image_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_ and + _image_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -15281,8 +15342,12 @@ Otherwise, it returns the errors defined by {clEnqueueNDRangeKernel} except: {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _kernel_ are not the same. + * {CL_INVALID_CONTEXT} if the context associated with _command_buffer_ and + _kernel_ is not the same. +ifdef::cl_khr_command_buffer_multi_device[] + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -15395,10 +15460,17 @@ Otherwise, it returns the errors defined by {clEnqueueSVMMemcpy} except: more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. +ifdef::cl_khr_command_buffer_multi_device[] {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _kernel_ are not the same. + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. + +endif::cl_khr_command_buffer_multi_device[] + +ifndef::cl_khr_command_buffer_multi_device[] +{CL_INVALID_CONTEXT} error case is removed. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -15501,10 +15573,16 @@ successfully. Otherwise, it returns the errors defined by more than one queue, or _command_queue_ is not `NULL` and not a command-queue listed on _command_buffer_ creation. +ifdef::cl_khr_command_buffer_multi_device[] {CL_INVALID_CONTEXT} is replaced with: - * {CL_INVALID_CONTEXT} if the context associated with _command_queue_, - _command_buffer_, and _kernel_ are not the same. + * {CL_INVALID_CONTEXT} if _command_queue_ is not `NULL`, and the context + associated with _command_queue_ and _command_buffer_ is not the same. +endif::cl_khr_command_buffer_multi_device[] + +ifndef::cl_khr_command_buffer_multi_device[] +{CL_INVALID_CONTEXT} error case is removed. +endif::cl_khr_command_buffer_multi_device[] {CL_INVALID_EVENT_WAIT_LIST} is replaced with: @@ -15642,7 +15720,7 @@ endif::cl_khr_command_buffer_multi_device[] ifdef::cl_khr_command_buffer_mutable_dispatch[] [[mutable-commands]] -=== Mutable Commands: +=== Mutable Commands A generic {cl_mutable_command_khr_TYPE} handle is called a _mutable-command_ object as it can be returned from any command recording entry-point in the @@ -15653,11 +15731,10 @@ modified through the fields of {cl_mutable_dispatch_config_khr_TYPE}. Mutable-command handles are updated between enqueues using entry-point {clUpdateMutableCommandsKHR}. -To enable performant usage, all aspects of mutation are encapsulated inside -a single {cl_mutable_base_config_khr_TYPE} parameter. -This means that the runtime has access to all the information about how the -command-buffer will change, allowing the command-buffer to be rebuilt as -efficiently as possible. +To enable performant usage, all aspects of mutation can be passed in a single +call using an array. This means that the runtime has access to all the +information about how the command-buffer will change, allowing the +command-buffer to be rebuilt as efficiently as possible. Any modifications to the arguments or execution info of a mutable-dispatch handle using {cl_mutable_dispatch_arg_khr_TYPE} or {cl_mutable_dispatch_exec_info_khr_TYPE} have no affect on the original @@ -15698,8 +15775,13 @@ include::{generated}/api/protos/clUpdateMutableCommandsKHR.txt[] include::{generated}/api/version-notes/clUpdateMutableCommandsKHR.asciidoc[] * _command_buffer_ refers to a valid command-buffer object. - * _mutable_config_ is a pointer to a {cl_mutable_base_config_khr_TYPE} - structure defining updates to make to mutable-commands. + * _num_configs_ Number of elements in the _config_types_ and _config_ arrays. + * _config_types_ An array of length _num_configs_ with each element identifying + the type of each config in _configs_ at the same array index. + * _configs_ An array of length _num_configs_ containing structs which define how a + mutable-command handle in _command_buffer_ is to be updated, each of which is + interpreted using _config_types_ at the same index with the mapping defined + in the <> section. // refError @@ -15713,16 +15795,13 @@ one of the errors below is returned: * {CL_INVALID_OPERATION} if _command_buffer_ has not been finalized. * {CL_INVALID_OPERATION} if _command_buffer_ was not created with the {CL_COMMAND_BUFFER_MUTABLE_KHR} flag. - * {CL_INVALID_VALUE} if the _type_ member of _mutable_config_ is not - {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR}. - * {CL_INVALID_VALUE} if the _mutable_dispatch_list_ member of - _mutable_config_ is `NULL` and _num_mutable_dispatch_ > 0, or - _mutable_dispatch_list_ is not `NULL` and _num_mutable_dispatch_ is 0. - * {CL_INVALID_VALUE} if the _next_ member of _mutable_config_ is not - `NULL` and any iteration of the structure pointer chain does not contain - valid _type_ and _next_ members. - * {CL_INVALID_VALUE} if _mutable_config_ is `NULL`, or if both _next_ and - _mutable_dispatch_list_ members of _mutable_config_ are `NULL`. + * {CL_INVALID_VALUE} if _config_types_ is `NULL` and _num_configs_ > 0, or + _config_types_ is not `NULL` and _num_configs_ is 0. + * {CL_INVALID_VALUE} if _configs_ is `NULL` and _num_configs_ > 0, or + _configs_ is not `NULL` and _num_configs_ is 0. + * {CL_INVALID_VALUE} if any element of _config_types_ is not a valid + {cl_command_buffer_update_type_khr_TYPE} enum. + * {CL_INVALID_VALUE} if any element of _configs_ is NULL. * {CL_OUT_OF_RESOURCES} if there is a failure to allocate resources required by the OpenCL implementation on the device. * {CL_OUT_OF_HOST_MEMORY} if there is a failure to allocate resources @@ -15748,19 +15827,17 @@ parameters are updated so that the new number of work-groups exceeds the number when the ND-range command was recorded, the behavior is undefined. ==== -If the _mutable_dispatch_list_ member of _mutable_config_ is non-`NULL`, -then errors defined by {clEnqueueNDRangeKernel}, {clSetKernelExecInfo}, -{clSetKernelArg}, and {clSetKernelArgSVMPointer} are returned by -{clUpdateMutableCommandsKHR} if any of the array elements are set to an -invalid value. -Additionally, the following errors are returned if any -{cl_mutable_dispatch_config_khr_TYPE} element of the array violates the -defined conditions: +If _configs_ is non-`NULL`, then for any {cl_mutable_dispatch_config_khr_TYPE} +element of the array the errors defined by {clEnqueueNDRangeKernel}, +{clSetKernelExecInfo}, {clSetKernelArg}, and {clSetKernelArgSVMPointer} are +returned by {clUpdateMutableCommandsKHR} if any of the struct elements are set +to an invalid value. Additionally, the following errors are returned if any +{cl_mutable_dispatch_config_khr_TYPE} element of the array violates the defined +conditions: * {CL_INVALID_MUTABLE_COMMAND_KHR} if _command_ is not a valid mutable - command object, or created from _command_buffer_. - * {CL_INVALID_VALUE} if _type_ is not - {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR}. + command object returned from {clCommandNDRangeKernelKHR}, or created from + _command_buffer_. * {CL_INVALID_OPERATION} if the values of _local_work_size_ and/or _global_work_size_ result in a change to work-group uniformity. * {CL_INVALID_OPERATION} if the _work_dim_ is different from the @@ -15788,24 +15865,25 @@ defined conditions: 0, or _exec_info_list_ is not `NULL` and _num_exec_infos_ is 0. -- -[open,refpage='cl_mutable_base_config_khr',desc='DESC',type='structs'] --- -The {cl_mutable_base_config_khr_TYPE} structure encapsulates all aspects of -mutation and is defined as: +[[mutable-commands-update-structs]] +==== Mutable Command Update Structs -include::{generated}/api/structs/cl_mutable_base_config_khr.txt[] +The following table defines the mapping of +{cl_command_buffer_update_type_khr_TYPE} values to the structs they define +reinterpreting a void pointer as when passed to {clUpdateMutableCommandsKHR}. - * _type_ is the type of this structure, and must be - {CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR_anchor} - * _next_ is `NULL` or a pointer to an extending structure. - * _num_mutable_dispatch_ is the number of mutable-dispatch objects to - configure in this enqueue of the command-buffer. - * _mutable_dispatch_list_ is an array containing _num_mutable_dispatch_ - elements describing the configurations of mutable kernel execution - commands in the command-buffer. - For a description of struct members making up each array element see - {cl_mutable_dispatch_config_khr_TYPE}. --- +[[update-config-mapping]] +[cols=",,",options="header",] +|==== +| Enum Value | Struct Type | Entry Point + +| {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR_anchor} +| {cl_mutable_dispatch_config_khr_TYPE} +| {clCommandNDRangeKernelKHR} + +|==== + +==== Kernel Command Update Structs [open,refpage='cl_mutable_dispatch_config_khr',desc='Set kernel configuration of a mutable clCommandNDRangeKernelKHR command',type='structs'] -- @@ -15815,9 +15893,6 @@ The {cl_mutable_dispatch_arg_khr_TYPE} structure is passed to include::{generated}/api/structs/cl_mutable_dispatch_config_khr.txt[] - * _type_ is the type of this structure, and must be - {CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR_anchor}. - * _next_ is `NULL` or a pointer to an extending structure. * _command_ is a mutable-command object returned by {clCommandNDRangeKernelKHR} representing a kernel execution as part of a command-buffer. diff --git a/c/feature-dictionary.asciidoc b/c/feature-dictionary.asciidoc index e8375eb57..6e558f24c 100644 --- a/c/feature-dictionary.asciidoc +++ b/c/feature-dictionary.asciidoc @@ -10,162 +10,4 @@ ifndef::backend-html5[] :opencl_c_feature_name: pass:q[`\__opencl_c_​<feature_​name>`] endif::[] -// opencl_c_3d_image_writes -ifdef::backend-html5[] -:opencl_c_3d_image_writes: pass:q[`\__opencl_c_3d_image_writes`] -endif::[] -ifndef::backend-html5[] -:opencl_c_3d_image_writes: pass:q[`\__opencl_c_​3d_​image_​writes`] -endif::[] - -// opencl_c_atomic_order_acq_rel -ifdef::backend-html5[] -:opencl_c_atomic_order_acq_rel: pass:q[`\__opencl_c_atomic_order_acq_rel`] -endif::[] -ifndef::backend-html5[] -:opencl_c_atomic_order_acq_rel: pass:q[`\__opencl_c_​atomic_​order_​​`] -endif::[] - -// opencl_c_atomic_order_seq_cst -ifdef::backend-html5[] -:opencl_c_atomic_order_seq_cst: pass:q[`\__opencl_c_atomic_order_seq_cst`] -endif::[] -ifndef::backend-html5[] -:opencl_c_atomic_order_seq_cst: pass:q[`\__opencl_c_​atomic_​order_​seq_​cst`] -endif::[] - -// opencl_c_atomic_scope_device -ifdef::backend-html5[] -:opencl_c_atomic_scope_device: pass:q[`\__opencl_c_atomic_scope_device`] -endif::[] -ifndef::backend-html5[] -:opencl_c_atomic_scope_device: pass:q[`\__opencl_c_​atomic_​scope_​device`] -endif::[] - -// opencl_c_atomic_scope_all_devices -ifdef::backend-html5[] -:opencl_c_atomic_scope_all_devices: pass:q[`\__opencl_c_atomic_scope_all_devices`] -endif::[] -ifndef::backend-html5[] -:opencl_c_atomic_scope_all_devices: pass:q[`\__opencl_c_​atomic_​scope_​all_​devices`] -endif::[] - -// opencl_c_device_enqueue -ifdef::backend-html5[] -:opencl_c_device_enqueue: pass:q[`\__opencl_c_device_enqueue`] -endif::[] -ifndef::backend-html5[] -:opencl_c_device_enqueue: pass:q[`\__opencl_c_​device_​enqueue`] -endif::[] - -// opencl_c_generic_address_space -ifdef::backend-html5[] -:opencl_c_generic_address_space: pass:q[`\__opencl_c_generic_address_space`] -endif::[] -ifndef::backend-html5[] -:opencl_c_generic_address_space: pass:q[`\__opencl_c_​generic_​address_​space`] -endif::[] - -// opencl_c_fp64 -ifdef::backend-html5[] -:opencl_c_fp64: pass:q[`\__opencl_c_fp64`] -endif::[] -ifndef::backend-html5[] -:opencl_c_fp64: pass:q[`\__opencl_c_​fp64`] -endif::[] - -// opencl_c_images -ifdef::backend-html5[] -:opencl_c_images: pass:q[`\__opencl_c_images`] -endif::[] -ifndef::backend-html5[] -:opencl_c_images: pass:q[`\__opencl_c_​images`] -endif::[] - -// opencl_c_int64 -ifdef::backend-html5[] -:opencl_c_int64: pass:q[`\__opencl_c_int64`] -endif::[] -ifndef::backend-html5[] -:opencl_c_int64: pass:q[`\__opencl_c_​int64`] -endif::[] - -// opencl_c_pipes -ifdef::backend-html5[] -:opencl_c_pipes: pass:q[`\__opencl_c_pipes`] -endif::[] -ifndef::backend-html5[] -:opencl_c_pipes: pass:q[`\__opencl_c_​pipes`] -endif::[] - -// opencl_c_program_scope_global_variables -ifdef::backend-html5[] -:opencl_c_program_scope_global_variables: pass:q[`\__opencl_c_program_scope_global_variables`] -endif::[] -ifndef::backend-html5[] -:opencl_c_program_scope_global_variables: pass:q[`\__opencl_c_​program_​scope_​global_​variables`] -endif::[] - -// opencl_c_read_write_images -ifdef::backend-html5[] -:opencl_c_read_write_images: pass:q[`\__opencl_c_read_write_images`] -endif::[] -ifndef::backend-html5[] -:opencl_c_read_write_images: pass:q[`\__opencl_c_​read_​write_​images`] -endif::[] - -// opencl_c_subgroups -ifdef::backend-html5[] -:opencl_c_subgroups: pass:q[`\__opencl_c_subgroups`] -endif::[] -ifndef::backend-html5[] -:opencl_c_subgroups: pass:q[`\__opencl_c_​subgroups`] -endif::[] - -// opencl_c_work_group_collective_functions -ifdef::backend-html5[] -:opencl_c_work_group_collective_functions: pass:q[`\__opencl_c_work_group_collective_functions`] -endif::[] -ifndef::backend-html5[] -:opencl_c_work_group_collective_functions: pass:q[`\__opencl_c_​work_​group_​collective_​functions`] -endif::[] - -// opencl_c_integer_dot_product_input_4x8bit -ifdef::backend-html5[] -:opencl_c_integer_dot_product_input_4x8bit: pass:q[`\__opencl_c_integer_dot_product_input_4x8bit`] -endif::[] -ifndef::backend-html5[] -:opencl_c_integer_dot_product_input_4x8bit: pass:q[`\__opencl_c_​integer_​dot_​product_​input_​4x8bit`] -endif::[] - -// opencl_c_integer_dot_product_input_4x8bit_packed -ifdef::backend-html5[] -:opencl_c_integer_dot_product_input_4x8bit_packed: pass:q[`\__opencl_c_integer_dot_product_input_4x8bit_packed`] -endif::[] -ifndef::backend-html5[] -:opencl_c_integer_dot_product_input_4x8bit_packed: pass:q[`\__opencl_c_​integer_​dot_​product_​input_​4x8bit_​packed`] -endif::[] - -// opencl_c_kernel_clock_scope_device -ifdef::backend-html5[] -:opencl_c_kernel_clock_scope_device: pass:q[`\__opencl_c_kernel_clock_scope_device`] -endif::[] -ifndef::backend-html5[] -:opencl_c_kernel_clock_scope_device: pass:q[`\__opencl_c_​kernel_​clock_​scope_​device`] -endif::[] - -// opencl_c_kernel_clock_scope_work_group -ifdef::backend-html5[] -:opencl_c_kernel_clock_scope_work_group: pass:q[`\__opencl_c_kernel_clock_scope_work_group`] -endif::[] -ifndef::backend-html5[] -:opencl_c_kernel_clock_scope_work_group: pass:q[`\__opencl_c_​kernel_​clock_​scope_​work_​group`] -endif::[] - -// opencl_c_kernel_clock_scope_sub_group -ifdef::backend-html5[] -:opencl_c_kernel_clock_scope_sub_group: pass:q[`\__opencl_c_kernel_clock_scope_sub_group`] -endif::[] -ifndef::backend-html5[] -:opencl_c_kernel_clock_scope_sub_group: pass:q[`\__opencl_c_​kernel_​clock_​scope_​sub_​group`] -endif::[] +include::{generated}/meta/c-feature-dictionary.asciidoc[] diff --git a/c/features.txt b/c/features.txt new file mode 100644 index 000000000..b7e636dda --- /dev/null +++ b/c/features.txt @@ -0,0 +1,20 @@ +__opencl_c_3d_image_writes +__opencl_c_atomic_order_acq_rel +__opencl_c_atomic_order_seq_cst +__opencl_c_atomic_scope_device +__opencl_c_atomic_scope_all_devices +__opencl_c_device_enqueue +__opencl_c_generic_address_space +__opencl_c_fp64 +__opencl_c_images +__opencl_c_int64 +__opencl_c_pipes +__opencl_c_program_scope_global_variables +__opencl_c_read_write_images +__opencl_c_subgroups +__opencl_c_work_group_collective_functions +__opencl_c_integer_dot_product_input_4x8bit +__opencl_c_integer_dot_product_input_4x8bit_packed +__opencl_c_kernel_clock_scope_device +__opencl_c_kernel_clock_scope_work_group +__opencl_c_kernel_clock_scope_sub_group diff --git a/env/extensions.asciidoc b/env/extensions.asciidoc index f0ffc5d6b..aa963e514 100644 --- a/env/extensions.asciidoc +++ b/env/extensions.asciidoc @@ -39,7 +39,7 @@ in a SPIR-V module using *OpExtension*. If the OpenCL environment supports the extension {cl_khr_3d_image_writes_EXT}, then the environment must accept _Image_ operands to *OpImageWrite* that -are declared with with dimensionality _Dim_ equal to *3D*. +are declared with dimensionality _Dim_ equal to *3D*. ==== {cl_khr_depth_images_EXT} @@ -57,7 +57,7 @@ Additionally, the following Image Channel Orders may be returned by ==== {cl_khr_device_enqueue_local_arg_types_EXT} If the OpenCL environment supports the extension -{cl_khr_device_enqueue_local_arg_types_EXT}, then then environment will allow +{cl_khr_device_enqueue_local_arg_types_EXT}, then the environment will allow _Invoke_ functions to be passed to *OpEnqueueKernel* with *Workgroup* memory pointer parameters of any type. diff --git a/env/references.asciidoc b/env/references.asciidoc index f5dabc473..6de0c4e4a 100644 --- a/env/references.asciidoc +++ b/env/references.asciidoc @@ -36,7 +36,7 @@ 3.0, Unified`", https://www.khronos.org/registry/OpenCL/ . // References are to sections and tables of this specific version, although // other versions exists. - . [[spirv-spec]] "`SPIR-V Specification, Version 1.5, Unified`", + . [[spirv-spec]] "`SPIR-V Specification, Version 1.6, Unified`", https://www.khronos.org/registry/spir-v/ . . [[opencl-extended-instruction-set]] "`OpenCL Extended Instruction Set Specification`", https://www.khronos.org/registry/spir-v/ . diff --git a/ext/introduction.asciidoc b/ext/introduction.asciidoc index 4c4cf584e..3ff391ded 100644 --- a/ext/introduction.asciidoc +++ b/ext/introduction.asciidoc @@ -189,7 +189,7 @@ that extension on different devices for a platform. The behavior of calling a device extension function on a device not supporting that extension is undefined. -{clGetExtensionFunctionAddressForPlatform} may not be be used to query for core +{clGetExtensionFunctionAddressForPlatform} may not be used to query for core (non-extension) functions in OpenCL. For extension functions that may be queried using {clGetExtensionFunctionAddressForPlatform}, implementations may also choose to diff --git a/ext/quick_reference.asciidoc b/ext/quick_reference.asciidoc index 804c30fae..86f979c82 100644 --- a/ext/quick_reference.asciidoc +++ b/ext/quick_reference.asciidoc @@ -240,18 +240,6 @@ Language Specifications. | Allows Use of the SPIR-V `SPV_KHR_no_integer_wrap_decoration` Extension | Extension -| [[cl_khr_spirv_extended_debug_info]] link:{APISpecURL}#cl_khr_spirv_extended_debug_info[{cl_khr_spirv_extended_debug_info_EXT}] -| Allows Use of the SPIR-V `OpenCL.DebugInfo.100` Extended Instruction Set -| Extension - -| [[cl_khr_spirv_linkonce_odr]] link:{APISpecURL}#cl_khr_spirv_linkonce_odr[{cl_khr_spirv_linkonce_odr_EXT}] -| Allows Use of the SPIR-V `SPV_KHR_linkonce_odr` Extension -| Extension - -| [[cl_khr_spirv_no_integer_wrap_decoration]] link:{APISpecURL}#cl_khr_spirv_no_integer_wrap_decoration[{cl_khr_spirv_no_integer_wrap_decoration_EXT}] -| Allows Use of the SPIR-V `SPV_KHR_no_integer_wrap_decoration` Extension -| Extension - | [[cl_khr_srgb_image_writes]] link:{APISpecURL}#cl_khr_srgb_image_writes[{cl_khr_srgb_image_writes_EXT}] | Write to sRGB Images | Extension diff --git a/extensions/cl_ext_cxx_for_opencl.asciidoc b/extensions/cl_ext_cxx_for_opencl.asciidoc deleted file mode 100644 index 12bd4406f..000000000 --- a/extensions/cl_ext_cxx_for_opencl.asciidoc +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2018-2024 The Khronos Group. This work is licensed under a -// Creative Commons Attribution 4.0 International License; see -// http://creativecommons.org/licenses/by/4.0/ - -:data-uri: -:icons: font -include::../config/attribs.txt[] -:source-highlighter: coderay - -= cl_ext_cxx_for_opencl -:R: pass:q,r[^(R)^] -Khronos{R} OpenCL Working Group - -== Name Strings - -`cl_ext_cxx_for_opencl` - -== Contact - -Please see the *Issues* list in the Khronos *OpenCL-Docs* repository: + -https://github.com/KhronosGroup/OpenCL-Docs - -== Contributors - -Kevin Petit, Arm Ltd. + -Sven Van Haastregt, Arm Ltd. + -Anastasia Stulova, Arm Ltd. + -Marco Antognini, Arm Ltd. + -Neil Hickey, Arm Ltd. + -Alastair Murray, Codeplay + - -== Notice - -include::../copyrights.txt[] - -== Version - -Built On: {docdate} + -Version: 1.0.0 - -== Dependencies - -This extension is written against the OpenCL Specification -Version 3.0.3. - -This extension requires OpenCL 3.0 with OpenCL C 2.0 support or OpenCL 2.x and -`cl_khr_extended_versioning`. - -== Overview - -This extension adds support for building programs written using the C++ for -OpenCL kernel language documented in the *OpenCL-Docs* repository -(https://github.com/KhronosGroup/OpenCL-Docs) -with stable versions published in releases of the repository. - -This extension also enables applications to query the version of the language -supported by the device compiler. - -== New build option - -This extension adds support for a new `CLC++` value to be passed to the -`-cl-std` build option accepted by *clBuildProgram* and *clCompileProgram*. - -== New API Enums - -Accepted value for the _param_name_ parameter to *clGetDeviceInfo*: - -[source,c] ----- -CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT 0x4230 ----- - -== Preprocessor Macros - -This extension defines a new language, instead of extending an existing -language. As such, there will be no preprocessor `#define` matching the -extension name string. Instead, dedicated preprocessor macros conveying -language version information are available as described in the C++ for -OpenCL Programming Language Documentation, section 2.2.2.2 "Predefined -macros". - - -== Modifications to the OpenCL API Specification - -(Modify Section 4.2, *Querying Devices*) :: -+ --- - -(Add the following to Table 4.3, _Device Queries_) :: -+ --- - -[cols="1,1,4",options="header"] -|==== -| cl_device_info -| Return Type -| Description - -| `CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT` -| `cl_version` -| Returns the version of the C++ for OpenCL language supported by the - device compiler. - -|==== - --- --- - -(Modify Section 5.8.1, *Creating Program Objects*) :: -+ --- -Add the following text to the description for *clCreateProgramWithSource*: - -The source code specified by _strings_ may also be a C++ for OpenCL program source -or header. --- - -(Modify section to 5.8.6, *Compiler Options*) :: -+ --- - -(Add subsection, *C++ for OpenCL*) :: -+ --- -Applications may pass `-cl-std=CLC\++` to *clCompileProgram* and *clBuildProgram* -for programs created using *clCreateProgramFromSource* to request the program -be built as C++ for OpenCL. --- - --- - -== Conformance tests - -. Test that a program can successfully be compiled with `-cl-std=CLC++`. -. Test with a program compiled with `-cl-std=CLC++` that the value of the - +__OPENCL_CPP_VERSION__+ macro agrees with the version returned by - `CL_DEVICE_CXX_FOR_OPENCL_NUMERIC_VERSION_EXT`. - -== Issues - -None. - -== Version History - -[cols="5,15,15,70"] -[grid="rows"] -[options="header"] -|==== -| Version | Date | Author | Changes -| 1.0.0 | 2020-08-25 | Kevin Petit | *Initial revision* -|==== - diff --git a/extensions/cl_extension_template.asciidoc b/extensions/cl_extension_template.asciidoc index 2020299c7..6666c6a96 100644 --- a/extensions/cl_extension_template.asciidoc +++ b/extensions/cl_extension_template.asciidoc @@ -5,6 +5,7 @@ :data-uri: :icons: font include::../config/attribs.txt[] +include::{generated}/api/api-dictionary.asciidoc[] :source-highlighter: coderay = cl_khr_extension_template @@ -137,8 +138,7 @@ Write dates in https://en.wikipedia.org/wiki/ISO_8601[ISO 8601] date format. == Dependencies -This extension is written against the OpenCL Specification -Version 1.0, Revision 1. +This extension is written against the OpenCL Specification version 3.X.Y. This extension requires OpenCL 1.0. @@ -498,6 +498,7 @@ best not to renumber issues, either. | 0.6.0 | 2020-04-20 | Alastair Murray | Use naming conventions in the new type example. | 0.7.0 | 2021-10-05 | Ben Ashbaugh | Added recommendation for bits in bitfields. | 0.8.0 | 2021-12-13 | Ben Ashbaugh | Added OpenCL C feature names section +| 0.9.0 | 2024-07-01 | Kévin Petit | Update format for spec version and include generated definitions. |==== **** diff --git a/extensions/cl_img_bitwise_ops.asciidoc b/extensions/cl_img_bitwise_ops.asciidoc new file mode 100644 index 000000000..fbbd370fa --- /dev/null +++ b/extensions/cl_img_bitwise_ops.asciidoc @@ -0,0 +1,118 @@ +:data-uri: +:icons: font +include::../config/attribs.txt[] +:source-highlighter: coderay + += cl_img_bitwise_ops + +== Name Strings + +`cl_img_bitwise_ops` + +== Contact + +Imagination Technologies Developer Forum: + +https://forums.imgtec.com/ + +Tomasz Platek, Imagination Technologies (Tomasz.Platek 'at' imgtec.com) + +== Contributors + +CY Cheng, Imagination Technologies. + +Tomasz Platek, Imagination Technologies. + +== Notice + +Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. + +== Status + +Final Draft + +== Version + +Built On: {docdate} + +Version: 1.0.0 + +== Dependencies + +This extension is written against the OpenCL C Specification Version V3.0.16. + +== Overview + +This extension adds built-in functions that expose the bitwise operations of Imagination GPU IP that are not accessible by standard OpenCL C functions. + +== New OpenCL C Feature Names + +[source,c] +---- +__opencl_img_bit_interleave +---- + +== New OpenCL C Functions + +Performs the bit interleave operation: + +[source,c] +---- +gentype img_bit_interleave(gentype a, gentype b); +---- + +== Modifications to the OpenCL C Specification + +(Add to Table 16 - Built-in Scalar and Vector Argument Common Functions in Section 6.15.4 - Common Functions) :: ++ +-- +[cols="1,2",options="header"] +|==== +| Function | Description +| gentype *img_bit_interleave*(gentype a, gentype b) + a| `img_bit_interleave` interleaves the first `n` bits from two sources where `n` is half of the size of gentype in bits. + +For `a` and `b`, where a0 and b0 are the least significant bits: +[source] +---- +a = a(N-1)\|a(N-2)\|a(N-3)\|...\|a3\|a2\|a1\|a0 +b = b(N-1)\|b(N-2)\|b(N-3)\|...\|b3\|b2\|b1\|b0 +---- + +the output is: +[source] +---- +res = b(N/2-1)\|a(N/2-1)\|b(N/2-2)\|a(N/2-2)\|b(N/2-3)\|a(N/2-3)\|...\|b3\|a3\|b2\|a2\|b1\|a1\|b0\|a0 +---- +so the sizes of `a`,`b`, and `res` are equal. + +Requires that the `__opencl_img_bit_interleave` feature macro is defined. +|==== +-- + +== Coding Sample + +This coding sample shows how to use the *img_bit_interleave* function: +[source] +---- +int4 a = (int4) ( 0x00000000, 0x00000000, 0x0000FFFF, 0xFFFFFFFF); +int4 b = (int4) ( 0xFFFFFFFF, 0x0000FFFF, 0x00000000, 0x00000000); + +int4 res = img_bit_interleave(a,b); + +printf("res = [ 0x%x 0x%x 0x%x 0x%x]\n", res.s0, res.s1, res.s2, res.s3); +---- + +Executing a work-item of this kernel gives the following result: +[source] +---- +res = [ 0xaaaaaaaa 0xaaaaaaaa 0x55555555 0x55555555] +---- + +== Version History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|==== +| Version | Date | Author | Changes +| 1.0.0 | 2024-06-19 | Tomasz Platek | *Initial revision* +|==== + diff --git a/extensions/cl_img_matrix_multiply.asciidoc b/extensions/cl_img_matrix_multiply.asciidoc new file mode 100644 index 000000000..068830280 --- /dev/null +++ b/extensions/cl_img_matrix_multiply.asciidoc @@ -0,0 +1,303 @@ +:data-uri: +:icons: font +include::../config/attribs.txt[] +:source-highlighter: coderay + += cl_img_matrix_multiply + +== Name Strings + +`cl_img_matrix_multiply` + +== Contact + +Imagination Technologies Developer Forum: + +https://forums.imgtec.com/ + +Tomasz Platek, Imagination Technologies (Tomasz.Platek 'at' imgtec.com) + +== Contributors + +CY Cheng, Imagination Technologies. + +Joe Molleson, Imagination Technologies. + +Tomasz Platek, Imagination Technologies. + +== Notice + +Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. + +== Status + +Final Draft + +== Version + +Built On: {docdate} + +Version: 1.0.0 + +== Dependencies + +This extension is written against the OpenCL C Specification Version V3.0.16. + +This extension requires the `cl_khr_fp16` extension. + +== Overview + +This extension adds built-in functions that exercise hardware capabilities of Imagination GPU IP and allow to implement matrix multiplication in highly efficient and performant manner. + +== New OpenCL C Feature Names + +[source,c] +---- +__opencl_img_dot_interleaved +__opencl_img_matmul_2x4_4x4 +---- + +== New OpenCL C Functions + +Perform the interleaved dot product operation: + +[source,c] +---- +float2 img_dot_interleaved(float a,__local float2 * b); +float2 img_dot_interleaved(float2 a,__local float4 * b); +float2 img_dot_interleaved(float4 a,__local float8 * b); +float2 img_dot_interleaved(float8 a,__local float16 * b); +float2 img_dot_interleaved_acc(float a,__local float2 * b, float2 acc); +float2 img_dot_interleaved_acc(float2 a,__local float4 * b, float2 acc); +float2 img_dot_interleaved_acc(float4 a,__local float8 * b, float2 acc); +float2 img_dot_interleaved_acc(float8 a,__local float16 * b, float2 acc); +---- + +Perform the matrix multiplication operation: + +[source,c] +---- +float8 img_matmul_2x4_4x4f(half4 a0, half4 a1,__local half16 * b); +half8 img_matmul_2x4_4x4h(half4 a0, half4 a1,__local half16 * b); +float8 img_matmul_acc_2x4_4x4f(half4 a0, half4 a1,__local half16 * b, float4 acc0, float4 acc1); +half8 img_matmul_acc_2x4_4x4h(half4 a0, half4 a1,__local half16 * b, half4 acc0, half4 acc1); +float8 img_matmul_2x4_4x4transposedf(half4 a0, half4 a1,__local half16 * b); +half8 img_matmul_2x4_4x4transposedh(half4 a0, half4 a1,__local half16 * b); +float8 img_matmul_acc_2x4_4x4transposedf(half4 a0, half4 a1,__local half16 * b, float4 acc0, float4 acc1); +half8 img_matmul_acc_2x4_4x4transposedh(half4 a0, half4 a1,__local half16 * b, half4 acc0, half4 acc1); +---- + +== Modifications to the OpenCL C Specification + +(Add to Table 11 - Built-in Scalar and Vector Argument Math Functions in Section 6.15.2 - Math Functions) :: ++ +-- +[cols="1,2",options="header"] +|==== +| Function | Description +| float2 *img_dot_interleaved*(float _a_,pass:[__local] float2 * _b_) + + float2 *img_dot_interleaved*(float2 _a_,pass:[__local] float4 * _b_) + + float2 *img_dot_interleaved*(float4 _a_,pass:[__local] float8 * _b_) + + float2 *img_dot_interleaved*(float8 _a_,pass:[__local] float16 * _b_) + a| `img_dot_interleaved` performs the dual dot product operation. + The input vectors of the first dot product are `a` and the vector containing the even-indexed elements of `b`. The result is stored into the first element of the output vector. + The input vectors of the second dot product are `a` and the vector containing the odd-indexed elements of `b`. The result is stored into the second element of the output vector. + +For example, given: + +---- +a = [a0 a1] +b = [b0 b1 b2 b3] +---- + +the output vector is: + +---- +[res0 res1] = [a0 a1] x [b0 b1] + [b2 b3] +---- + +Requires that the `__opencl_img_dot_interleaved` feature macro is defined. +| float2 *img_dot_interleaved_acc*(float _a_,pass:[__local] float2 * _b_, float2 _acc_) + + float2 *img_dot_interleaved_acc*(float2 _a_,pass:[__local] float4 * _b_, float2 _acc_) + + float2 *img_dot_interleaved_acc*(float4 _a_,pass:[__local] float8 * _b_, float2 _acc_) + + float2 *img_dot_interleaved_acc*(float8 _a_,pass:[__local] float16 * _b_, float2 _acc_) + a| `img_dot_interleaved_acc` performs the dual dot product operation with the accumulator `acc`. + The input vectors of the first dot product are `a` and the vector containing the even-indexed elements of `b`. The result is stored into the first element of the output vector. + The input vectors of the second dot product are `a` and the vector containing the odd-indexed elements of `b`. The result is stored into the second element of the output vector. + +For example, given: + +---- +a = [a0 a1] +b = [b0 b1 b2 b3] +acc = [acc0 acc1] +---- + +the output vector is: + +---- +[res0 res1] = [a0 a1] x [b0 b1] + [acc0 acc1] + [b2 b3] +---- + +Requires that the `__opencl_img_dot_interleaved` feature macro is defined. +| float8 *img_matmul_2x4_4x4f*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_) + + half8 *img_matmul_2x4_4x4h*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_) + a| `img_matmul_2x4_4x4f` and `img_matmul_2x4_4x4h` perform the matrix multiplication operation of matrices A and B of dimensions 2x4 and 4x4, where `a0` is the first row and `a1` is the second row of the matrix A. + The first row of the matrix B is represented by the elements 0-3 of `b`, the second row by the elements 4-7, the third row by the elements 8-11, and the fourth row by the elements 12-15. + +For example, given: + +---- +A = [a00 a01 a02 a03] + [a10 a11 a12 a13] +B = [b0 b1 b2 b3] + [b4 b5 b6 b7] + [b8 b9 b10 b11] + [b12 b13 b14 b15] +---- + +the output vector is: + +---- +[res0 res1 res2 res3] = A x B +[res4 res5 res6 res7] +---- + +Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. +| float8 *img_matmul_acc_2x4_4x4f*(half4 _a0_, half4 _a1_,pass:[__local] half16 _b_, float4 _acc0_, float4 _acc1_) + + half8 *img_matmul_acc_2x4_4x4h*(half4 _a0_, half4 _a1_,pass:[__local] half16 _b_, half4 _acc0_, half4 _acc1_) + a| `img_matmul_acc_2x4_4x4f` and `img_matmul_acc_2x4_4x4h` perform the matrix multiplication operation with the accumulator of matrices A and B of dimensions 2x4 and 4x4, where `a0` is the first row and `a1` is the second row of the matrix A, and where `acc0` is the first row and `acc1` is the second row of the accumulator. + The first row of the matrix B is represented by the elements 0-3 of `b`, the second row by the elements 4-7, the third row by the elements 8-11, and the fourth row by the elements 12-15. + +For example, given: + +---- +A = [a00 a01 a02 a03] + [a10 a11 a12 a13] +B = [b0 b1 b2 b3] + [b4 b5 b6 b7] + [b8 b9 b10 b11] + [b12 b13 b14 b15] +C = [acc00 acc01 acc02 acc03] + [acc10 acc11 acc12 acc13] +---- + +the output vector is: + +---- +[res0 res1 res2 res3] = A x B + C +[res4 res5 res6 res7] +---- + +Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. + +| float8 *img_matmul_2x4_4x4transposedf*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_) + + half8 *img_matmul_2x4_4x4transposedh*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_) + a| `img_matmul_2x4_4x4transposedf` and `img_matmul_2x4_4x4transposedh` perform the matrix multiplication operation of matrix A and transposed matrix B of dimensions 2x4 and 4x4, where `a0` is the first row and `a1` is the second row of the matrix A. + The first row of the matrix B is represented by the elements 0-3 of `b`, the second row by the elements 4-7, the third row by the elements 8-11, and the fourth row by the elements 12-15. + +For example, given: + +---- +A = [a00 a01 a02 a03] + [a10 a11 a12 a13] +BT = [b0 b4 b8 b12] + [b1 b5 b9 b13] + [b2 b6 b10 b14] + [b3 b7 b11 b15] +---- + +the output vector is: + +---- +[res0 res1 res2 res3] = A x BT +[res4 res5 res6 res7] +---- + +Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. +| float8 *img_matmul_acc_2x4_4x4transposedf*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_, float4 _acc0_, float4 _acc1_) + + half8 *img_matmul_acc_2x4_4x4transposedh*(half4 _a0_, half4 _a1_,pass:[__local] half16 * _b_, half4 _acc0_, half4 _acc1_) + a| `img_matmul_acc_2x4_4x4transposedf` and `img_matmul_acc_2x4_4x4transposedh` perform the matrix multiplication operation with the accumulator of matrix A and transposed matrix B of dimensions 2x4 and 4x4, where `a0` is the first row and `a1` is the second row of the matrix A, and where `acc0` is the first row and `acc1` is the second row of the accumulator. + The first row of the matrix B is represented by the elements 0-3 of `b`, the second row by the elements 4-7, the third row by the elements 8-11, and the fourth row by the elements 12-15. + +For example, given: + +---- +A = [a00 a01 a02 a03] + [a10 a11 a12 a13] +BT = [b0 b4 b8 b12] + [b1 b5 b9 b13] + [b2 b6 b10 b14] + [b3 b7 b11 b15] +C = [acc00 acc01 acc02 acc03] + [acc10 acc11 acc12 acc13] +---- + +the output vector is: + +---- +[res0 res1 res2 res3] = A x BT + C +[res4 res5 res6 res7] +---- + +Requires that the `__opencl_img_matmul_2x4_4x4` feature macro is defined. +|==== +-- + +== Coding Sample + +This coding sample shows how to initialize the input vectors, use the *img_dot_interleaved_acc* function, and access the output vector: +[source] +---- +float4 a = (float4) (1.0f, 1.0f, 1.0f, 1.0f); +__local float8 b; +b = (float8) (0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f); + +float2 acc = (float2) (1.0f, 1.0f); +float2 res = img_dot_interleaved_acc(a, &b, acc); + +printf("res = [ %f %f ]\n", res.s0, res.s1); +---- + +Executing a work-item containing this code gives the following result: +[source] +---- +res = [ 1.000000 5.000000 ] +---- + +This coding sample shows how to initialize the input vectors, use the *img_matmul_acc_2x4_4x4f* function, and access the output vector: +[source] +---- +half4 a0 = (half4) (1.0h, 0.0h, 0.0h, 0.0h); +half4 a1 = (half4) (0.0h, 1.0h, 0.0h, 0.0h); + +local half16 b; +b = (half16) (0.0h, 1.0h, 2.0h, 3.0h, + 4.0h, 5.0h, 6.0h, 7.0h, + 8.0h, 9.0h, 10.0h, 11.0h, + 12.0h, 13.0h, 14.0h, 15.0h); + +float4 acc0 = (float4) (1.0f, 1.0f, 1.0f, 1.0f); +float4 acc1 = (float4) (1.0f, 1.0f, 1.0f, 1.0f); + +float8 res = img_matmul_acc_2x4_4x4f(a0, a1, &b, acc0, acc1); + +printf("res = [ %f %f %f %f ]\n", res.s0, res.s1, res.s2, res.s3); +printf(" [ %f %f %f %f ]\n", res.s4, res.s5, res.s6, res.s7); +---- + +Executing a work-item containing this code gives the following result: +[source] +---- +res = [ 1.000000 2.000000 3.000000 4.000000 ] + [ 5.000000 6.000000 7.000000 8.000000 ] +---- + +== Version History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|==== +| Version | Date | Author | Changes +| 1.0.0 | 2024-06-07 | Tomasz Platek | *Initial revision* +|==== + diff --git a/extensions/cl_img_memory_management.asciidoc b/extensions/cl_img_memory_management.asciidoc new file mode 100644 index 000000000..f9aa61e83 --- /dev/null +++ b/extensions/cl_img_memory_management.asciidoc @@ -0,0 +1,247 @@ +:data-uri: +:icons: font +include::../config/attribs.txt[] +:source-highlighter: coderay + += cl_img_memory_management + +== Name Strings + +`cl_img_memory_management` + +== Contact + +Imagination Technologies Developer Forum: + +https://forums.imgtec.com/ + +Tomasz Platek, Imagination Technologies (Tomasz.Platek 'at' imgtec.com) + +== Contributors + +CY Cheng, Imagination Technologies. + +Tomasz Platek, Imagination Technologies. + +== Notice + +Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. + +== Status + +Final Draft + +== Version + +Built On: {docdate} + +Version: 1.0.0 + +== Dependencies + +This extension is written against the OpenCL C Specification Version V3.0.16. + +== Overview + +This extension adds built-in functions that expose the low-level memory and cache control instructions of Imagination GPU IP that are not accessible by standard OpenCL C functions. + +== New OpenCL C Feature Names + +[source,c] +---- +__opencl_img_fence +__opencl_img_cache +__opencl_img_load_store +---- + +== New OpenCL C Functions + +Issues a data fence: + +[source,c] +---- +void img_fence(cache_target_img target); +---- + +Perform the cache flush/invalidate operation: + +[source,c] +---- +void img_cache_flush(cache_target_img target); +void img_cache_invalidate(cache_target_img target); +void img_cache_flush_invalidate(cache_target_img target); +---- + +Load to/store from memory: + +[source,c] +---- +gentype img_load(gentype *p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile); +gentype img_load(const gentype *p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile); +void img_store(gentype *p, gentype value, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile); +---- + +== Modifications to the OpenCL C Specification + +(Add to Table 4 - Other Built-in Data Types in Section 6.3.3. Other Built-in Data Types) :: ++ +[cols=",",options="header",] +|==== +| Type | Description +| `cache_target_img` + | Target of the cache control functions. Refer to the Low-level Memory and Cache Control Functions section for a detailed description + of the built-in functions that use this enum. + +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. +| `cache_coherence_img` + | Level of cache coherence. Refer to the Low-level Memory and Cache Control Functions section for a detailed description + of the built-in functions that use this enum. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| `L2_cache_policy_img` + | Cache policy for the L2 cache. Refer to the Low-level Memory and Cache Control Functions section for a detailed description + of the built-in functions that use this enum. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_persistence_level_img` + | Level of cache persistence. Refer to the Low-level Memory and Cache Control Functions section for a detailed description + of the built-in functions that use this enum. + +Requires that the `__opencl_img_load_store` feature macro is defined. +|==== + +(Add a new Section 6.15.22, *Low-level Memory and Cache Control Functions*) :: ++ +-- +The OpenCL C programming language implements the following built-in functions +to perform low-level memory and cache control operations: + +[cols="1,2",options="header"] +|==== +| Function | Description +| void *img_fence*(cache_target_img target) + a| `img_fence` issues a data fence as far as the specified `target`. For example, using `cache_target_L2_img` issues a data fence for the L1 and L2 caches. + +Requires that the `__opencl_img_fence` feature macro is defined. +| void *img_cache_flush*(cache_target_img target) + a| `img_cache_flush` flushes cache, `target` determines how far through the memory hierarchy caches are flushed. For example, using `cache_target_L2_img` flushes the L1 and L2 caches. + +Requires that the `__opencl_img_cache` feature macro is defined. +| void *img_cache_invalidate*(cache_target_img target) + a| `img_cache_invalidate` invalidates cache, `target` determines how far through the memory hierarchy caches are invalidated. For example, using `cache_target_L2_img` invalidates the L1 and L2 caches. + +Requires that the `__opencl_img_cache` feature macro is defined. +| void *img_cache_flush_invalidate*(cache_target_img target) + a| `img_cache_flush_invalidate` flushes and invalidates cache, `target` determines how far through the memory hierarchy caches are flushed and invalidated. For example, using `cache_target_L2_img` flushes and invalidates the L1 and L2 caches. + +Requires that the `__opencl_img_cache` feature macro is defined. +| gentype *img_load*(gentype pass:[*]p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile) + + gentype *img_load*(const gentype pass:[*]p, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile) + a| `img_load` returns sizeof(gentype) bytes of data from `p`, where `coherence` specifies the level of cache coherence, `policy` specifies the cache policy for the L2 cache, `persistence` specifies the level of cache persistence, and `volatile` specifies volatility. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| void *img_store*(gentype pass:[*]p, gentype value, cache_coherence_img coherence, L2_cache_policy_img policy, cache_persistence_level_img persistence, bool volatile) + a| `img_store` writes 'value' to `p`, where `coherence` specifies the level of cache coherence, `policy` specifies the cache policy for the L2 cache, `persistence` specifies the level of cache persistence, and `volatile` specifies volatility. + +Requires that the `__opencl_img_load_store` feature macro is defined. +|==== +-- + +=== Cache Target + +The enumerated type `cache_target_img` specifies the target of the cache control functions. +The following table lists the enumeration constants: + +[cols=",",options="header",] +|==== +| Cache Target | Additional Notes +| `cache_target_L1_img` + | Performs the operation on the L1 cache. + +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. +| `cache_target_L2_img` + | Performs the operation on the L1 and L2 caches. + +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. +| `cache_target_external_img` + | Performs the operation on the L1, L2, and external caches. + +Requires that the `__opencl_img_fence` or the `__opencl_img_cache` feature macro is defined. +|==== + +=== Cache Persistence +Cache persistence modifies the priority of the request in the cache where low level means that requests are evicted quickly and high level means that requests remain in cache for a long time. +The enumerated type `cache_persistence_level_img` specifies the level of cache persistence. +The following table lists the enumeration constants: + +[cols=",",options="header",] +|==== +| Cache Persistence | Additional Notes +| `cache_persistence_level_default_img` + | `cache_persistence_level_min_img` is the default persistence level. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_persistence_level_min_img` + | Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_persistence_level_low_img` + | Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_persistence_level_high_img` + | Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_persistence_level_max_img` + | Requires that the `__opencl_img_load_store` feature macro is defined. +|==== + +[[cache-coherence]] +==== Cache Coherence +The enumerated type `cache_coherence_img` specifies the level of cache coherence. +The following table lists the enumeration constants: + +[cols=",",options="header",] +|==== +| Cache Coherence | Additional Notes +| `cache_coherence_L1_img` + | Cache coherence is guaranteed at the L1 level. Explicit flush or invalidate may be needed to ensure data coherency at higher levels. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| `cache_coherence_L2_img` + | Cache coherence is guaranteed at the L2 level. Explicit flush or invalidate may be needed to ensure data coherency at higher levels. + +Requires that the `__opencl_img_load_store` feature macro is defined. +|==== + +=== L2 Cache Policy +The enumerated type `L2_cache_policy_img` specifies the cache policy for the L2 cache. +The following table lists the enumeration constants: + +[cols=",",options="header",] +|==== +| L2 Cache Policy | Additional Notes +| `L2_cache_policy_new_alloc_img` + | Allocates a new cache line on a cache miss. + +Requires that the `__opencl_img_load_store` feature macro is defined. +| `L2_cache_policy_bypass_img` + | Permits to bypass the cache and access memory directly. + +Requires that the `__opencl_img_load_store` feature macro is defined. +|==== + +== Coding Sample + +This coding sample shows how to use the *img_load* and *img_store* functions: +[source] +---- +__kernel void test(__global int *in, __global int *out) { + int a = img_load(in, cache_coherence_L1_img, L2_cache_policy_new_alloc_img, cache_persistence_level_min_img, true); + a += 1; + img_store(out, a, cache_coherence_L1_img, L2_cache_policy_new_alloc_img, cache_persistence_level_min_img, true); +} +---- + +== Version History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|==== +| Version | Date | Author | Changes +| 1.0.0 | 2024-06-19 | Tomasz Platek | *Initial revision* +|==== + diff --git a/extensions/cl_img_swap_ops.asciidoc b/extensions/cl_img_swap_ops.asciidoc new file mode 100644 index 000000000..ea9578022 --- /dev/null +++ b/extensions/cl_img_swap_ops.asciidoc @@ -0,0 +1,134 @@ +:data-uri: +:icons: font +include::../config/attribs.txt[] +:source-highlighter: coderay + += cl_img_swap_ops + +== Name Strings + +`cl_img_swap_ops` + +== Contact + +Imagination Technologies Developer Forum: + +https://forums.imgtec.com/ + +Tomasz Platek, Imagination Technologies (Tomasz.Platek 'at' imgtec.com) + +== Contributors + +CY Cheng, Imagination Technologies. + +Tomasz Platek, Imagination Technologies. + +== Notice + +Copyright (c) 2024 Imagination Technologies Ltd. All Rights Reserved. + +== Status + +Final Draft + +== Version + +Built On: {docdate} + +Version: 1.0.0 + +== Dependencies + +This extension is written against the OpenCL C Specification Version V3.0.16. + +== Overview + +This extension adds built-in functions that exercise hardware capabilities of Imagination GPU IP and expose cross work-items swap functions. + +== New OpenCL C Feature Names + +[source,c] +---- +__opencl_img_swap +---- + +== New OpenCL C Functions + +Perform the swap operation: + +[source,c] +---- +gentype img_swap_x(gentype value); +gentype img_swap_y(gentype value); +---- + +== Modifications to the OpenCL C Specification + +(Add to Table 16 - Built-in Scalar and Vector Argument Common Functions in Section 6.15.4 - Common Functions) :: ++ +-- +[cols="1,2",options="header"] +|==== +| Function | Description +| gentype *img_swap_x*(gentype value) + a| `img_swap_x` swaps `values` between work-items in the following way: all work-items are divided into blocks of four consecutive elements. For each block: + +* In the first work-item, `img_swap_x` returns `value` passed as an argument in the second work-item. +* In the second work-item, `img_swap_x` returns `value` passed as an argument in the first work-item. +* In the third work-item, `img_swap_x` returns `value` passed as an argument in the fourth work-item. +* In the fourth work-item, `img_swap_x` returns `value` passed as an argument in the third work-item. + +The work-items are assigned into blocks based on their 1-dimensional local ID (see `get_local_linear_id`). + +The number of work-items that make up a work-group must be evenly divisible by four; otherwise, the behaviour is undefined. + +The function must be called in all four work-items of the block; otherwise, the behaviour is undefined. + +Requires that the `__opencl_img_swap` feature macro is defined. +| gentype *img_swap_y*(gentype value) + a| `img_swap_y` swaps `values` between work-items in the following way: all work-items are divided into blocks of four consecutive elements. For each block: + +* In the first work-item, `img_swap_y` returns `value` passed as an argument in the third work-item. +* In the third work-item, `img_swap_y` returns `value` passed as an argument in the first work-item. +* In the second work-item, `img_swap_y` returns `value` passed as an argument in the fourth work-item. +* In the fourth work-item, `img_swap_y` returns `value` passed as an argument in the second work-item. + +The work-items are assigned into blocks based on their 1-dimensional local ID (see `get_local_linear_id`). + +The number of work-items that make up a work-group must be evenly divisible by four; otherwise, the behaviour is undefined. + +The function must be called in all four work-items of the block; otherwise, the behaviour is undefined. + +Requires that the `__opencl_img_swap` feature macro is defined. +|==== +-- + +== Coding Sample + +This coding sample shows how to use the *img_swap_x* function: +[source] +---- +__kernel void swap() { + int i = get_global_id(0); + int res = img_swap_x(i); + + printf("id: %d, res = [ %d ]\n", i, res); +} +---- + +Executing four work-items of this kernel in one work-group gives the following result: +[source] +---- +id: 0, res = [ 1 ] +id: 1, res = [ 0 ] +id: 2, res = [ 3 ] +id: 3, res = [ 2 ] +---- + +== Version History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|==== +| Version | Date | Author | Changes +| 1.0.0 | 2024-06-19 | Tomasz Platek | *Initial revision* +|==== + diff --git a/extensions/cl_intel_subgroup_buffer_prefetch.asciidoc b/extensions/cl_intel_subgroup_buffer_prefetch.asciidoc new file mode 100644 index 000000000..142bef7ce --- /dev/null +++ b/extensions/cl_intel_subgroup_buffer_prefetch.asciidoc @@ -0,0 +1,241 @@ += cl_intel_subgroup_buffer_prefetch + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en + +:blank: pass:[ +] + +// Set the default source code type in this document to C, +// for syntax highlighting purposes. +:language: c + +== Name Strings + +`cl_intel_subgroup_buffer_prefetch` + +== Contact + +Grzegorz Wawiorko Intel (grzegorz 'dot' wawiorko 'at' intel 'dot' com) + +== Contributors + +// spell-checker: disable +Grzegorz Wawiorko, Intel + +Ben Ashbaugh, Intel + +Andrzej Ratajewski, Intel + +// spell-checker: enable + +== Notice + +Copyright (c) 2024 Intel Corporation. All rights reserved. + +== Status + +Complete + +== Version + +Built On: {docdate} + +Revision: 1 + +== Dependencies + +OpenCL 1.2 and support for `cl_intel_subgroups` is required. + +This extension requires OpenCL support for SPIR-V, either via OpenCL 2.1 or via the `cl_khr_il_program` extension. + +This extension is written against the OpenCL 3.0 C Language specification, V3.0.16. + +== Overview + +The extension adds the ability to prefetch data from a buffer as a sub-group operation. +The functionality added by this extension can improve the performance of some kernels by prefetching data into a cache, so future reads of the data are from a fast cache rather than slower memory. + +The new block prefetch operations are supported both in the OpenCL C kernel programming language and in the SPIR-V intermediate language. + +The prefetch functions are companions to the sub-group block reads described by the extensions `cl_intel_subgroups`, `cl_intel_subgroups_char`, `cl_intel_subgroups_short` and `cl_intel_subgroups_long`. + + +== New API Functions + +None. + +== New API Enums + +None. + +== New OpenCL C Functions + +Add `uchar` variants of the sub-group block prefetch functions: :: ++ +-- +[source] +---- +void intel_sub_group_block_prefetch_uc( const __global uchar* p ) +void intel_sub_group_block_prefetch_uc2( const __global uchar* p ) +void intel_sub_group_block_prefetch_uc4( const __global uchar* p ) +void intel_sub_group_block_prefetch_uc8( const __global uchar* p ) +void intel_sub_group_block_prefetch_uc16( const __global uchar* p ) +---- +-- + +Add `ushort` variants of the sub-group block prefetch functions: :: ++ +-- +[source] +---- +void intel_sub_group_block_prefetch_us( const __global ushort* p ) +void intel_sub_group_block_prefetch_us2( const __global ushort* p ) +void intel_sub_group_block_prefetch_us4( const __global ushort* p ) +void intel_sub_group_block_prefetch_us8( const __global ushort* p ) +void intel_sub_group_block_prefetch_us16( const __global ushort* p ) +---- +-- + +Add `uint` variants of the sub-group block prefetch functions: :: ++ +-- +[source] +---- +void intel_sub_group_block_prefetch_ui( const __global uint* p ) +void intel_sub_group_block_prefetch_ui2( const __global uint* p ) +void intel_sub_group_block_prefetch_ui4( const __global uint* p ) +void intel_sub_group_block_prefetch_ui8( const __global uint* p ) +---- +-- + +Add `ulong` variants of the sub-group block prefetch functions: :: ++ +-- +[source] +---- +void intel_sub_group_block_prefetch_ul( const __global ulong* p ) +void intel_sub_group_block_prefetch_ul2( const __global ulong* p ) +void intel_sub_group_block_prefetch_ul4( const __global ulong* p ) +void intel_sub_group_block_prefetch_ul8( const __global ulong* p ) +---- +-- + +== Modifications to the OpenCL C Specification + +=== Add a new Section 6.15.X - "Sub-group Prefetch Functions" + +-- +[cols="5a,4",options="header"] +|================================== +|*Function* +|*Description* + +|[source,c] +---- +void intel_sub_group_block_prefetch_uc( + const __global uchar* p ) +void intel_sub_group_block_prefetch_uc2( + const __global uchar* p ) +void intel_sub_group_block_prefetch_uc4( + const __global uchar* p ) +void intel_sub_group_block_prefetch_uc8( + const __global uchar* p ) +void intel_sub_group_block_prefetch_uc16( + const __global uchar* p ) +---- + +| Takes 1, 2, 4, 8 or 16 uchars of data for each work item in the sub-group from the specified pointer as a block operation and saves it in the global cache memory. + +Prefetches have no effect on the behavior of the program but can change its performance characteristics. + +|[source,c] +---- +void intel_sub_group_block_prefetch_us( + const __global ushort* p ) +void intel_sub_group_block_prefetch_us2( + const __global ushort* p ) +void intel_sub_group_block_prefetch_us4( + const __global ushort* p ) +void intel_sub_group_block_prefetch_us8( + const __global ushort* p ) +void intel_sub_group_block_prefetch_us16( + const __global ushort* p ) +---- + +| Takes 1, 2, 4, 8 or 16 ushorts of data for each work item in the sub-group from the specified pointer as a block operation and saves it in the global cache memory. + +Prefetches have no effect on the behavior of the program but can change its performance characteristics. + +|[source,c] +---- +void intel_sub_group_block_prefetch_ui( + const __global uint* p ) +void intel_sub_group_block_prefetch_ui2( + const __global uint* p ) +void intel_sub_group_block_prefetch_ui4( + const __global uint* p ) +void intel_sub_group_block_prefetch_ui8( + const __global uint* p ) +---- + +| Takes 1, 2, 4 or 8 uints of data for each work item in the sub-group from the specified pointer as a block operation and saves it in the global cache memory. + +Prefetches have no effect on the behavior of the program but can change its performance characteristics. + +|[source,c] +---- +void intel_sub_group_block_prefetch_ul( + const __global ulong* p ) +void intel_sub_group_block_prefetch_ul2( + const __global ulong* p ) +void intel_sub_group_block_prefetch_ul4( + const __global ulong* p ) +void intel_sub_group_block_prefetch_ul8( + const __global ulong* p ) +---- + +| Takes 1, 2, 4 or 8 ulongs of data for each work item in the sub-group from the specified pointer as a block operation and saves it in the global cache memory. + +Prefetches have no effect on the behavior of the program but can change its performance characteristics. + +|================================== +-- + +== Modifications to the OpenCL SPIR-V Environment Specification + +=== Add a new section 5.2.X - `cl_intel_subgroup_buffer_prefetch` + +If the OpenCL environment supports the extension `cl_intel_subgroup_buffer_prefetch`, then the environment must accept modules that declare use of the extension `SPV_INTEL_subgroup_buffer_prefetch` via *OpExtension*. + +If the OpenCL environment supports the extension `cl_intel_subgroup_buffer_prefetch` and use of the SPIR-V extension `SPV_INTEL_subgroup_buffer_prefetch` is declared in the module via *OpExtension*, then the environment must accept modules that declare the *SubgroupBufferPrefetchINTEL* capability. + +Note that the restrictions described in Section 7.1.X.3 - _Notes and Restrictions_ in the `cl_intel_spirv_subgroups` extension are unchanged and continue to apply for this extension. + +== Issues + +None. + +//. Issue? +//+ +//-- +//`STATUS`: Description. +//-- + +== Revision History + +[cols="5,15,15,70"] +[grid="rows"] +[options="header"] +|======================================== +|Rev|Date|Author|Changes +|1|2024-06-28|Grzegorz Wawiorko|*First public revision.* +|======================================== + +//************************************************************************ +//Other formatting suggestions: +// +//* Use *bold* text for host APIs, or [source] syntax highlighting. +//* Use `mono` text for device APIs, or [source] syntax highlighting. +//* Use `mono` text for extension names, types, or enum values. +//* Use _italics_ for parameters. +//************************************************************************ diff --git a/extensions/extensions.txt b/extensions/extensions.txt index 573ec1169..d28468b9c 100644 --- a/extensions/extensions.txt +++ b/extensions/extensions.txt @@ -34,8 +34,6 @@ Khronos{R} OpenCL Working Group == Multi-Vendor Extensions :leveloffset: 2 <<< -include::cl_ext_cxx_for_opencl.asciidoc[] -<<< include::cl_ext_float_atomics.asciidoc[] <<< include::cl_ext_image_from_buffer.asciidoc[] @@ -61,14 +59,22 @@ include::cl_arm_scheduling_controls.asciidoc[] == Imagination Technologies Extensions :leveloffset: 2 <<< +include::cl_img_bitwise_ops.asciidoc[] +<<< include::cl_img_cached_allocations.asciidoc[] <<< include::cl_img_cancel_command.asciidoc[] <<< include::cl_img_generate_mipmap.asciidoc[] <<< +include::cl_img_matrix_multiply.asciidoc[] +<<< +include::cl_img_memory_management.asciidoc[] +<<< include::cl_img_mem_properties.asciidoc[] <<< +include::cl_img_swap_ops.asciidoc[] +<<< include::cl_img_use_gralloc_ptr.asciidoc[] <<< include::cl_img_yuv_image.asciidoc[] diff --git a/makeSpec b/makeSpec index 4c3decf3c..ac17ffb34 100755 --- a/makeSpec +++ b/makeSpec @@ -42,7 +42,7 @@ if __name__ == '__main__': default='gen', help='Path to directory containing generated files') parser.add_argument('-spec', action='store', - choices=[ 'core', 'khr', 'all' ], + choices=[ 'core', 'khr', 'khr+ext', 'all' ], default='core', help='Type of spec to generate') parser.add_argument('-registry', action='store', @@ -79,8 +79,8 @@ if __name__ == '__main__': # extension appendices yet. if results.spec == 'all': - results.spec = 'khr' - print("WARNING: 'all' argument to -results interpreted as 'khr' at present", file=sys.stderr) + results.spec = 'khr+ext' + print("WARNING: 'all' argument to -results interpreted as 'khr+ext' at present", file=sys.stderr) if results.spec == 'core': title = '' @@ -88,6 +88,9 @@ if __name__ == '__main__': elif results.spec == 'khr': title = 'with all KHR extensions' exts = set(deps.khrExtensions()) + elif results.spec == 'khr+ext': + title = 'with all KHR and EXT extensions' + exts = set(deps.khrAndextExtensions()) elif results.spec == 'all': title = 'with all registered extensions' exts = set(deps.allExtensions()) diff --git a/scripts/apiconventions.py b/scripts/apiconventions.py index 4d27d04f9..f16dcd479 100644 --- a/scripts/apiconventions.py +++ b/scripts/apiconventions.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2021-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/cgenerator.py b/scripts/cgenerator.py index f86658ee0..713113c02 100644 --- a/scripts/cgenerator.py +++ b/scripts/cgenerator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/checklinks.py b/scripts/checklinks.py index 816f601a7..5d79a676c 100755 --- a/scripts/checklinks.py +++ b/scripts/checklinks.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright 2013-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/clconventions.py b/scripts/clconventions.py index c561eb73d..734041ae1 100644 --- a/scripts/clconventions.py +++ b/scripts/clconventions.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/docgenerator.py b/scripts/docgenerator.py index b714ef7c4..b8c2afd0a 100644 --- a/scripts/docgenerator.py +++ b/scripts/docgenerator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/extdependency.py b/scripts/extdependency.py index 69dbec3cc..59bfc8381 100755 --- a/scripts/extdependency.py +++ b/scripts/extdependency.py @@ -106,6 +106,7 @@ def __init__(self, self.allExts = set() self.khrExts = set() + self.extExts = set() self.ratifiedExts = set() self.graph = DiGraph() self.extensions = {} @@ -133,6 +134,9 @@ def __init__(self, if conventions.KHR_prefix in name: self.khrExts.add(name) + if conventions.EXT_prefix in name: + self.extExts.add(name) + if api_name in ratified.split(','): self.ratifiedExts.add(name) @@ -159,6 +163,10 @@ def khrExtensions(self): """Returns a set of all KHR extensions in the graph""" return self.khrExts + def khrAndextExtensions(self): + """Returns a set of all KHR and EXT extensions in the graph""" + return self.khrExts | self.extExts + def ratifiedExtensions(self): """Returns a set of all ratified extensions in the graph""" return self.ratifiedExts diff --git a/scripts/extensionmetadocgenerator.py b/scripts/extensionmetadocgenerator.py index a200bab95..9f93a29e8 100644 --- a/scripts/extensionmetadocgenerator.py +++ b/scripts/extensionmetadocgenerator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/genRef.py b/scripts/genRef.py index 9b78fd0dc..2eabd231e 100755 --- a/scripts/genRef.py +++ b/scripts/genRef.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright 2016-2024 The Khronos Group Inc. # diff --git a/scripts/gen_c_feature_dictionary.py b/scripts/gen_c_feature_dictionary.py new file mode 100644 index 000000000..f9b071735 --- /dev/null +++ b/scripts/gen_c_feature_dictionary.py @@ -0,0 +1,87 @@ +#!/usr/bin/python3 + +# Copyright 2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +from collections import OrderedDict + +import argparse +import sys + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('-features', action='store', + default='', + help='File with OpenCL C features to generate, one per line') + parser.add_argument('-o', action='store', default='', + help='Output file in which to store the feature dictionary. stdout is used if no file is provided.') + + args = parser.parse_args() + + features = [] + if len(args.features) > 0: + print('Generating feature dictionaries from: ' + args.features) + with open(args.features) as f: + features = f.readlines() + else: + print('Reading feature dictionaries from stdin...') + for line in sys.stdin: + features.append(line) + print('Generating...\n') + + numberOfFeatures = 0 + + if args.o: + outfile = open(args.o, 'w') + else: + outfile = sys.stdout + + for name in features: + name = name.strip() + if len(name) == 0: + continue + + # OpenCL C features start with __opencl_c + if name.startswith('__opencl_c'): + #print('found enum: ' + name) + + # Create a variant of the name that precedes underscores with + # "zero width" spaces. This causes some long names to be + # broken at more intuitive places. + htmlName = name[:10] + name[10:].replace("_", "_") + otherName = name[:10] + name[10:].replace("_", "_​") + + # Remove the leading underscores. + name = name[2:] + + # Example: + # + # // opencl_c_images + # ifdef::backend-html5[] + # :opencl_c_images: pass:q[`\__opencl_c_images`] + # endif::[] + # ifndef::backend-html5[] + # :opencl_c_images: pass:q[`\__opencl_c_​images`] + # endif::[] + outfile.write('// ' + name + '\n') + outfile.write('ifdef::backend-html5[]\n') + outfile.write(':' + name + ': pass:q[`\\' + htmlName + '`]\n') + outfile.write('endif::[]\n') + outfile.write('ifndef::backend-html5[]\n') + outfile.write(':' + name + ': pass:q[`\\' + otherName + '`]\n') + outfile.write('endif::[]\n') + + numberOfFeatures = numberOfFeatures + 1 + + # everything else is a function + else: + print('Unexpected feature name: ' + name + ', features should start with __opencl_c!') + sys.exit(1) + + outfile.write('\n') + + if args.o: + outfile.close() + + print('Found ' + str(numberOfFeatures) + ' features.') diff --git a/scripts/gen_dictionaries.py b/scripts/gen_dictionaries.py index 4f220aa14..e513385f6 100755 --- a/scripts/gen_dictionaries.py +++ b/scripts/gen_dictionaries.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright 2019-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/gen_version_notes.py b/scripts/gen_version_notes.py index 21271643d..d5cd81ff0 100755 --- a/scripts/gen_version_notes.py +++ b/scripts/gen_version_notes.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright 2019-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/gencl.py b/scripts/gencl.py index 04f821ced..9cc8a1d2c 100755 --- a/scripts/gencl.py +++ b/scripts/gencl.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/generator.py b/scripts/generator.py index dea2ffa37..c534faf85 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/parse_dependency.py b/scripts/parse_dependency.py index 5d204959c..071d7b3c3 100755 --- a/scripts/parse_dependency.py +++ b/scripts/parse_dependency.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # Copyright 2022-2024 The Khronos Group Inc. # Copyright 2003-2019 Paul McGuire diff --git a/scripts/pygenerator.py b/scripts/pygenerator.py index 6656b4605..8656587e9 100644 --- a/scripts/pygenerator.py +++ b/scripts/pygenerator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/realign.py b/scripts/realign.py index 495cb74ba..71f9f85eb 100755 --- a/scripts/realign.py +++ b/scripts/realign.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright 2013-2024 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 diff --git a/scripts/reflib.py b/scripts/reflib.py index 41fec4928..bd873a365 100644 --- a/scripts/reflib.py +++ b/scripts/reflib.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Copyright 2016-2024 The Khronos Group Inc. # diff --git a/scripts/reg.py b/scripts/reg.py index b8f8af7ce..d5495212c 100755 --- a/scripts/reg.py +++ b/scripts/reg.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/runDocker b/scripts/runDocker new file mode 100755 index 000000000..19bf00588 --- /dev/null +++ b/scripts/runDocker @@ -0,0 +1,30 @@ +#!/bin/bash +# Copyright 2022-2024 The Khronos Group Inc. +# SPDX-License-Identifier: Apache-2.0 + +# runDocker - run the Khronos `asciidoctor-spec` Docker image with a local +# clone of the specification repository. +# The following command-line tools are required to run this script: +# awk dirname docker grep id realpath +# These are all normal Linux developer tools except for 'docker' itself. + +# Determine path to repository root directory +scriptpath=`dirname $0` +repopath=`realpath $scriptpath/..` + +# Get SHA256 of the asciidoctor-spec image build used by CI. +image=`grep -m 1 khronosgroup/docker-images@sha256: $repopath/.github/workflows/presubmit.yml | \ + awk '{print $2}'` + +uid=`id -u` +gid=`id -g` +echo "Executing Docker with spec build image and mounted spec repository root:" + +# --user causes Docker to run as the specified UID:GID instead of as root +# -it runs interactively and uses a pseudotty +# --rm removes the container on exit +# -v mounts the repository clone as /vulkan in the container +# $image is image to run +# /bin/bash drops into a shell in the container +set -x +docker run --network=host --user ${uid}:${gid} -it --rm -v ${repopath}:/opencl $image /bin/bash diff --git a/scripts/scriptgenerator.py b/scripts/scriptgenerator.py index f5ed14d00..27339b26f 100644 --- a/scripts/scriptgenerator.py +++ b/scripts/scriptgenerator.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/scripts/spec_tools/conventions.py b/scripts/spec_tools/conventions.py index 50ca75d41..190bd3b88 100644 --- a/scripts/spec_tools/conventions.py +++ b/scripts/spec_tools/conventions.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 -i +#!/usr/bin/env python3 -i # # Copyright 2013-2024 The Khronos Group Inc. # diff --git a/xml/cl.xml b/xml/cl.xml index 047f5572a..6d054f48f 100644 --- a/xml/cl.xml +++ b/xml/cl.xml @@ -249,7 +249,7 @@ server's OpenCL/api-docs repository. typedef struct _cl_mutable_command_khr* cl_mutable_command_khr; typedef cl_bitfield cl_mutable_dispatch_fields_khr; typedef cl_uint cl_mutable_command_info_khr; - typedef cl_uint cl_command_buffer_structure_type_khr; + typedef cl_uint cl_command_buffer_update_type_khr; typedef cl_bitfield cl_device_fp_atomic_capabilities_ext; typedef cl_uint cl_image_requirements_info_ext; typedef cl_bitfield cl_platform_command_buffer_capabilities_khr; @@ -370,8 +370,6 @@ server's OpenCL/api-docs repository. const void* param_value - cl_command_buffer_structure_type_khr type - const void* next cl_mutable_command_khr command cl_uint num_args cl_uint num_svm_args @@ -384,13 +382,6 @@ server's OpenCL/api-docs repository. const size_t* global_work_size const size_t* local_work_size - - - cl_command_buffer_structure_type_khr type - const void* next - cl_uint num_mutable_dispatch - const cl_mutable_dispatch_config_khr* mutable_dispatch_list - @@ -1370,10 +1361,9 @@ server's OpenCL/api-docs repository. - - - - + + + @@ -3282,9 +3272,11 @@ server's OpenCL/api-docs repository. size_t* param_value_size_ret - cl_int clUpdateMutableCommandsKHR - cl_command_buffer_khr command_buffer - const cl_mutable_base_config_khr* mutable_config + cl_int clUpdateMutableCommandsKHR + cl_command_buffer_khr command_buffer + cl_uint num_configs + const cl_command_buffer_update_type_khr* config_types + const void** configs cl_int clGetMutableCommandInfoKHR @@ -5792,7 +5784,7 @@ server's OpenCL/api-docs repository. - + @@ -5836,7 +5828,7 @@ server's OpenCL/api-docs repository. - + @@ -6568,6 +6560,11 @@ server's OpenCL/api-docs repository. + + + + + @@ -6769,7 +6766,7 @@ server's OpenCL/api-docs repository. - + @@ -7255,7 +7252,7 @@ server's OpenCL/api-docs repository. - + @@ -7293,7 +7290,7 @@ server's OpenCL/api-docs repository. - + @@ -7323,18 +7320,17 @@ server's OpenCL/api-docs repository. - + - + - @@ -7368,8 +7364,7 @@ server's OpenCL/api-docs repository. - - + @@ -7386,7 +7381,7 @@ server's OpenCL/api-docs repository. - + @@ -7426,7 +7421,7 @@ server's OpenCL/api-docs repository. - +