From 0da93bbad26343f6425aba35bc32e093eb141be2 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 31 Mar 2024 14:13:29 -0700 Subject: [PATCH 1/3] use the feature macro dictionary attributes in more places (#1107) --- api/appendix_h.asciidoc | 36 +++++++++++++++--------------- api/opencl_architecture.asciidoc | 2 +- api/opencl_platform_layer.asciidoc | 4 ++-- c/footnotes.asciidoc | 18 +++++++-------- ext/to_core_features.asciidoc | 2 +- 5 files changed, 31 insertions(+), 31 deletions(-) diff --git a/api/appendix_h.asciidoc b/api/appendix_h.asciidoc index 9e9ec6ea..1bbd6f33 100644 --- a/api/appendix_h.asciidoc +++ b/api/appendix_h.asciidoc @@ -121,10 +121,10 @@ OpenCL C compilers supporting atomics orders or scopes beyond the mandated minimum will define some or all of following feature macros as appropriate: [none] -* `+__opencl_c_atomic_order_acq_rel+` -- Indicating atomic operations support acquire-release orderings. -* `+__opencl_c_atomic_order_seq_cst+` -- Indicating atomic operations and fences support acquire sequentially consistent orderings. -* `+__opencl_c_atomic_scope_device+` -- Indicating atomic operations and fences support device-wide memory ordering constraints. -* `+__opencl_c_atomic_scope_all_devices+` -- Indicating atomic operations and fences support all-device memory ordering constraints, across any host threads and all devices that can share SVM memory with each other and the host process. +* {opencl_c_atomic_order_acq_rel} -- Indicating atomic operations support acquire-release orderings. +* {opencl_c_atomic_order_seq_cst} -- Indicating atomic operations and fences support acquire sequentially consistent orderings. +* {opencl_c_atomic_scope_device} -- Indicating atomic operations and fences support device-wide memory ordering constraints. 
+* {opencl_c_atomic_scope_all_devices} -- Indicating atomic operations and fences support all-device memory ordering constraints, across any host threads and all devices that can share SVM memory with each other and the host process. == Device-Side Enqueue @@ -184,9 +184,9 @@ When device-side enqueue is supported but a replaceable default on-device queue |==== -OpenCL C compilers supporting device-side enqueue and on-device queues will define the feature macro `+__opencl_c_device_enqueue+`. -OpenCL C compilers that define the feature macro `+__opencl_c_device_enqueue+` must also define the feature macro `+__opencl_c_generic_address_space+` because some OpenCL C functions for device-side enqueue accept pointers to the generic address space. -OpenCL C compilers that define the feature macro `+__opencl_c_device_enqueue+` must also define the feature macro `+__opencl_c_program_scope_global_variables+` because an implementation of blocks may interact with program scope variables in global address space as part of ABI. +OpenCL C compilers supporting device-side enqueue and on-device queues will define the feature macro {opencl_c_device_enqueue}. +OpenCL C compilers that define the feature macro {opencl_c_device_enqueue} must also define the feature macro {opencl_c_generic_address_space} because some OpenCL C functions for device-side enqueue accept pointers to the generic address space. +OpenCL C compilers that define the feature macro {opencl_c_device_enqueue} must also define the feature macro {opencl_c_program_scope_global_variables} because an implementation of blocks may interact with program scope variables in the global address space as part of the ABI. == Pipes @@ -216,8 +216,8 @@ When pipes are not supported: |==== -OpenCL C compilers supporting pipes will define the feature macro `+__opencl_c_pipes+`. 
-OpenCL C compilers that define the feature macro `+__opencl_c_pipes+` must also define the feature macro `+__opencl_c_generic_address_space+` because some OpenCL C functions for pipes accept pointers to the generic address space. +OpenCL C compilers supporting pipes will define the feature macro {opencl_c_pipes}. +OpenCL C compilers that define the feature macro {opencl_c_pipes} must also define the feature macro {opencl_c_generic_address_space} because some OpenCL C functions for pipes accept pointers to the generic address space. == Program Scope Global Variables @@ -243,7 +243,7 @@ When program scope global variables are not supported: |==== -OpenCL C compilers supporting program scope global variables will define the feature macro `+__opencl_c_program_scope_global_variables+`. +OpenCL C compilers supporting program scope global variables will define the feature macro {opencl_c_program_scope_global_variables}. // TODO: There is no SPIR-V capability specific to program scope global variables. // May need to update the validation rules to disallow program scope global variables @@ -294,7 +294,7 @@ When read-write images are not supported: |==== -OpenCL C compilers supporting read-write images will define the feature macro `+__opencl_c_read_write_images+`. +OpenCL C compilers supporting read-write images will define the feature macro {opencl_c_read_write_images}. == Creating 2D Images From Buffers @@ -434,7 +434,7 @@ When sub-groups are not supported: |==== -OpenCL C compilers supporting sub-groups will define the feature macro `+__opencl_c_subgroups+`. +OpenCL C compilers supporting sub-groups will define the feature macro {opencl_c_subgroups}. == Program Initialization and Clean-Up Kernels @@ -479,7 +479,7 @@ When writing to 3D image objects is not supported: |==== -OpenCL C compilers supporting writing to 3D image objects will define the feature macro `+__opencl_c_3d_image_writes+`. 
+OpenCL C compilers supporting writing to 3D image objects will define the feature macro {opencl_c_3d_image_writes}. == Work-group Collective Functions @@ -497,7 +497,7 @@ When work-group collective functions are not supported: |==== -OpenCL C compilers supporting work-group collective functions will define the feature macro `+__opencl_c_work_group_collective_functions+`. +OpenCL C compilers supporting work-group collective functions will define the feature macro {opencl_c_work_group_collective_functions}. == Generic Address Space @@ -515,7 +515,7 @@ When the generic address space is not supported: |==== -OpenCL C compilers supporting the generic address space will define the feature macro `+__opencl_c_generic_address_space+`. +OpenCL C compilers supporting the generic address space will define the feature macro {opencl_c_generic_address_space}. //== Required APIs // @@ -549,6 +549,6 @@ OpenCL C compilers supporting the generic address space will define the feature Some OpenCL C language features were already optional before OpenCL 3.0, the API mechanisms for querying these have not changed. New feature macros for these optional features have been added to OpenCL C to provide a consistent mechanism for using optional features in OpenCL C 3.0. -OpenCL C compilers supporting images will define the feature macro `+__opencl_c_images+`. -OpenCL C compilers supporting the `double` type will define the feature macro `+__opencl_c_fp64+`. -OpenCL C compilers supporting the `long`, `unsigned long` and `ulong` types will define the feature macro `+__opencl_c_int64+`, note that compilers for FULL_PROFILE devices must support these types and define the macro unconditionally. +OpenCL C compilers supporting images will define the feature macro {opencl_c_images}. +OpenCL C compilers supporting the `double` type will define the feature macro {opencl_c_fp64}. 
+OpenCL C compilers supporting the `long`, `unsigned long` and `ulong` types will define the feature macro {opencl_c_int64}; note that compilers for FULL_PROFILE devices must support these types and define the macro unconditionally. diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index a342edbf..4ae5b44a 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -2168,7 +2168,7 @@ OpenCL 3.0 also adds a new version of the OpenCL C programming language, which m The new version of OpenCL C is backwards compatible with OpenCL C 1.2, but is not backwards compatible with OpenCL C 2.0. The new version of OpenCL C must be explicitly requested via the `-cl-std=` build option, otherwise a program will continue to be compiled using the highest OpenCL C 1.x language version supported for the device. + -Whenever an OpenCL C feature is optional in the new version of the OpenCL C programming language, it will be paired with a feature macro, such as `+__opencl_c_feature_name+`, and a corresponding API query. +Whenever an OpenCL C feature is optional in the new version of the OpenCL C programming language, it will be paired with a feature macro, such as {opencl_c_feature_name}, and a corresponding API query. If a feature macro is defined then the feature is supported by the OpenCL C compiler, otherwise the optional feature is not supported. 
In order to allow future versions of OpenCL to support new types of diff --git a/api/opencl_platform_layer.asciidoc b/api/opencl_platform_layer.asciidoc index c09ff430..c5f591ac 100644 --- a/api/opencl_platform_layer.asciidoc +++ b/api/opencl_platform_layer.asciidoc @@ -2068,9 +2068,9 @@ returned for {CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR}: |==== | Feature Bit | Feature Macro | {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR} - | `__opencl_c_integer_dot_product_input_4x8bit_packed` + | {opencl_c_integer_dot_product_input_4x8bit_packed} | {CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR} - | `__opencl_c_integer_dot_product_input_4x8bit` + | {opencl_c_integer_dot_product_input_4x8bit} |==== endif::cl_khr_integer_dot_product[] diff --git a/c/footnotes.asciidoc b/c/footnotes.asciidoc index 4045e8e6..6abc922a 100644 --- a/c/footnotes.asciidoc +++ b/c/footnotes.asciidoc @@ -22,12 +22,12 @@ The <> consume operation is not supported. \ :fn-atomic-double-supported: pass:n[ \ The `atomic_double` type is only supported if double precision is supported and the *cl_khr_int64_base_atomics* and *cl_khr_int64_extended_atomics* extensions are supported and have been enabled. \ -If this is the case then an OpenCL C 3.0 compiler must also define the `+__opencl_c_fp64+` feature. \ +If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_fp64} feature. \ ] :fn-atomic-int64-supported: pass:n[ \ The atomic_long and atomic_ulong types are supported if the *cl_khr_int64_base_atomics* and *cl_khr_int64_extended_atomics* extensions are supported and have been enabled. \ -If this is the case then an OpenCL C 3.0 compiler must also define the `+__opencl_c_int64+` feature. \ +If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_int64} feature. 
\ ] :fn-atomic-size_t-supported: pass:n[ \ @@ -77,17 +77,17 @@ Although `CL_UNORM_INT_101010_2` was added in OpenCL 2.1, because there was no O :fn-double: pass:n[ \ The `double` scalar type is an optional type that is supported if the value of the `CL_DEVICE_DOUBLE_FP_CONFIG` device query is not zero. \ -If this is the case then an OpenCL C 3.0 compiler must also define the `+__opencl_c_fp64+` feature macro. \ +If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_fp64} feature macro. \ ] :fn-double-supported: pass:n[ \ Only if double precision is supported. \ -In OpenCL C 3.0 this will be indicated by the presence of the `+__opencl_c_fp64+` feature macro. \ +In OpenCL C 3.0 this will be indicated by the presence of the {opencl_c_fp64} feature macro. \ ] :fn-double-vec: pass:n[ \ The `double__n__` vector type is an optional type that is supported if the value of the `CL_DEVICE_DOUBLE_FP_CONFIG` device query is not zero. \ -If this is the case then an OpenCL C 3.0 compiler must also define the `+__opencl_c_fp64+` feature macro. \ +If this is the case then an OpenCL C 3.0 compiler must also define the {opencl_c_fp64} feature macro. \ ] :fn-dse-CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP: pass:n[ \ @@ -132,7 +132,7 @@ If an implementation extends this specification to support IEEE-754 flags or exc :fn-float-types-supported: pass:n[ \ The `half` scalar and vector types can only be used if the *cl_khr_fp16* extension is supported and has been enabled. \ -The `double` scalar and vector types can only be used if `double` precision is supported, e.g. for OpenCL C 3.0 the `+__opencl_c_fp64+` feature macro is present. \ +The `double` scalar and vector types can only be used if `double` precision is supported, e.g. for OpenCL C 3.0 the {opencl_c_fp64} feature macro is present. 
\ ] :fn-fmin-fmax-nan: pass:n[ \ @@ -155,7 +155,7 @@ Refer to the detailed description of the built-in < Date: Sun, 31 Mar 2024 14:14:49 -0700 Subject: [PATCH 2/3] add initial draft of Valid Usage and Undefined Behavior section (#1105) --- api/opencl_architecture.asciidoc | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/api/opencl_architecture.asciidoc b/api/opencl_architecture.asciidoc index 4ae5b44a..af80cd9e 100644 --- a/api/opencl_architecture.asciidoc +++ b/api/opencl_architecture.asciidoc @@ -2278,3 +2278,33 @@ include::{generated}/api/structs/cl_name_version.txt[] containing a null-terminated string, whose maximum length is therefore {CL_NAME_VERSION_MAX_NAME_SIZE} minus one. -- + +[[valid-usage]] +=== Valid Usage and Undefined Behavior + +The OpenCL specification describes valid usage and how to use the API correctly. +For some conditions where an API is used incorrectly, behavior is well-defined, +such as returning an error code. +For other conditions, behavior is undefined, and may include program +termination. +However, OpenCL implementations must always ensure that incorrect usage by an +application does not affect the integrity of the operating system, the OpenCL +implementation, or other OpenCL client applications in the system. +In particular, any guarantees made by an operating system about whether memory +from one process can be visible to another process or not must not be violated +by an OpenCL implementation for any memory allocation. +OpenCL implementations are not required to make additional security or integrity +guarantees beyond those provided by the operating system unless explicitly +directed by the application’s use of a particular feature or extension. + +[NOTE] +-- +For instance, if an operating system guarantees that data in all its memory +allocations are set to zero when newly allocated, the OpenCL implementation must +make the same guarantees for any allocations it controls. 
+ +Similarly, if an operating system guarantees that use-after-free of host +allocations will not result in values written by another process becoming +visible, the same guarantees must be made by the OpenCL implementation for +memory accessible to an OpenCL device. +-- From 2349f64751f3c952d1d003763381d6c9450fe06b Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sun, 31 Mar 2024 14:15:26 -0700 Subject: [PATCH 3/3] update generated version text for extension APIs and enums (#1108) --- scripts/gen_version_notes.py | 57 +++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/scripts/gen_version_notes.py b/scripts/gen_version_notes.py index b16faa42..21271643 100755 --- a/scripts/gen_version_notes.py +++ b/scripts/gen_version_notes.py @@ -33,29 +33,37 @@ def GetFooter(): return """ """ -def FullNote(name, added_in, deprecated_by): - # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in - # 1.0 but now deprecated, and (4) added after 1.0 but now deprecated. - if added_in == "1.0" and deprecated_by == None: - return "\n// Intentionally empty, %s has always been present." % name - if added_in != "1.0" and deprecated_by == None: - return "\nIMPORTANT: {%s} is {missing_before} version %s." % (name, added_in) - if added_in == "1.0" and deprecated_by != None: - return "\nIMPORTANT: {%s} is {deprecated_by} version %s." % (name, deprecated_by) - if added_in != "1.0" and deprecated_by != None: - return "\nIMPORTANT: {%s} is {missing_before} version %s and {deprecated_by} version %s." % (name, added_in, deprecated_by) - -def ShortNote(name, added_in, deprecated_by): - # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in - # 1.0 but now deprecated, and (4) added after 1.0 but now deprecated. - if added_in == "1.0" and deprecated_by == None: - return "// Intentionally empty, %s has always been present." 
% name - if added_in != "1.0" and deprecated_by == None: - return "{missing_before} version %s." % added_in - if added_in == "1.0" and deprecated_by != None: - return "{deprecated_by} version %s." % deprecated_by - if added_in != "1.0" and deprecated_by != None: - return "{missing_before} version %s and {deprecated_by} version %s." % (added_in, deprecated_by) +def FullNote(name, is_extension, added_in, deprecated_by): + if is_extension: + assert deprecated_by == None + return "\nIMPORTANT: {%s} is provided by the `%s` extension." % (name, added_in) + else: + # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in + # 1.0 but now deprecated, and (4) added after 1.0 but now deprecated. + if added_in == "1.0" and deprecated_by == None: + return "\n// Intentionally empty, %s has always been present." % name + if added_in != "1.0" and deprecated_by == None: + return "\nIMPORTANT: {%s} is {missing_before} version %s." % (name, added_in) + if added_in == "1.0" and deprecated_by != None: + return "\nIMPORTANT: {%s} is {deprecated_by} version %s." % (name, deprecated_by) + if added_in != "1.0" and deprecated_by != None: + return "\nIMPORTANT: {%s} is {missing_before} version %s and {deprecated_by} version %s." % (name, added_in, deprecated_by) + +def ShortNote(name, is_extension, added_in, deprecated_by): + if is_extension: + assert deprecated_by == None + return "provided by the `%s` extension." % added_in + else: + # Four patterns: (1) always present in OpenCL, (2) added after 1.0, (3) in + # 1.0 but now deprecated, and (4) added after 1.0 but now deprecated. + if added_in == "1.0" and deprecated_by == None: + return "// Intentionally empty, %s has always been present." % name + if added_in != "1.0" and deprecated_by == None: + return "{missing_before} version %s." % added_in + if added_in == "1.0" and deprecated_by != None: + return "{deprecated_by} version %s." 
% deprecated_by + if added_in != "1.0" and deprecated_by != None: + return "{missing_before} version %s and {deprecated_by} version %s." % (added_in, deprecated_by) # Find feature or extension groups that are parents of a or # <${entry_type}> tag, and then find all the @@ -74,6 +82,7 @@ def process_xml(spec, entry_type, note_printer): for feature in spec.findall(f'.//{feature_type}/require/{entry_type}/../..'): for entry in feature.findall(f'.//{entry_type}'): name = entry.get('name') + is_extension = feature_type != 'feature' deprecated_by = None numberOfEntries += 1 @@ -104,7 +113,7 @@ def process_xml(spec, entry_type, note_printer): versionFileName = os.path.join(args.directory, name + ".asciidoc") with open(versionFileName, 'w') as versionFile: versionFile.write(GetHeader()) - versionFile.write(note_printer(name, added_in, deprecated_by)) + versionFile.write(note_printer(name, is_extension, added_in, deprecated_by)) versionFile.write(GetFooter()) numberOfNewEntries += 0 if added_in == "1.0" else 1