From 389e2e5b4463810786bf6c0e853682e1ecd2eb32 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Wed, 20 Mar 2024 17:00:56 +0000 Subject: [PATCH 01/22] [SYCL] Add force range rounding option and introduce new compiler flag (#12715) Adds a new preference for range rounding, force, such that if the compile flag is used, only the range rounded parallel_for kernel will be generated. This can make binaries smaller as there is no duplication of SYCL range kernels across range rounded and unrounded versions. I have also added the flag: -fsycl-range-rounding, which can have values: on, force or disable. This flag aims to supersede the -fsycl-disable-range-rounding flag. I have also added to existing tests to check for the functionality of the new flag and refactored the range rounding sycl-e2e test. Also added brief description of flag's behaviour in `doc` --- clang/include/clang/Basic/LangOptions.def | 4 +- clang/include/clang/Basic/LangOptions.h | 6 + clang/include/clang/Driver/Options.td | 18 +- clang/lib/Driver/Driver.cpp | 4 + clang/lib/Driver/ToolChains/Clang.cpp | 10 +- clang/lib/Frontend/InitPreprocessor.cpp | 10 +- clang/lib/Sema/SemaSYCL.cpp | 11 +- .../integration_header_ppmacros.cpp | 10 +- clang/test/Driver/sycl-offload-intelfpga.cpp | 14 +- clang/test/Driver/sycl-offload.c | 26 +- clang/test/Preprocessor/predefined-macros.c | 54 +++- sycl/doc/design/ParallelForRangeRounding.md | 7 +- sycl/include/sycl/handler.hpp | 12 +- .../Basic/parallel_for_range_roundup.cpp | 278 ++++++++++-------- 14 files changed, 304 insertions(+), 160 deletions(-) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 526498d1675e3..7fe80879f7664 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -298,7 +298,9 @@ LANGOPT( "SYCL compiler assumes value fits within MAX_INT for member function of " "get/operator[], get_id/operator[] and get_global_id/get_global_linear_id " "in SYCL class 
id, iterm and nd_iterm") -LANGOPT(SYCLDisableRangeRounding, 1, 0, "Disable parallel for range rounding") +ENUM_LANGOPT(SYCLRangeRounding, SYCLRangeRoundingPreference, 2, + SYCLRangeRoundingPreference::On, + "Preference for SYCL parallel_for range rounding") LANGOPT(SYCLEnableIntHeaderDiags, 1, 0, "Enable diagnostics that require the " "SYCL integration header") LANGOPT(SYCLAllowVirtualFunctions, 1, 0, diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 2c508c32674c3..c8081a77d65c9 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -151,6 +151,12 @@ class LangOptionsBase { undefined }; + enum class SYCLRangeRoundingPreference { + On, + Disable, + Force, + }; + enum HLSLLangStd { HLSL_Unset = 0, HLSL_2015 = 2015, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 0eb5701a3a455..f770a6edb66d1 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3997,6 +3997,21 @@ def fsycl_host_compiler_options_EQ : Joined<["-"], "fsycl-host-compiler-options= Visibility<[ClangOption, CLOption, DXCOption]>, HelpText<"When performing the host compilation with " "-fsycl-host-compiler specified, use the given options during that compile. " "Options are expected to be a quoted list of space separated options.">; +def fsycl_range_rounding_EQ : Joined<["-"], "fsycl-range-rounding=">, + Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>, + Values<"on,disable,force">, + NormalizedValuesScope<"LangOptions::SYCLRangeRoundingPreference">, + NormalizedValues<["On", "Disable", "Force"]>, + MarshallingInfoEnum, "On">, + HelpText<"Options for range rounding of SYCL range kernels: " + "disable (do not generate range rounded kernels) " + "force (only generate range rounded kernels) " + "on (generate range rounded kernels as well as unrounded kernels). 
Default is 'on'">; +def fsycl_disable_range_rounding : Flag<["-"], "fsycl-disable-range-rounding">, + Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>, + Alias, AliasArgs<["disable"]>, + HelpText<"Deprecated: please use -fsycl-range-rounding=disable instead.">, + Flags<[Deprecated]>; def fno_sycl_use_footer : Flag<["-"], "fno-sycl-use-footer">, Visibility<[ClangOption, CLOption, DXCOption]>, HelpText<"Disable usage of the integration footer during SYCL enabled " "compilations.">; @@ -8256,9 +8271,6 @@ defm sycl_allow_func_ptr: BoolFOption<"sycl-allow-func-ptr", def fenable_sycl_dae : Flag<["-"], "fenable-sycl-dae">, HelpText<"Enable Dead Argument Elimination in SPIR kernels">, MarshallingInfoFlag>; -def fsycl_disable_range_rounding : Flag<["-"], "fsycl-disable-range-rounding">, - HelpText<"Disable parallel for range rounding.">, - MarshallingInfoFlag>; def fsycl_enable_int_header_diags: Flag<["-"], "fsycl-enable-int-header-diags">, HelpText<"Enable diagnostics that require the SYCL integration header.">, MarshallingInfoFlag>; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index eb9c374741d81..13dbf17fad334 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1158,6 +1158,10 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, checkSingleArgValidity(DeviceCodeSplit, {"per_kernel", "per_source", "auto", "off"}); + Arg *RangeRoundingPreference = + C.getInputArgs().getLastArg(options::OPT_fsycl_range_rounding_EQ); + checkSingleArgValidity(RangeRoundingPreference, {"disable", "force", "on"}); + Arg *SYCLForceTarget = getArgRequiringSYCLRuntime(options::OPT_fsycl_force_target_EQ); if (SYCLForceTarget) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9b94b761a0897..ea652823950cc 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5427,6 +5427,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction 
&JA, options::OPT_fno_sycl_esimd_force_stateless_mem, true)) CmdArgs.push_back("-fno-sycl-esimd-force-stateless-mem"); + if (Arg *A = Args.getLastArg(options::OPT_fsycl_range_rounding_EQ)) + A->render(Args, CmdArgs); + // Add the Unique ID prefix StringRef UniqueID = D.getSYCLUniqueID(Input.getBaseInput()); if (!UniqueID.empty()) @@ -5451,10 +5454,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, bool DisableRangeRounding = false; if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O0)) - DisableRangeRounding = true; + // If the user has set some range rounding preference then let that + // override not range rounding at -O0 + if (!Args.getLastArg(options::OPT_fsycl_range_rounding_EQ)) + DisableRangeRounding = true; } if (DisableRangeRounding || HasFPGA) - CmdArgs.push_back("-fsycl-disable-range-rounding"); + CmdArgs.push_back("-fsycl-range-rounding=disable"); if (HasFPGA) { // Pass -fintelfpga to both the host and device SYCL compilations if set. diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 6ea2be70b6d3d..0dde49bde21a0 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -579,8 +579,16 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, // Set __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ macro for // both host and device compilations if -fsycl-disable-range-rounding // flag is used. 
- if (LangOpts.SYCLDisableRangeRounding) + switch (LangOpts.getSYCLRangeRounding()) { + case LangOptions::SYCLRangeRoundingPreference::Disable: Builder.defineMacro("__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__"); + break; + case LangOptions::SYCLRangeRoundingPreference::Force: + Builder.defineMacro("__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__"); + break; + default: + break; + } } if (LangOpts.DeclareSPIRVBuiltins) { diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 89705ca50ab83..0d07d673f3880 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -5172,10 +5172,19 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << "#endif //" << Macro.first << "\n\n"; } - if (S.getLangOpts().SYCLDisableRangeRounding) { + switch (S.getLangOpts().getSYCLRangeRounding()) { + case LangOptions::SYCLRangeRoundingPreference::Disable: O << "#ifndef __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ \n"; O << "#define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1\n"; O << "#endif //__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__\n\n"; + break; + case LangOptions::SYCLRangeRoundingPreference::Force: + O << "#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ \n"; + O << "#define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1\n"; + O << "#endif //__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__\n\n"; + break; + default: + break; } if (SpecConsts.size() > 0) { diff --git a/clang/test/CodeGenSYCL/integration_header_ppmacros.cpp b/clang/test/CodeGenSYCL/integration_header_ppmacros.cpp index 0cd39fd53fee2..752189ca53847 100644 --- a/clang/test/CodeGenSYCL/integration_header_ppmacros.cpp +++ b/clang/test/CodeGenSYCL/integration_header_ppmacros.cpp @@ -2,8 +2,10 @@ // RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-SYCL2020 // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -sycl-std=2017 -fsycl-int-header=%t.h %s // RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-SYCL2017 -// RUN: %clang_cc1 -fsycl-is-device -triple 
spir64-unknown-unknown -fsycl-disable-range-rounding -fsycl-int-header=%t.h %s +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-range-rounding=disable -fsycl-int-header=%t.h %s // RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-RANGE +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-range-rounding=force -fsycl-int-header=%t.h %s +// RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-FORCE-RANGE // RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-int-header=%t.h %s // RUN: FileCheck -input-file=%t.h %s --check-prefix=CHECK-NO-RANGE @@ -33,4 +35,10 @@ int main() { // CHECK-RANGE: #ifndef __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ // CHECK-RANGE-NEXT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 // CHECK-RANGE-NEXT: #endif //__SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ + +// CHECK-FORCE-RANGE: #ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ +// CHECK-FORCE-RANGE-NEXT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1 +// CHECK-FORCE-RANGE-NEXT: #endif //__SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ + // CHECK-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 +// CHECK-NO-RANGE-NOT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1 diff --git a/clang/test/Driver/sycl-offload-intelfpga.cpp b/clang/test/Driver/sycl-offload-intelfpga.cpp index 92fddf11cc877..f86f2cc7e4f7e 100644 --- a/clang/test/Driver/sycl-offload-intelfpga.cpp +++ b/clang/test/Driver/sycl-offload-intelfpga.cpp @@ -26,13 +26,13 @@ // CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fsycl-is-device"{{.*}} "-fintelfpga" // CHK-HOST-DEVICE: clang{{.*}} "-cc1"{{.*}} "-fintelfpga"{{.*}} "-fsycl-is-host" -/// FPGA target implies -fsycl-disable-range-rounding +/// FPGA target implies -fsycl-range-rounding=disable // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fintelfpga %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s // RUN: %clangxx -### -target 
x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s -// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding" -// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-disable-range-rounding"{{.*}} "-fsycl-is-host" +// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable" +// CHK-RANGE-ROUNDING: clang{{.*}} "-fsycl-range-rounding=disable"{{.*}} "-fsycl-is-host" /// FPGA target implies -emit-only-kernels-as-entry-points in sycl-post-link // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fintelfpga %s 2>&1 \ @@ -41,12 +41,12 @@ // RUN: | FileCheck -check-prefix=CHK-NON-KERNEL-ENTRY-POINTS %s // CHK-NON-KERNEL-ENTRY-POINTS: sycl-post-link{{.*}} "-emit-only-kernels-as-entry-points" -/// -fsycl-disable-range-rounding is applied to all compilations if fpga is used +/// -fsycl-range-rounding=disable is applied to all compilations if fpga is used // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64_fpga-unknown-unknown,spir64_gen-unknown-unknown %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING-MULTI %s -// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding" -// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-fsycl-disable-range-rounding"{{.*}} "-fsycl-is-host" -// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-disable-range-rounding" +// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_gen-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable" +// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-fsycl-range-rounding=disable"{{.*}} "-fsycl-is-host" +// CHK-RANGE-ROUNDING-MULTI: clang{{.*}} "-triple" "spir64_fpga-unknown-unknown"{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-range-rounding=disable" /// -fintelfpga with 
-reuse-exe= // RUN: touch %t.cpp diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 636d9e89b8092..f67ca70bbb717 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -508,13 +508,33 @@ // RUN: | FileCheck -check-prefix=CHK-TOOLS-OPTS2 %s // CHK-TOOLS-OPTS2: clang-offload-wrapper{{.*}} "-link-opts=-DFOO1 -DFOO2" -/// -fsycl-disable-range-rounding settings +/// -fsycl-range-rounding settings +/// +/// // Check that driver flag is passed to cc1 +// RUN: %clang -### -fsycl -fsycl-range-rounding=disable %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-DISABLE %s +// RUN: %clang -### -fsycl -fsycl-range-rounding=force %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-FORCE %s +// RUN: %clang -### -fsycl -fsycl-range-rounding=on %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-DRIVER-RANGE-ROUNDING-ON %s +// CHK-DRIVER-RANGE-ROUNDING-DISABLE: "-cc1{{.*}}-fsycl-range-rounding=disable" +// CHK-DRIVER-RANGE-ROUNDING-FORCE: "-cc1{{.*}}-fsycl-range-rounding=force" +// CHK-DRIVER-RANGE-ROUNDING-ON: "-cc1{{.*}}-fsycl-range-rounding=on" +/// +/// // RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=spir64 -O0 %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DISABLE-RANGE-ROUNDING %s // RUN: %clang_cl -### -fsycl -fsycl-targets=spir64 -Od %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-DISABLE-RANGE-ROUNDING %s -// CHK-DISABLE-RANGE-ROUNDING: "-fsycl-disable-range-rounding" +// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \ +// RUN: -O0 -fsycl-range-rounding=force %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-OVERRIDE-RANGE-ROUNDING %s +// RUN: %clang_cl -### -fsycl -Od %s 2>&1 -fsycl-range-rounding=force %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-OVERRIDE-RANGE-ROUNDING %s +// CHK-DISABLE-RANGE-ROUNDING: "-fsycl-range-rounding=disable" +// CHK-OVERRIDE-RANGE-ROUNDING: "-fsycl-range-rounding=force" +// 
CHK-OVERRIDE-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=disable" // RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl \ // RUN: -fsycl-targets=spir64 -O2 %s 2>&1 \ @@ -527,6 +547,8 @@ // RUN: %clang_cl -### -fsycl -fsycl-targets=spir64 %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-RANGE-ROUNDING %s // CHK-RANGE-ROUNDING-NOT: "-fsycl-disable-range-rounding" +// CHK-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=disable" +// CHK-RANGE-ROUNDING-NOT: "-fsycl-range-rounding=force" /// ########################################################################### diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c index 119350ebf3fab..199fd03bee63b 100644 --- a/clang/test/Preprocessor/predefined-macros.c +++ b/clang/test/Preprocessor/predefined-macros.c @@ -284,32 +284,60 @@ // CHECK-RDC: #define __CLANG_RDC__ 1 // RUN: %clang_cc1 %s -E -dM -fsycl-is-device \ -// RUN: -triple spir64-unknown-unknown -fsycl-disable-range-rounding -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE +// RUN: -triple spir64-unknown-unknown -fsycl-range-rounding=disable -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE // RUN: %clang_cc1 %s -E -dM -fsycl-is-device \ // RUN: -triple spir64_fpga-unknown-unknown -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE -// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-disable-range-rounding \ +// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-range-rounding=disable \ // RUN: -triple spir64_fpga-unknown-unknown -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE // RUN: %clang_cc1 %s -E -dM -fsycl-is-device -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE +// RUN: | FileCheck -match-full-lines %s 
--check-prefix=CHECK-DISABLE-NO-RANGE // RUN: %clang_cc1 %s -E -dM -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE // RUN: %clang_cc1 %s -E -dM -fsycl-is-host \ -// RUN: -triple x86_64-unknown-linux-gnu -fsycl-disable-range-rounding -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-RANGE +// RUN: -triple x86_64-unknown-linux-gnu -fsycl-range-rounding=disable -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-RANGE // RUN: %clang_cc1 %s -E -dM -fsycl-is-host -o - \ -// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-NO-RANGE +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-DISABLE-NO-RANGE -// CHECK-RANGE: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 -// CHECK-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 +// CHECK-DISABLE-RANGE: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 +// CHECK-DISABLE-NO-RANGE-NOT: #define __SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING__ 1 + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \ +// RUN: -triple spir64-unknown-unknown -fsycl-range-rounding=force -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-device \ +// RUN: -triple spir64_fpga-unknown-unknown -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -fsycl-range-rounding=force \ +// RUN: -triple spir64_fpga-unknown-unknown -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-device -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE + +// RUN: %clang_cc1 %s -E -dM -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-host \ +// RUN: 
-triple x86_64-unknown-linux-gnu -fsycl-range-rounding=force -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-RANGE + +// RUN: %clang_cc1 %s -E -dM -fsycl-is-host -o - \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-FORCE-NO-RANGE + +// CHECK-FORCE-RANGE: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1 +// CHECK-FORCE-NO-RANGE-NOT: #define __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ 1 // RUN: %clang_cc1 %s -E -dM -o - -x hip -triple x86_64-unknown-linux-gnu \ // RUN: -fgpu-default-stream=per-thread \ @@ -334,4 +362,4 @@ // RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device | FileCheck -match-full-lines \ // RUN: %s --check-prefix=CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG // CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG: #define __HIPSTDPAR__ 1 -// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1 \ No newline at end of file +// CHECK-HIPSTDPAR-INTERPOSE-DEV-NEG-NOT: #define __HIPSTDPAR_INTERPOSE_ALLOC__ 1 diff --git a/sycl/doc/design/ParallelForRangeRounding.md b/sycl/doc/design/ParallelForRangeRounding.md index a4199aed8e800..7f43cafe6e96e 100644 --- a/sycl/doc/design/ParallelForRangeRounding.md +++ b/sycl/doc/design/ParallelForRangeRounding.md @@ -42,5 +42,8 @@ rounding will only be used if the SYCL runtime X dimension exceeds some minimum value, which can be configured using the `SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS` environment variable. -Generation of range rounded kernels can be disabled by using the compiler flag -`-fsycl-disable-range-rounding`. +In order to reduce binary size, the user can tell the compiler only to generate +the range rounded kernel, `-fsycl-range-rounding=force`. The user can also tell +the SYCL implementation to only produce the unrounded kernel using the flag +`-fsycl-range-rounding=disable`. By default both kernels will be generated, +which is equivalent to `-fsycl-range-rounding=on`. 
diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 51e2f41de9d75..672f8da32c91c 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1181,7 +1181,6 @@ class __SYCL_EXPORT handler { // non-32-bit global range, we wrap the old kernel in a new kernel // that has each work item peform multiple invocations the old // kernel in a 32-bit global range. - auto Dev = detail::getSyclObjImpl(detail::getDeviceFromHandler(*this)); id MaxNWGs = [&] { auto [MaxWGs, HasMaxWGs] = getMaxWorkGroups_v2(); if (!HasMaxWGs) { @@ -1224,6 +1223,11 @@ class __SYCL_EXPORT handler { // will yield a rounded-up value for the total range. Adjust(0, ((RoundedRange[0] + GoodFactor - 1) / GoodFactor) * GoodFactor); } +#ifdef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ + // If we are forcing range rounding kernels to be used, we always want the + // rounded range kernel to be generated, even if rounding isn't needed + DidAdjust = true; +#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ for (int i = 0; i < Dims; ++i) if (RoundedRange[i] > MaxRange[i]) @@ -1330,6 +1334,9 @@ class __SYCL_EXPORT handler { { (void)UserRange; (void)Props; +#ifndef __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ + // If parallel_for range rounding is forced then only range rounded + // kernel is generated kernel_parallel_for_wrapper(KernelFunc); #ifndef __SYCL_DEVICE_ONLY__ @@ -1340,6 +1347,9 @@ class __SYCL_EXPORT handler { std::move(KernelFunc)); setType(detail::CG::Kernel); #endif +#else + (void)KernelFunc; +#endif // __SYCL_FORCE_PARALLEL_FOR_RANGE_ROUNDING__ } } diff --git a/sycl/test-e2e/Basic/parallel_for_range_roundup.cpp b/sycl/test-e2e/Basic/parallel_for_range_roundup.cpp index 0a456ef02ea00..9e6d74dcb3f85 100644 --- a/sycl/test-e2e/Basic/parallel_for_range_roundup.cpp +++ b/sycl/test-e2e/Basic/parallel_for_range_roundup.cpp @@ -1,152 +1,164 @@ // REQUIRES: gpu // RUN: %{build} -o %t.out -// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %{run} %t.out | 
FileCheck %s - +// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %{run} %t.out | FileCheck %s --check-prefix=CHECK-DEFAULT + +// RUN: %{build} -fsycl-range-rounding=force -o %t.out +// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %{run} %t.out | FileCheck %s --check-prefix=CHECK-DEFAULT + +// These tests test 3 things: +// +// 1. The user range is the same as the in kernel range (using BufRange) as +// reported by get_range(). +// 2. That the effective range is the same as the reported range (using +// BufCounter). i.e. check that the mapping of effective range to user range +// is "onto". +// 3. That every index in a 1, 2, or 3 dimension range is active the execution +// (using BufIndexes). i.e. check that the mapping of effective range to user +// range is "one-to-one". +// #include #include + using namespace sycl; +constexpr size_t MagicY = 33, MagicZ = 64; + range<1> Range1 = {0}; range<2> Range2 = {0, 0}; range<3> Range3 = {0, 0, 0}; +template class Kernel1; +template class Kernel2; +template class Kernel3; + void check(const char *msg, size_t v, size_t ref) { std::cout << msg << v << std::endl; assert(v == ref); } -int try_item1(size_t size) { - range<1> Size{size}; - int Counter = 0; - { - buffer, 1> BufRange(&Range1, 1); - buffer BufCounter(&Counter, 1); - queue myQueue; - - myQueue.submit([&](handler &cgh) { - auto AccRange = BufRange.get_access(cgh); - auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](item<1> ITEM) { - AccCounter[0].fetch_add(1); - AccRange[0] = ITEM.get_range(0); - }); - }); - myQueue.wait(); - } - check("Size seen by user = ", Range1.get(0), size); - check("Counter = ", Counter, size); - return 0; +template void checkVec(vec a, vec b) { + static_assert(Dims == 1 || Dims == 2 || Dims == 3, + "Should only be use for 1, 2 or 3 dimensional vectors"); + assert(a[0] == b[0]); + if constexpr (Dims > 1) + assert(a[1] == b[1]); + if constexpr (Dims > 2) + assert(a[2] == b[2]); } -void try_item2(size_t size) { - 
range<2> Size{size, 10}; - int Counter = 0; - { - buffer, 1> BufRange(&Range2, 1); - buffer BufCounter(&Counter, 1); - queue myQueue; - - myQueue.submit([&](handler &cgh) { - auto AccRange = BufRange.get_access(cgh); - auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](item<2> ITEM) { - AccCounter[0].fetch_add(1); - AccRange[0][0] = ITEM.get_range(0); - }); - }); - myQueue.wait(); - } - check("Size seen by user = ", Range2.get(0), size); - check("Counter = ", Counter, size * 10); -} - -void try_item3(size_t size) { - range<3> Size{size, 10, 10}; - int Counter = 0; - { - buffer, 1> BufRange(&Range3, 1); - buffer BufCounter(&Counter, 1); - queue myQueue; - - myQueue.submit([&](handler &cgh) { - auto AccRange = BufRange.get_access(cgh); - auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](item<3> ITEM) { - AccCounter[0].fetch_add(1); - AccRange[0][0] = ITEM.get_range(0); - }); - }); - myQueue.wait(); - } - check("Size seen by user = ", Range3.get(0), size); - check("Counter = ", Counter, size * 10 * 10); -} - -void try_id1(size_t size) { +template void try_1d_range(size_t size) { + using IndexCheckT = int; range<1> Size{size}; int Counter = 0; + std::vector ItemIndexes(Size[0]); { buffer, 1> BufRange(&Range1, 1); buffer BufCounter(&Counter, 1); + buffer BufIndexes(ItemIndexes); queue myQueue; myQueue.submit([&](handler &cgh) { auto AccRange = BufRange.get_access(cgh); auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](id<1> ID) { + auto AccIndexes = BufIndexes.get_access(cgh); + cgh.parallel_for>(Size, [=](KernelIdT I) { AccCounter[0].fetch_add(1); - AccRange[0] = ID[0]; + if constexpr (std::is_same_v>) + AccRange[0] = sycl::range<1>(I.get_range(0)); + int Idx = I[0]; + AccIndexes[Idx] = IndexCheckT(I[0]); }); }); myQueue.wait(); } + if constexpr (std::is_same_v>) { + check("Size seen by user at Dim 0 = ", Range1.get(0), size); + } check("Counter = ", Counter, size); + for (auto i = 0; i < 
Size[0]; ++i) { + checkVec<1>(vec(ItemIndexes[i]), vec(i)); + } + std::cout << "Correct kernel indexes used\n"; } -void try_id2(size_t size) { - range<2> Size{size, 10}; +template void try_2d_range(size_t size) { + using IndexCheckT = int2; + range<2> Size{size, MagicY}; int Counter = 0; + std::vector ItemIndexes(Size[0] * Size[1]); { buffer, 1> BufRange(&Range2, 1); buffer BufCounter(&Counter, 1); + buffer BufIndexes(ItemIndexes); queue myQueue; myQueue.submit([&](handler &cgh) { auto AccRange = BufRange.get_access(cgh); auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](id<2> ID) { + auto AccIndexes = BufIndexes.get_access(cgh); + cgh.parallel_for>(Size, [=](KernelIdT I) { AccCounter[0].fetch_add(1); - AccRange[0][0] = ID[0]; + if constexpr (std::is_same_v>) + AccRange[0] = sycl::range<2>(I.get_range(0), I.get_range(1)); + int Idx = I[0] * Size[1] + I[1]; + AccIndexes[Idx] = IndexCheckT(I[0], I[1]); }); }); myQueue.wait(); } - check("Counter = ", Counter, size * 10); + if constexpr (std::is_same_v>) { + check("Size seen by user at Dim 0 = ", Range2.get(0), Size[0]); + check("Size seen by user at Dim 1 = ", Range2.get(1), Size[1]); + } + check("Counter = ", Counter, size * MagicY); + for (auto i = 0; i < Size[0]; ++i) + for (auto j = 0; j < Size[1]; ++j) + checkVec<2>(ItemIndexes[i * Size[1] + j], IndexCheckT(i, j)); + std::cout << "Correct kernel indexes used\n"; } -void try_id3(size_t size) { - range<3> Size{size, 10, 10}; +template void try_3d_range(size_t size) { + using IndexCheckT = int3; + range<3> Size{size, MagicY, MagicZ}; int Counter = 0; + std::vector ItemIndexes(Size[0] * Size[1] * Size[2]); { buffer, 1> BufRange(&Range3, 1); buffer BufCounter(&Counter, 1); + buffer BufIndexes(ItemIndexes); queue myQueue; myQueue.submit([&](handler &cgh) { auto AccRange = BufRange.get_access(cgh); auto AccCounter = BufCounter.get_access(cgh); - cgh.parallel_for(Size, [=](id<3> ID) { + auto AccIndexes = BufIndexes.get_access(cgh); + 
cgh.parallel_for>(Size, [=](KernelIdT I) { AccCounter[0].fetch_add(1); - AccRange[0][0] = ID[0]; + if constexpr (std::is_same_v>) + AccRange[0] = + sycl::range<3>(I.get_range(0), I.get_range(1), I.get_range(2)); + int Idx = I[0] * Size[1] * Size[2] + I[1] * Size[2] + I[2]; + AccIndexes[Idx] = IndexCheckT(I[0], I[1], I[2]); }); }); myQueue.wait(); } - check("Counter = ", Counter, size * 10 * 10); + if constexpr (std::is_same_v>) { + check("Size seen by user at Dim 0 = ", Range3.get(0), Size[0]); + check("Size seen by user at Dim 1 = ", Range3.get(1), Size[1]); + check("Size seen by user at Dim 2 = ", Range3.get(2), Size[2]); + } + check("Counter = ", Counter, size * MagicY * MagicZ); + for (auto i = 0; i < Size[0]; ++i) + for (auto j = 0; j < Size[1]; ++j) + for (auto k = 0; k < Size[2]; ++k) + checkVec<3>(ItemIndexes[i * Size[1] * Size[2] + j * Size[2] + k], + IndexCheckT(i, j, k)); + std::cout << "Correct kernel indexes used\n"; } void try_unnamed_lambda(size_t size) { - range<3> Size{size, 10, 10}; + range<3> Size{size, MagicY, MagicZ}; int Counter = 0; { buffer, 1> BufRange(&Range3, 1); @@ -163,57 +175,71 @@ void try_unnamed_lambda(size_t size) { }); myQueue.wait(); } - check("Counter = ", Counter, size * 10 * 10); + check("Counter = ", Counter, size * MagicY * MagicZ); } int main() { - int x; - - x = 1500; - try_item1(x); - try_item2(x); - try_item3(x); - try_id1(x); - try_id2(x); - try_id3(x); + int x = 1500; + try_1d_range>(x); + try_1d_range>(x); + try_2d_range>(x); + try_2d_range>(x); + try_3d_range>(x); + try_3d_range>(x); try_unnamed_lambda(x); x = 256; - try_item1(x); - try_item2(x); - try_item3(x); - try_id1(x); - try_id2(x); - try_id3(x); + try_1d_range>(x); + try_1d_range>(x); + try_2d_range>(x); + try_2d_range>(x); + try_3d_range>(x); + try_3d_range>(x); try_unnamed_lambda(x); - - return 0; } -// CHECK: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Size seen by user = 1500 -// CHECK-NEXT: Counter = 1500 -// CHECK-NEXT: 
parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Size seen by user = 1500 -// CHECK-NEXT: Counter = 15000 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Size seen by user = 1500 -// CHECK-NEXT: Counter = 150000 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Counter = 1500 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Counter = 15000 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Counter = 150000 -// CHECK-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 -// CHECK-NEXT: Counter = 150000 -// CHECK-NEXT: Size seen by user = 256 -// CHECK-NEXT: Counter = 256 -// CHECK-NEXT: Size seen by user = 256 -// CHECK-NEXT: Counter = 2560 -// CHECK-NEXT: Size seen by user = 256 -// CHECK-NEXT: Counter = 25600 -// CHECK-NEXT: Counter = 256 -// CHECK-NEXT: Counter = 2560 -// CHECK-NEXT: Counter = 25600 -// CHECK-NEXT: Counter = 25600 +// CHECK-DEFAULT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 1500 +// CHECK-DEFAULT-NEXT: Counter = 1500 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Counter = 1500 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 1500 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 1 = 33 +// CHECK-DEFAULT-NEXT: Counter = 49500 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Counter = 49500 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Size seen by user at 
Dim 0 = 1500 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 1 = 33 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 2 = 64 +// CHECK-DEFAULT-NEXT: Counter = 3168000 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Counter = 3168000 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: parallel_for range adjusted at dim 0 from 1500 to 1504 +// CHECK-DEFAULT-NEXT: Counter = 3168000 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 256 +// CHECK-DEFAULT-NEXT: Counter = 256 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Counter = 256 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 256 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 1 = 33 +// CHECK-DEFAULT-NEXT: Counter = 8448 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Counter = 8448 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 0 = 256 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 1 = 33 +// CHECK-DEFAULT-NEXT: Size seen by user at Dim 2 = 64 +// CHECK-DEFAULT-NEXT: Counter = 540672 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Counter = 540672 +// CHECK-DEFAULT-NEXT: Correct kernel indexes used +// CHECK-DEFAULT-NEXT: Counter = 540672 From 36450f2d1e6b78ecba1745dda7e1a95f3f273988 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Wed, 20 Mar 2024 10:01:48 -0700 Subject: [PATCH 02/22] [SYCL] Add break statements to avoid unannotated fallthrough (#13068) --- sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp | 3 ++- sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp | 2 ++ sycl/source/detail/global_handler.cpp | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp b/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp 
index 6d039e4314fa5..52d419c93ee05 100644 --- a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp +++ b/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp @@ -224,8 +224,9 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, // handle to DLL module if (PrintPiTrace) std::cout << "---> DLL_PROCESS_DETACH pi_win_proxy_loader.dll\n" << std::endl; - + break; case DLL_THREAD_ATTACH: + break; case DLL_THREAD_DETACH: break; } diff --git a/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp b/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp index c473da9cd5cbb..3a2ca6185f775 100644 --- a/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp +++ b/sycl/plugins/common_win_pi_trace/common_win_pi_trace.hpp @@ -33,7 +33,9 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, // handle to DLL module if (PrintPiTrace) std::cout << "---> DLL_PROCESS_ATTACH " << __SYCL_PLUGIN_DLL_NAME << "\n" << std::endl; + break; case DLL_THREAD_ATTACH: + break; case DLL_THREAD_DETACH: break; } diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 2dbbfe6767c25..d57b6e5a50354 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -357,7 +357,9 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, case DLL_PROCESS_ATTACH: if (PrintPiTrace) std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl; + break; case DLL_THREAD_ATTACH: + break; case DLL_THREAD_DETACH: break; } From 92f5b98a6fbe2dc4809bf295dd6117cdd63c2598 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Wed, 20 Mar 2024 20:06:27 +0100 Subject: [PATCH 03/22] [SYCL][E2E] Disable TaskSequence/* tests on Linux (#13080) They fail in post-commit --- sycl/test-e2e/TaskSequence/concurrent-loops.cpp | 3 +++ sycl/test-e2e/TaskSequence/in-order-async-get.cpp | 3 +++ sycl/test-e2e/TaskSequence/mult-and-add.cpp | 3 +++ .../test-e2e/TaskSequence/multi-kernel-task-function-reuse.cpp | 3 +++ sycl/test-e2e/TaskSequence/producer-consumer.cpp | 3 +++ 
sycl/test-e2e/TaskSequence/struct-array-args-and-return.cpp | 3 +++ 6 files changed, 18 insertions(+) diff --git a/sycl/test-e2e/TaskSequence/concurrent-loops.cpp b/sycl/test-e2e/TaskSequence/concurrent-loops.cpp index 2c4b56b7c0e67..6253655574dce 100644 --- a/sycl/test-e2e/TaskSequence/concurrent-loops.cpp +++ b/sycl/test-e2e/TaskSequence/concurrent-loops.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/TaskSequence/in-order-async-get.cpp b/sycl/test-e2e/TaskSequence/in-order-async-get.cpp index f8c42a05ab324..826cb2766fede 100644 --- a/sycl/test-e2e/TaskSequence/in-order-async-get.cpp +++ b/sycl/test-e2e/TaskSequence/in-order-async-get.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/TaskSequence/mult-and-add.cpp b/sycl/test-e2e/TaskSequence/mult-and-add.cpp index 3b8119a44223b..303f7a5376b15 100644 --- a/sycl/test-e2e/TaskSequence/mult-and-add.cpp +++ b/sycl/test-e2e/TaskSequence/mult-and-add.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/TaskSequence/multi-kernel-task-function-reuse.cpp b/sycl/test-e2e/TaskSequence/multi-kernel-task-function-reuse.cpp index cb3abf85a1ead..98aebb6aab057 100644 --- 
a/sycl/test-e2e/TaskSequence/multi-kernel-task-function-reuse.cpp +++ b/sycl/test-e2e/TaskSequence/multi-kernel-task-function-reuse.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/TaskSequence/producer-consumer.cpp b/sycl/test-e2e/TaskSequence/producer-consumer.cpp index 2b5e46ba0e386..dbd1c5d06878c 100644 --- a/sycl/test-e2e/TaskSequence/producer-consumer.cpp +++ b/sycl/test-e2e/TaskSequence/producer-consumer.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/TaskSequence/struct-array-args-and-return.cpp b/sycl/test-e2e/TaskSequence/struct-array-args-and-return.cpp index 94a630591b132..e1ffc6d603a3a 100644 --- a/sycl/test-e2e/TaskSequence/struct-array-args-and-return.cpp +++ b/sycl/test-e2e/TaskSequence/struct-array-args-and-return.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// FIXME: failure in post-commit, re-enable when fixed: +// UNSUPPORTED: linux + // REQUIRES: aspect-ext_intel_fpga_task_sequence // RUN: %clangxx -fsycl -fintelfpga %s -o %t.out // RUN: %{run} %t.out From 2f9c0bb80ad06c99369f73da9ec8b4040627347c Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Wed, 20 Mar 2024 20:08:06 +0100 Subject: [PATCH 04/22] [SYCL][E2E]Update Regression/multiple-targets.cpp (#13074) Sets `spir64` instead of `spirv64` as SYCL target, uses lower case `cuda` and `hip` for `REQUIRES` flags, adds `native_cpu` as a possible required backend. 
--- sycl/test-e2e/Regression/multiple-targets.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sycl/test-e2e/Regression/multiple-targets.cpp b/sycl/test-e2e/Regression/multiple-targets.cpp index 13f136f8aa64a..d21f7c458aec2 100644 --- a/sycl/test-e2e/Regression/multiple-targets.cpp +++ b/sycl/test-e2e/Regression/multiple-targets.cpp @@ -2,17 +2,17 @@ // It tests if the target triples can be specified with any order. // The test is repeated for per_kernel device code splitting. // -// REQUIRES: CUDA || HIP -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spirv64 -o %t.out %s +// REQUIRES: cuda || hip || native_cpu +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spir64 -o %t.out %s // RUN: %{run} %t.out // -// RUN: %clangxx -fsycl -fsycl-targets=spirv64,%{sycl_triple} -o %t.out %s +// RUN: %clangxx -fsycl -fsycl-targets=spir64,%{sycl_triple} -o %t.out %s // RUN: %{run} %t.out // -// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spirv64 -fsycl-device-code-split=per_kernel -o %t.out %s +// RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple},spir64 -fsycl-device-code-split=per_kernel -o %t.out %s // RUN: %{run} %t.out // -// RUN: %clangxx -fsycl -fsycl-targets=spirv64,%{sycl_triple} -fsycl-device-code-split=per_kernel -o %t.out %s +// RUN: %clangxx -fsycl -fsycl-targets=spir64,%{sycl_triple} -fsycl-device-code-split=per_kernel -o %t.out %s // RUN: %{run} %t.out // // XFAIL: hip_nvidia From 9876e19f4ff387b35b0c98c7d62e5f50e6de187d Mon Sep 17 00:00:00 2001 From: tovinkere Date: Wed, 20 Mar 2024 21:22:13 -0700 Subject: [PATCH 05/22] [SYCL][XPTI] 'queue_id' metadata feature refactoring (#13070) - Better requirements/test cases showed gaps in previous implementation that resulted in data inconsistencies - Metadata is associated with UID and since UIDs are the same multiple instantiations of the same object, only invariant data needs to be stored in the metadata object - Adding mutable data resulted in data inconsistencies and 
the feature refactoring addresses these issues --------- Signed-off-by: Vasanth Tovinkere --- sycl/source/detail/queue_impl.cpp | 4 ++ sycl/source/detail/queue_impl.hpp | 13 +++- sycl/source/detail/scheduler/commands.cpp | 66 +++++++++++++------ sycl/source/detail/xpti_registry.hpp | 47 ++++++++++++- sycl/test-e2e/XPTI/Inputs/test_collector.cpp | 7 ++ .../XPTI/basic_event_collection_linux.cpp | 22 +++++-- .../tools/sycl-trace/sycl_trace_collector.cpp | 9 +++ .../xptitest_subscriber/XPTISubscriber.cpp | 35 +++++----- xpti/include/xpti/xpti_trace_framework.h | 38 +++++++++++ xpti/include/xpti/xpti_trace_framework.hpp | 25 ++++++- xpti/src/xpti_proxy.cpp | 37 +++++++++++ xptifw/CMakeLists.txt | 2 +- xptifw/src/xpti_trace_framework.cpp | 57 +++++++++++++++- 13 files changed, 313 insertions(+), 49 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 64a1fb5e888ac..321cc48b29769 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -113,6 +113,9 @@ event queue_impl::memset(const std::shared_ptr &Self, xpti::addMetadata(TEvent, "memory_size", Count); xpti::addMetadata(TEvent, "queue_id", MQueueID); }); + // Before we notify the subscribers, we broadcast the 'queue_id', which was a + // metadata entry to TLS for use by callback handlers + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); // Notify XPTI about the memset submission PrepareNotify.notify(); // Emit a begin/end scope for this call @@ -159,6 +162,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, xpti::addMetadata(TEvent, "memory_size", Count); xpti::addMetadata(TEvent, "queue_id", MQueueID); }); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); // Notify XPTI about the memset submission PrepareNotify.notify(); // Emit a begin/end scope for this call diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 7109555b05ecc..890891644bbac 100644 --- 
a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -92,7 +92,7 @@ class queue_impl { /// \param PropList is a list of properties to use for queue construction. queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList) - : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {}; + : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){}; /// Constructs a SYCL queue with an async_handler and property_list provided /// form a device and a context. @@ -176,13 +176,16 @@ class queue_impl { // This section is the second part of the instrumentation that uses the // tracepoint information and notifies } + // We enable XPTI tracing events using the TLS mechanism; if the code // location data is available, then the tracing data will be rich. #if XPTI_ENABLE_INSTRUMENTATION constexpr uint16_t NotificationTraceType = static_cast(xpti::trace_point_type_t::queue_create); + // Using the instance override constructor for use with queues as queues + // maintain instance IDs in the object XPTIScope PrepareNotify((void *)this, NotificationTraceType, - SYCL_STREAM_NAME, "queue_create"); + SYCL_STREAM_NAME, MQueueID, "queue_create"); // Cache the trace event, stream id and instance IDs for the destructor if (xptiCheckTraceEnabled(PrepareNotify.streamID(), NotificationTraceType)) { @@ -207,6 +210,8 @@ class queue_impl { xpti::addMetadata(TEvent, "queue_handle", reinterpret_cast(getHandleRef())); }); + // Also publish to TLS + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); PrepareNotify.notify(); } #endif @@ -244,7 +249,7 @@ class queue_impl { constexpr uint16_t NotificationTraceType = static_cast(xpti::trace_point_type_t::queue_create); XPTIScope PrepareNotify((void *)this, NotificationTraceType, - SYCL_STREAM_NAME, "queue_create"); + SYCL_STREAM_NAME, MQueueID, "queue_create"); if (xptiCheckTraceEnabled(PrepareNotify.streamID(), NotificationTraceType)) { // 
Cache the trace event, stream id and instance IDs for the destructor @@ -269,6 +274,8 @@ class queue_impl { if (!MHostQueue) xpti::addMetadata(TEvent, "queue_handle", getHandleRef()); }); + // Also publish to TLS before notification + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); PrepareNotify.notify(); } #endif diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 8777b82db1f6b..efc553cdb97e2 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1005,7 +1005,10 @@ void AllocaCommandBase::emitInstrumentationData() { xpti::addMetadata(TE, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); } #endif } @@ -1124,7 +1127,8 @@ void AllocaSubBufCommand::emitInstrumentationData() { this->MRequirement.MAccessRange[0]); xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1202,8 +1206,10 @@ void ReleaseCommand::emitInstrumentationData() { getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "allocation_type", commandToName(MAllocaCmd->getType())); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); 
makeTraceEventEpilog(); } #endif @@ -1323,8 +1329,10 @@ void MapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1386,8 +1394,10 @@ void UnMapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1489,8 +1499,10 @@ void MemCpyCommand::emitInstrumentationData() { xpti::addMetadata( CmdTraceEvent, "copy_to", reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get())); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1665,8 +1677,10 @@ void MemCpyCommandHost::emitInstrumentationData() { xpti::addMetadata( CmdTraceEvent, "copy_to", reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get())); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, 
we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1756,8 +1770,10 @@ void EmptyCommand::emitInstrumentationData() { getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1828,8 +1844,10 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -2063,7 +2081,9 @@ void instrumentationFillCommonData(const std::string &KernelName, xpti::addMetadata(CmdTraceEvent, "sym_column_no", static_cast(Column)); } - xpti::addMetadata(CmdTraceEvent, "queue_id", Queue->getQueueID()); + // We no longer set the 'queue_id' in the metadata structure as it is a + // mutable value and multiple threads using the same queue created at the + // same location will overwrite the metadata values creating inconsistencies } } #endif @@ -2096,6 +2116,10 @@ std::pair emitKernelInstrumentationData( FromSource, InstanceID, CmdTraceEvent); if (CmdTraceEvent) { + // Stash the queue_id mutable metadata in TLS + 
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + Queue->getQueueID()); + instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, SyclKernel, Queue, CGArgs); @@ -2139,6 +2163,8 @@ void ExecCGCommand::emitInstrumentationData() { CmdTraceEvent); if (CmdTraceEvent) { + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -3351,10 +3377,12 @@ void KernelFusionCommand::emitInstrumentationData() { deviceToString(MQueue->get_device())); xpti::addMetadata(CmdTraceEvent, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); } - if (MFirstInstance) { + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index be546e4e27905..681e2841c027b 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -42,6 +42,9 @@ extern uint8_t GMemAllocStreamID; extern xpti::trace_event_data_t *GMemAllocEvent; extern xpti::trace_event_data_t *GSYCLGraphEvent; +// We will pick a global constant so that the pointer in TLS never goes stale +inline constexpr auto XPTI_QUEUE_INSTANCE_ID_KEY = "queue_id"; + #define STR(x) #x #define SYCL_VERSION_STR \ "sycl " STR(__LIBSYCL_MAJOR_VERSION) "." 
STR(__LIBSYCL_MINOR_VERSION) @@ -165,6 +168,45 @@ class XPTIRegistry { class XPTIScope { public: using TracePoint = xpti::framework::tracepoint_t; + /// @brief Scoped class for XPTI instrumentation using TLS data + /// @param CodePtr The address of the class/function to help differentiate + /// actions in case the code location information is not available + /// @param TraceType The type of trace event being created + /// @param StreamName The stream which will emit these notifications + /// @param InstanceID The instance ID associated with an object, otherwise 0 + /// will auto-generate + /// @param UserData String value that provides metadata about the + /// instrumentation + XPTIScope(void *CodePtr, uint16_t TraceType, const char *StreamName, + uint64_t InstanceID, const char *UserData) + : MUserData(UserData), MStreamID(0), MInstanceID(InstanceID), + MScopedNotify(false), MTraceType(0) { + detail::tls_code_loc_t Tls; + auto TData = Tls.query(); + // If TLS is not set, we can still generate universal IDs with user data + // and CodePtr information + const char *FuncName = TData.functionName(); + if (!TData.functionName() && !TData.fileName()) + FuncName = UserData; + // Create a tracepoint object that has a lifetime of this class + MTP = new TracePoint(TData.fileName(), FuncName, TData.lineNumber(), + TData.columnNumber(), CodePtr); + if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create || + TraceType == (uint16_t)xpti::trace_point_type_t::node_create || + TraceType == (uint16_t)xpti::trace_point_type_t::edge_create || + TraceType == (uint16_t)xpti::trace_point_type_t::queue_create) + MTP->parent_event(GSYCLGraphEvent); + // Now if tracing is enabled, create trace events and notify + if (xptiTraceEnabled() && MTP) { + MTP->stream(StreamName).trace_type((xpti::trace_point_type_t)TraceType); + MTraceEvent = const_cast(MTP->trace_event()); + MStreamID = MTP->stream_id(); + // This constructor uses a manual override for the instance ID as some + // 
objects such as queues keep track of instance IDs + MTP->override_instance_id(MInstanceID); + } + } + /// @brief Scoped class for XPTI instrumentation using TLS data /// @param CodePtr The address of the class/function to help differentiate /// actions in case the code location information is not available @@ -188,7 +230,8 @@ class XPTIScope { TData.columnNumber(), CodePtr); if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create || TraceType == (uint16_t)xpti::trace_point_type_t::node_create || - TraceType == (uint16_t)xpti::trace_point_type_t::edge_create) + TraceType == (uint16_t)xpti::trace_point_type_t::edge_create || + TraceType == (uint16_t)xpti::trace_point_type_t::queue_create) MTP->parent_event(GSYCLGraphEvent); // Now if tracing is enabled, create trace events and notify if (xptiTraceEnabled() && MTP) { @@ -243,6 +286,8 @@ class XPTIScope { MTraceType == (uint16_t)xpti::trace_point_type_t::graph_create || MTraceType == (uint16_t)xpti::trace_point_type_t::node_create || MTraceType == (uint16_t)xpti::trace_point_type_t::edge_create || + MTraceType == (uint16_t)xpti::trace_point_type_t::queue_create || + MTraceType == (uint16_t)xpti::trace_point_type_t::queue_destroy || MTraceType == (uint16_t)xpti::trace_point_type_t::diagnostics) return; diff --git a/sycl/test-e2e/XPTI/Inputs/test_collector.cpp b/sycl/test-e2e/XPTI/Inputs/test_collector.cpp index a7c00dffdf1cd..be75f61137ea3 100644 --- a/sycl/test-e2e/XPTI/Inputs/test_collector.cpp +++ b/sycl/test-e2e/XPTI/Inputs/test_collector.cpp @@ -62,6 +62,10 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType, xpti::trace_event_data_t *, xpti::trace_event_data_t *Event, uint64_t, const void *UserData) { + char *Key = 0; + uint64_t Value; + bool HaveKeyValue = + (xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS); std::lock_guard Lock{GMutex}; auto Type = static_cast(TraceType); switch (Type) { @@ -99,6 +103,9 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType, std::cout 
<< "Unknown tracepoint\n"; } + if (HaveKeyValue) { + std::cout << " " << Key << " : " << Value << "\n"; + } xpti::metadata_t *Metadata = xptiQueryMetadata(Event); for (auto &Item : *Metadata) { std::cout << " " << xptiLookupString(Item.first) << " : " diff --git a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp index 61b53feed0622..5a895b67d0097 100644 --- a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp +++ b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp @@ -28,6 +28,7 @@ // CHECK-NEXT: PI Call Begin : piPlatformGetInfo // CHECK-NEXT: PI Call Begin : piKernelSetExecInfo // CHECK: Node create +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -35,10 +36,14 @@ // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Node create -// CHECK-NEXT: kernel_name : virtual_node[{{.*}}] +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: kernel_name : virtual_node[{{.*}}] // CHECK-NEXT: Edge create -// CHECK-NEXT: event : {{.*}} +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: event : {{.*}} +// CHECK-DAG: kernel_name : virtual_node[{{.*}}] // CHECK-NEXT: Task begin +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -51,6 +56,7 @@ // CHECK-NEXT: PI Call Begin : piKernelRelease // CHECK-NEXT: PI Call Begin : piProgramRelease // CHECK-NEXT: Signal +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: 
sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -58,6 +64,7 @@ // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Task end +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -65,27 +72,34 @@ // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Wait begin +// CHECK-DAG: queue_id : {{.*}} // CHECK-NEXT: PI Call Begin : piEventsWait // CHECK-NEXT: Wait end +// CHECK-DAG: queue_id : {{.*}} // CHECK-NEXT: Node create +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: memory_size : {{.*}} // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Task begin +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: memory_size : {{.*}} // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: PI Call Begin : piextUSMEnqueueMemcpy // CHECK-NEXT: Task end +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: memory_size : {{.*}} // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: PI Call Begin : piEventRelease // CHECK-NEXT: Wait begin -// CHECK: sycl_device_type : {{.*}} +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: sycl_device_type : {{.*}} // CHECK: PI Call Begin : piQueueFinish // CHECK-NEXT: Wait end -// CHECK: sycl_device_type : {{.*}} +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: sycl_device_type : {{.*}} diff --git 
a/sycl/tools/sycl-trace/sycl_trace_collector.cpp b/sycl/tools/sycl-trace/sycl_trace_collector.cpp index 55075c5437879..5cf5b3bc5f5b9 100644 --- a/sycl/tools/sycl-trace/sycl_trace_collector.cpp +++ b/sycl/tools/sycl-trace/sycl_trace_collector.cpp @@ -57,6 +57,11 @@ void TraceTaskExecutionSignals(xpti::trace_event_data_t * /*Parent*/, if (!Event) return; + char *Key = 0; + uint64_t Value; + bool HaveKeyValue = + (xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS); + std::cout << "[SYCL] Task " << (IsBegin ? "begin" : "end ") << " (event=" << Event << ",instanceID=" << InstanceID << ")" << std::endl; @@ -67,6 +72,10 @@ void TraceTaskExecutionSignals(xpti::trace_event_data_t * /*Parent*/, if (!IsBegin || !PrintSyclVerbose) return; + if (HaveKeyValue) { + std::cout << "\t " << Key << " : " << Value << std::endl; + } + xpti::metadata_t *Metadata = xptiQueryMetadata(Event); for (auto &Item : *Metadata) { std::cout << "\t " << xptiLookupString(Item.first) << " : " diff --git a/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp b/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp index fcbbb02126a62..2c79f76269c11 100644 --- a/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp +++ b/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp @@ -35,6 +35,13 @@ XPTI_CALLBACK_API void testCallback(uint16_t TraceType, if (GAnalyzedTraceTypes.find(TraceType) == GAnalyzedTraceTypes.end()) return; + // Since "queue_id" is no longer a metadata item, we have to retrieve it from + // TLS using new XPTI API + char *Key = 0; + uint64_t Value; + bool HaveKeyValue = + (xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS); + if (TraceType == xpti::trace_diagnostics) { std::string AggregatedData; if (Event && Event->reserved.payload && Event->reserved.payload->name && @@ -111,30 +118,22 @@ XPTI_CALLBACK_API void testCallback(uint16_t TraceType, } else if (TraceType == xpti::trace_task_begin) { if 
(Event) { std::string Message; - xpti::metadata_t *Metadata = xptiQueryMetadata(Event); - for (const auto &Item : *Metadata) { - std::string_view Key{xptiLookupString(Item.first)}; - if (Key == "queue_id") { - Message.append( - std::string("task_begin:") + Key.data() + std::string(":") + - std::to_string( - xpti::getMetadata(Item).second)); - } + // Since we have changed how we send the "queue_id" information, we no longer + // have to check the metadata for the instance ID + if (HaveKeyValue) { + Message.append(std::string("task_begin:") + Key + std::string(":") + + std::to_string(Value)); } GReceivedNotifications.push_back(std::make_pair(TraceType, Message)); } } else if (TraceType == xpti::trace_task_end) { if (Event) { std::string Message; - xpti::metadata_t *Metadata = xptiQueryMetadata(Event); - for (const auto &Item : *Metadata) { - std::string_view Key{xptiLookupString(Item.first)}; - if (Key == "queue_id") { - Message.append( - std::string("task_end:") + Key.data() + std::string(":") + - std::to_string( - xpti::getMetadata(Item).second)); - } + // Since we have changed how we send the "queue_id" information, we no longer + // have to check the metadata for the instance ID + if (HaveKeyValue) { + Message.append(std::string("task_end:") + Key + std::string(":") + + std::to_string(Value)); } GReceivedNotifications.push_back(std::make_pair(TraceType, Message)); } diff --git a/xpti/include/xpti/xpti_trace_framework.h b/xpti/include/xpti/xpti_trace_framework.h index 90a0e57047b2e..58fa9a117b6d5 100644 --- a/xpti/include/xpti/xpti_trace_framework.h +++ b/xpti/include/xpti/xpti_trace_framework.h @@ -90,6 +90,41 @@ XPTI_EXPORT_API uint64_t xptiGetUniversalId(); /// @param uid Unique 64 bit identifier. 
XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid); +/// @brief Returns stashed tuple +/// @details The XPTI Framework allows the notification mechanism to stash a +/// key-value tuple before a notification that can be accessed in the callback +/// handler for the notification. This value is guaranteed to be valid for the +/// duration of the notification. +/// @param key The Key of the stashed tuple is contained in this parameter after +/// the call +/// @param value The value that corresponds to key +/// @return The result code is XPTI_RESULT_SUCCESS when successful and +/// XPTI_RESULT_NOTFOUND if there is nothing stashed. Also returns error if +/// 'key' argument is invalid (XPTI_RESULT_INVALIDARG) +XPTI_EXPORT_API xpti::result_t xptiGetStashedTuple(char **key, uint64_t &value); + +/// @brief Stash a key-value tuple +/// @details Certain notifications in XPTI may want to provide mutable values +/// associated with Universal IDs that can be captured in the notification +/// handler. The framework currently allows one such tuple to be provided and +/// stashed. +/// @param key The Key of the tuple that is being stashed and needs to be +/// available for the duration of the notification call. +/// @param value The value that corresponds to key +/// @return The result code is XPTI_RESULT_SUCCESS when successful and +/// XPTI_RESULT_FAIL if key is invalid +XPTI_EXPORT_API xpti::result_t xptiStashTuple(const char *key, uint64_t value); + +/// @brief Un-Stash a key-value tuple or pop it from a stack, if one exists +/// @details Certain notifications in XPTI may want to provide mutable values +/// associated with Universal IDs that can be captured in the notification +/// handler. The framework currently allows such values to be provided and +/// stashed. This function pops the top of the stack tuple value when it is no +/// longer needed; Currently a stack depth of 1 is supported. 
+/// @note This function is declared void and returns no result code, so +/// callers get no indication of whether a tuple was present +XPTI_EXPORT_API void xptiUnstashTuple(); + /// @brief Generates a unique ID /// @details When a tool is subscribing to the event stream and wants to /// generate task IDs that do not collide with unique IDs currently being @@ -498,6 +533,9 @@ typedef void (*xpti_finalize_t)(const char *); typedef uint64_t (*xpti_get_universal_id_t)(); typedef void (*xpti_set_universal_id_t)(uint64_t uid); typedef uint64_t (*xpti_get_unique_id_t)(); +typedef xpti::result_t (*xpti_stash_tuple_t)(const char *key, uint64_t value); +typedef xpti::result_t (*xpti_get_stashed_tuple_t)(char **key, uint64_t &value); +typedef void (*xpti_unstash_tuple_t)(); typedef xpti::string_id_t (*xpti_register_string_t)(const char *, char **); typedef const char *(*xpti_lookup_string_t)(xpti::string_id_t); typedef xpti::string_id_t (*xpti_register_object_t)(const char *, size_t, diff --git a/xpti/include/xpti/xpti_trace_framework.hpp b/xpti/include/xpti/xpti_trace_framework.hpp index c38a149fa401c..55f6c69760cdb 100644 --- a/xpti/include/xpti/xpti_trace_framework.hpp +++ b/xpti/include/xpti/xpti_trace_framework.hpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include "xpti/xpti_data_types.h" @@ -539,6 +541,24 @@ class scoped_notify { uint64_t m_instance; }; +// Scoped class that assists in stashing a tuple and clearing it when it is out +// of scope +class stash_tuple { +public: + stash_tuple(const char *key, uint64_t value) : m_stashed(false) { + m_stashed = + (xptiStashTuple(key, value) == xpti::result_t::XPTI_RESULT_SUCCESS); + } + ~stash_tuple() { + if (m_stashed) { + xptiUnstashTuple(); + } + } + +private: + bool m_stashed; +}; + // --------------- Commented section of the code ------------- // // github.com/bombela/backward-cpp/blob/master/backward.hpp @@ -759,9 +779,12 @@ class tracepoint_t { // Method to extract the stream used by the 
current tracepoint type uint8_t stream_id() { return m_default_stream; } - // Method to extract the stream used by the current tracepoint type + // Method to extract the instance ID used by the current tracepoint type uint64_t instance_id() { return m_instID; } + // Method to override the instance ID generated by the xptiMakeEvent() call + void override_instance_id(uint64_t instance) { m_instID = instance; } + uint64_t universal_id() { if (m_payload && (m_payload->flags & diff --git a/xpti/src/xpti_proxy.cpp b/xpti/src/xpti_proxy.cpp index 2d17517ee3089..a09b970060033 100644 --- a/xpti/src/xpti_proxy.cpp +++ b/xpti/src/xpti_proxy.cpp @@ -43,6 +43,9 @@ enum functions_t { XPTI_FORCE_SET_TRACE_ENABLED, XPTI_CHECK_TRACE_ENABLED, XPTI_RELEASE_EVENT, + XPTI_STASH_TUPLE, + XPTI_GET_STASHED_TUPLE, + XPTI_UNSTASH_TUPLE, // All additional functions need to appear before // the XPTI_FW_API_COUNT enum XPTI_FW_API_COUNT ///< This enum must always be the last one in the list @@ -79,6 +82,9 @@ class ProxyLoader { {XPTI_TRACE_ENABLED, "xptiTraceEnabled"}, {XPTI_CHECK_TRACE_ENABLED, "xptiCheckTraceEnabled"}, {XPTI_FORCE_SET_TRACE_ENABLED, "xptiForceSetTraceEnabled"}, + {XPTI_STASH_TUPLE, "xptiStashTuple"}, + {XPTI_GET_STASHED_TUPLE, "xptiGetStashedTuple"}, + {XPTI_UNSTASH_TUPLE, "xptiUnstashTuple"}, {XPTI_RELEASE_EVENT, "xptiReleaseEvent"}}; public: @@ -250,6 +256,37 @@ XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid) { } } +XPTI_EXPORT_API xpti::result_t xptiStashTuple(const char *key, uint64_t value) { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = xpti::ProxyLoader::instance().functionByIndex(XPTI_STASH_TUPLE); + if (f) { + return (*reinterpret_cast(f))(key, value); + } + } + return xpti::result_t::XPTI_RESULT_FAIL; +} + +XPTI_EXPORT_API xpti::result_t xptiSetGetStashedTuple(char **key, + uint64_t &value) { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = + xpti::ProxyLoader::instance().functionByIndex(XPTI_GET_STASHED_TUPLE); + if (f) { + return 
(*reinterpret_cast(f))(key, value); + } + } + return xpti::result_t::XPTI_RESULT_FAIL; +} + +XPTI_EXPORT_API void xptiUnstashTuple() { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = xpti::ProxyLoader::instance().functionByIndex(XPTI_UNSTASH_TUPLE); + if (f) { + return (*reinterpret_cast(f))(); + } + } +} + XPTI_EXPORT_API uint64_t xptiGetUniqueId() { if (xpti::ProxyLoader::instance().noErrors()) { auto f = xpti::ProxyLoader::instance().functionByIndex(XPTI_GET_UNIQUE_ID); diff --git a/xptifw/CMakeLists.txt b/xptifw/CMakeLists.txt index ccdabf46c9810..4cbf597513772 100644 --- a/xptifw/CMakeLists.txt +++ b/xptifw/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.8) -set(XPTI_VERSION 0.4.1) +set(XPTI_VERSION 0.6.0) project (xptifw VERSION "${XPTI_VERSION}" LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) diff --git a/xptifw/src/xpti_trace_framework.cpp b/xptifw/src/xpti_trace_framework.cpp index 41ccaf6a7e27b..93d151094aba4 100644 --- a/xptifw/src/xpti_trace_framework.cpp +++ b/xptifw/src/xpti_trace_framework.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,9 @@ static_assert( std::is_trivially_destructible::value, "PlatformHelper is not trivial"); +// TLS variables to support stashing tuples and universal IDs +using stash_tuple_t = std::tuple; +static thread_local stash_tuple_t g_tls_stash_tuple = stash_tuple_t(nullptr, 0); static thread_local uint64_t g_tls_uid = xpti::invalid_uid; namespace xpti { @@ -359,12 +363,16 @@ class Tracepoints { // Protect simultaneous insert operations on the metadata tables { + xpti::result_t res; std::lock_guard HashLock(MMetadataMutex); if (Event->reserved.metadata.count(KeyID)) { - return xpti::result_t::XPTI_RESULT_DUPLICATE; + // One already existed, but we overwrote it + res = xpti::result_t::XPTI_RESULT_DUPLICATE; + } else { + res = xpti::result_t::XPTI_RESULT_SUCCESS; } Event->reserved.metadata[KeyID] = ValueID; - return xpti::result_t::XPTI_RESULT_SUCCESS; + return 
res; } } @@ -818,6 +826,38 @@ class Framework { void setUniversalID(uint64_t uid) noexcept { g_tls_uid = uid; } + xpti::result_t stashTuple(const char *key, uint64_t value) { + if (!key) + return xpti::result_t::XPTI_RESULT_FAIL; + + std::get<0>(g_tls_stash_tuple) = key; + std::get<1>(g_tls_stash_tuple) = value; + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + xpti::result_t getStashedTuple(char **key, uint64_t &value) { + if (!key) + return xpti::result_t::XPTI_RESULT_INVALIDARG; + + const char *tls_key = std::get<0>(g_tls_stash_tuple); + if (!tls_key) + return xpti::result_t::XPTI_RESULT_NOTFOUND; + + (*key) = const_cast(tls_key); + value = std::get<1>(g_tls_stash_tuple); + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + void unstashTuple() { + if (!std::get<0>(g_tls_stash_tuple)) + return; + + // std::get<0>(g_tls_stash_tuple) = nullptr; + // std::get<1>(g_tls_stash_tuple) = 0; + // We will use the actual unstash code when we implement a stack to allow + // multiple stashes/thread + } + bool checkTraceEnabled(uint16_t stream, uint16_t type) { if (MTraceEnabled) { return MNotifier.checkSubscribed(stream, type); @@ -1086,6 +1126,19 @@ XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid) { xpti::Framework::instance().setUniversalID(uid); } +XPTI_EXPORT_API xpti::result_t xptiStashTuple(const char *key, uint64_t value) { + return xpti::Framework::instance().stashTuple(key, value); +} + +XPTI_EXPORT_API xpti::result_t xptiGetStashedTuple(char **key, + uint64_t &value) { + return xpti::Framework::instance().getStashedTuple(key, value); +} + +XPTI_EXPORT_API void xptiUnstashTuple() { + xpti::Framework::instance().unstashTuple(); +} + XPTI_EXPORT_API uint16_t xptiRegisterUserDefinedTracePoint(const char *ToolName, uint8_t UserDefinedTP) { uint8_t ToolID = xpti::Framework::instance().registerVendor(ToolName); From 7c70e59db3ec813021beb970ebd21034586da53e Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 21 Mar 2024 10:28:46 +0000 Subject: [PATCH 06/22] 
[SYCL][Graph][HIP] Set minimum ROCm version for graphs (#13035) Tests UR PR https://github.com/oneapi-src/unified-runtime/pull/1447 that only reports support for UR command-buffers on ROCm 5.5.1 and later to work around HIP driver bugs related to HIP-Graph in earlier version. This requirement is also explicitly mentioned in the design doc. --- sycl/doc/design/CommandGraph.md | 4 +++- sycl/plugins/unified_runtime/CMakeLists.txt | 14 +++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index 8f5d715a32925..2fc1e75749364 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -405,8 +405,10 @@ The HIP backend offers a Graph managemenet API very similar to CUDA Graph feature for batching series of operations. The SYCL Graph HIP backend implementation is therefore very similar to that of CUDA. +The minimum version of ROCm required to support `sycl_ext_oneapi_graph` is 5.5.1. + UR commands (e.g. kernels) are mapped as graph nodes using the -[HIP Management API](https://docs.amd.com/projects/HIP/en/docs-5.5.0/doxygen/html/group___graph.html). +[HIP Management API](https://rocm.docs.amd.com/projects/HIP/en/docs-5.5.1/doxygen/html/group___graph.html). Synchronization between commands (UR sync-points) is implemented using graph dependencies. 
Executable HIP Graphs can be submitted to a HIP stream diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 0eebfa5a02014..b4f01966d5418 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -82,13 +82,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit ed1f8bf618c88eaabea6bde0f6c06fc265f3b49f - # Merge: ca5c3421 69c43b45 - # Author: Kenneth Benzie (Benie) - # Date: Tue Mar 19 21:00:20 2024 +0000 - # Merge pull request #1326 from hdelan/refactor-guess-local-worksize - # [CUDA][HIP] Fix bug in guess local worksize funcs and improve local worksize guessing in HIP adapter - set(UNIFIED_RUNTIME_TAG ed1f8bf618c88eaabea6bde0f6c06fc265f3b49f) + # commit 5f4dd113824e90522d813420932c14072dc3049d + # Merge: ed1f8bf b551c77 + # Author: Ewan Crawford + # Date: Fri Mar 15 10:22:39 2024 +0000 + # Merge pull request #1447 from Bensuo/ewan/rocm_5_5_1 + # [HIP][CMDBUF] Require ROCm 5.5.1 for HIP command-buffers + set(UNIFIED_RUNTIME_TAG 5f4dd113824e90522d813420932c14072dc3049d) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From daaece06ce68544eaae078899c559f571297d8c0 Mon Sep 17 00:00:00 2001 From: Ben Tracy Date: Thu, 21 Mar 2024 10:30:44 +0000 Subject: [PATCH 07/22] [SYCL][Graph] Fix access modes not being respected (#13011) - Fix access modes not being respected and creating unnecessary edges - Update printing E2E tests since output has changed - Add unit tests for access modes Addresses #12473 --- sycl/source/detail/graph_impl.cpp | 4 +- sycl/source/detail/graph_impl.hpp | 31 ++++- .../Graph/Explicit/debug_print_graph.cpp | 22 ++-- .../Explicit/debug_print_graph_verbose.cpp | 32 ++--- .../Graph/RecordReplay/debug_print_graph.cpp | 22 ++-- .../debug_print_graph_verbose.cpp | 26 ++-- 
.../Extensions/CommandGraph/CMakeLists.txt | 1 + .../Extensions/CommandGraph/CommandGraph.cpp | 120 +++++++++++++++++- .../Extensions/CommandGraph/Regressions.cpp | 60 +++++++++ 9 files changed, 259 insertions(+), 59 deletions(-) create mode 100644 sycl/unittests/Extensions/CommandGraph/Regressions.cpp diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index e6322b8240dd3..a318ed97d0abd 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -439,12 +439,12 @@ graph_impl::add(node_type NodeType, } // Look through the graph for nodes which share this requirement for (auto &Node : MNodeStorage) { - if (Node->hasRequirement(Req)) { + if (Node->hasRequirementDependency(Req)) { bool ShouldAddDep = true; // If any of this node's successors have this requirement then we skip // adding the current node as a dependency. for (auto &Succ : Node->MSuccessors) { - if (Succ.lock()->hasRequirement(Req)) { + if (Succ.lock()->hasRequirementDependency(Req)) { ShouldAddDep = false; break; } diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 73064b021edc5..30cc78c70ab4d 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -167,17 +167,38 @@ class node_impl { } return *this; } + /// Checks if this node should be a dependency of another node based on + /// accessor requirements. This is calculated using access modes if a + /// requirement to the same buffer is found inside this node. + /// @param IncomingReq Incoming requirement. + /// @return True if a dependency is needed, false if not. + bool hasRequirementDependency(sycl::detail::AccessorImplHost *IncomingReq) { + access_mode InMode = IncomingReq->MAccessMode; + switch (InMode) { + case access_mode::read: + case access_mode::read_write: + case access_mode::atomic: + break; + // These access modes don't care about existing buffer data, so we don't + // need a dependency. 
+ case access_mode::write: + case access_mode::discard_read_write: + case access_mode::discard_write: + return false; + } - /// Checks if this node has a given requirement. - /// @param Requirement Requirement to lookup. - /// @return True if \p Requirement is present in node, false otherwise. - bool hasRequirement(sycl::detail::AccessorImplHost *IncomingReq) { for (sycl::detail::AccessorImplHost *CurrentReq : MCommandGroup->getRequirements()) { if (IncomingReq->MSYCLMemObj == CurrentReq->MSYCLMemObj) { - return true; + access_mode CurrentMode = CurrentReq->MAccessMode; + // Since we have an incoming read requirement, we only care + // about requirements on this node if they are write + if (CurrentMode != access_mode::read) { + return true; + } } } + // No dependency necessary return false; } diff --git a/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp b/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp index 92fda2c837dc6..b7a5c689de1ed 100644 --- a/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp +++ b/sycl/test-e2e/Graph/Explicit/debug_print_graph.cpp @@ -12,22 +12,22 @@ // CHECK-SAME: [style=bold, label="ID = 0x[[#NODE2]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE0_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; // CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" // CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; -// CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = 
_ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" // CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]]" // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = 
_ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp b/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp index e06deb61a205c..ddc8d9818370b 100644 --- a/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp +++ b/sycl/test-e2e/Graph/Explicit/debug_print_graph_verbose.cpp @@ -14,28 +14,28 @@ // CHECK-SAME: [style=bold, label="ID = 0x[[#NODE2]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE0_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n // CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR4:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR5:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR6:]]\n // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR4]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR7:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR8:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR9:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR10:]]\n"]; -// CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" -// CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE2]]" +// CHECK-DAG: "0x[[#%x,NODE3:]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = 
_ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n // CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR11:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR12:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR13:]]\n // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR11]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR14:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR15:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR16:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR17:]]\n"]; -// CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" -// CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE2_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n -// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR18:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR19:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR20:]]\n -// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR18]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR21:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR22:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n"]; +// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE4:]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR18:]] Dst: 0x[[#%x,ADDR19:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]] // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \nSrc: 
0x[[#%x,ADDR25:]] Dst: 0x[[#%x,ADDR26:]]\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR20:]] Dst: 0x[[#%x,ADDR21:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR27:]] Dst: 0x[[#%x,ADDR28:]]\n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11add_kernelsItEN4sycl3_V13ext6oneapi12experimental4nodeENS4_13command_graphILNS4_11graph_stateE0EEEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constISA_E4typeEEEvEESH_SH_ENKUlRNS1_7handlerEE1_clESJ_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR22:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n +// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR22]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR25:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR26:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR27:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR28:]]\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_EXPLICIT diff --git a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp index 6ef999f7d41a7..5729c36160acc 100644 --- a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph.cpp @@ -12,22 +12,22 @@ // CHECK-SAME: [style=bold, label="ID = 0x[[#NODE2]]\nTYPE = CGExecKernel \nNAME = 
_ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE0_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; // CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" // CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; -// CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" // CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]]" // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \n"]; -// CHECK-DAG: 
"0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp index c1697fc755ef2..313678b1b3932 100644 --- a/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/debug_print_graph_verbose.cpp @@ -17,26 +17,26 @@ // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR4]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR7:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR8:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR9:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR10:]]\n"]; // CHECK-NEXT: "0x[[#NODE1]]" -> "0x[[#NODE2]]" // CHECK-NEXT: "0x[[#%x,NODE3:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE3]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n // CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR11:]]\n1) Type: STD_Layout 
Ptr: 0x[[#%x,ADDR12:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR13:]]\n // CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR11]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR14:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR15:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR16:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR17:]]\n"]; -// CHECK-NEXT: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE3]]" +// CHECK-DAG: "0x[[#%x,NODE7:]]" -> "0x[[#NODE3]]" // CHECK-NEXT: "0x[[#%x,NODE4:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE2_clESE_EUlNS1_4itemILi1ELb1EEEE_\n -// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR18:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR19:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR20:]]\n -// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR18]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR21:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR22:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE4]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR18:]] Dst: 0x[[#%x,ADDR19:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE4]]" -// CHECK-DAG: "0x[[#NODE2]]" -> "0x[[#NODE4]]" +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE4]] // CHECK-NEXT: "0x[[#%x,NODE5:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Device \nSrc: 0x[[#%x,ADDR25:]] Dst: 0x[[#%x,ADDR26:]]\n"]; +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE5]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR20:]] Dst: 0x[[#%x,ADDR21:]]\n"]; // CHECK-DAG: "0x[[#NODE3]]" -> "0x[[#NODE5]]" -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE5]] // CHECK-NEXT: "0x[[#%x,NODE6:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = CGCopy Device-to-Host \nSrc: 0x[[#%x,ADDR27:]] Dst: 
0x[[#%x,ADDR28:]]\n"]; -// CHECK-DAG: "0x[[#NODE4]]" -> "0x[[#NODE6]]" -// CHECK-NEXT: "0x[[#%x,NODE7:]]" -// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = None \n"]; -// CHECK-DAG: "0x[[#NODE6]]" -> "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE6]]\nTYPE = None \n"]; +// CHECK-DAG: "0x[[#NODE5]]" -> "0x[[#NODE6]]" +// CHECK-NEXT: "0x[[#NODE7]]" +// CHECK-SAME: [style=bold, label="ID = 0x[[#NODE7]]\nTYPE = CGExecKernel \nNAME = _ZTSZZ11run_kernelsItEN4sycl3_V15eventENS1_5queueEmNS1_6bufferIT_Li1ENS1_6detail17aligned_allocatorINSt12remove_constIS5_E4typeEEEvEESC_SC_ENKUlRNS1_7handlerEE1_clESE_EUlNS1_4itemILi1ELb1EEEE_\n +// CHECK-SAME: ARGS = \n0) Type: Accessor Ptr: 0x[[#%x,ADDR22:]]\n1) Type: STD_Layout Ptr: 0x[[#%x,ADDR23:]]\n2) Type: STD_Layout Ptr: 0x[[#%x,ADDR24:]]\n +// CHECK-SAME: 3) Type: STD_Layout Ptr: 0x[[#ADDR22]]\n4) Type: Accessor Ptr: 0x[[#%x,ADDR25:]]\n5) Type: STD_Layout Ptr: 0x[[#%x,ADDR26:]]\n6) Type: STD_Layout Ptr: 0x[[#%x,ADDR27:]]\n7) Type: STD_Layout Ptr: 0x[[#%x,ADDR28:]]\n"]; +// CHECK-DAG: "0x[[#NODE1]]" -> "0x[[#NODE7]]" #define GRAPH_E2E_RECORD_REPLAY diff --git a/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt b/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt index 712d7345fd895..2232ce4abb54f 100644 --- a/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt +++ b/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt @@ -7,5 +7,6 @@ add_sycl_unittest(CommandGraphExtensionTests OBJECT InOrderQueue.cpp MultiThreaded.cpp Queries.cpp + Regressions.cpp Subgraph.cpp ) diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 07fa7434cca00..63b5b2a04de05 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -81,7 +81,7 @@ TEST_F(CommandGraphTest, Finalize) { // Add a node that depends on Node1 due to the accessor auto Node3 = 
Graph.add([&](sycl::handler &cgh) { - sycl::accessor A(Buf, cgh, sycl::write_only, sycl::no_init); + sycl::accessor A(Buf, cgh, sycl::read_write); cgh.single_task>([]() {}); }); @@ -510,3 +510,121 @@ TEST_F(CommandGraphTest, FillMemsetNodes) { sycl::free(USMPtr, Queue); } } + +// Test that the expected dependencies are created when recording a graph node +// containing an accessor with mode FirstMode, followed by one containing an +// accessor with mode SecondMode +template +void testAccessorModeCombo(sycl::queue Queue) { + buffer Buffer{range<1>{16}}; + + ext::oneapi::experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + Graph.begin_recording(Queue); + // Create the first node with a write mode + auto EventFirst = Queue.submit([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.single_task>([]() {}); + }); + + auto EventSecond = Queue.submit([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + CGH.single_task>([]() {}); + }); + Graph.end_recording(Queue); + + EXPECT_EQ(Graph.get_root_nodes().size(), ShouldCreateDep ? 1ul : 2ul); + + experimental::node NodeFirst = + experimental::node::get_node_from_event(EventFirst); + EXPECT_EQ(NodeFirst.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeFirst.get_successors().size(), ShouldCreateDep ? 1ul : 0ul); + + experimental::node NodeSecond = + experimental::node::get_node_from_event(EventSecond); + EXPECT_EQ(NodeSecond.get_predecessors().size(), ShouldCreateDep ? 
1ul : 0ul); + EXPECT_EQ(NodeSecond.get_successors().size(), 0ul); +} + +// Tests that access modes are correctly respected when recording graph nodes +TEST_F(CommandGraphTest, AccessorModeEdges) { + + // Testing access_mode::write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + + // Testing access_mode::read and others + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + + // Testing access_mode::read_write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + + // Testing access_mode::discard_read_write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + + // Testing access_mode::discard_write and others + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + + // Testing access_mode::atomic and others + testAccessorModeCombo( + Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo(Queue); + testAccessorModeCombo( + Queue); + testAccessorModeCombo(Queue); +} diff --git a/sycl/unittests/Extensions/CommandGraph/Regressions.cpp b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp new file mode 100644 index 0000000000000..17b58f542d760 --- /dev/null +++ b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp @@ -0,0 +1,60 @@ +//==------------------------ 
Regressions.cpp -------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "Common.hpp" + +using namespace sycl; +using namespace sycl::ext::oneapi; + +// Tests in this file are based on specific error reports + +// Regression test example based on a reported issue with accessor modes not +// being respected in graphs. The test records 3 kernel nodes which all have +// read only dependencies on the same two buffers, with a write dependency on a +// buffer which is different per kernel. This should result in no edges being +// created between these nodes because the accessor mode combinations do not +// indicate a need for dependencies. +// Originally reported here: https://github.com/intel/llvm/issues/12473 +TEST_F(CommandGraphTest, AccessorModeRegression) { + buffer BufferA{range<1>{16}}; + buffer BufferB{range<1>{16}}; + buffer BufferC{range<1>{16}}; + buffer BufferD{range<1>{16}}; + buffer BufferE{range<1>{16}}; + Graph.begin_recording(Queue); + + auto EventA = Queue.submit([&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + auto AccC = BufferC.get_access(CGH); + CGH.single_task>([]() {}); + }); + auto EventB = Queue.submit([&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + auto AccD = BufferD.get_access(CGH); + CGH.single_task>([]() {}); + }); + auto EventC = Queue.submit([&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + auto AccE = BufferE.get_access(CGH); + CGH.single_task>([]() {}); + }); + + Graph.end_recording(Queue); + + experimental::node NodeA = experimental::node::get_node_from_event(EventA); + 
EXPECT_EQ(NodeA.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeA.get_successors().size(), 0ul); + experimental::node NodeB = experimental::node::get_node_from_event(EventB); + EXPECT_EQ(NodeB.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeB.get_successors().size(), 0ul); + experimental::node NodeC = experimental::node::get_node_from_event(EventC); + EXPECT_EQ(NodeC.get_predecessors().size(), 0ul); + EXPECT_EQ(NodeC.get_successors().size(), 0ul); +} From 2b0ec336d77f8e12364507347285e77905ab4687 Mon Sep 17 00:00:00 2001 From: Nikita Kornev Date: Thu, 21 Mar 2024 12:02:02 +0100 Subject: [PATCH 08/22] [CI] Reduce cts_exclude_filter (#13079) Also turned off `h_item` due to fail #12926 --- devops/cts_exclude_filter | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/devops/cts_exclude_filter b/devops/cts_exclude_filter index a03214a6e2314..e14c4e0eb1af9 100644 --- a/devops/cts_exclude_filter +++ b/devops/cts_exclude_filter @@ -1,8 +1,5 @@ -reduction -accessor -vector_swizzles +# These two take too much time kernel_bundle -spec_constants marray -group_functions -atomic_ref +# https://github.com/intel/llvm/issues/12926 +h_item From d89ca597e488ea61cf51bf8d06246ac0a069a855 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 21 Mar 2024 11:19:07 +0000 Subject: [PATCH 09/22] [libclc] Several little QOL improvements to libclc-remangler (#13073) ----- [libclc] Update deprecated method use in remangler llvm::StringRef::startswith -> llvm::StringRef::starts_with ----- [libclc] Fix up improper use of ExitOnError ExitOnError takes a llvm::Expected, but we were passing it the result of llvm::parseIR - std::unique_ptr - which, even when null, is not an error condition. Thus invalid IR input was silently being accepted until it would segfault on accessing the module. ----- [libclc] Open remangler file system at PWD, not root I don't think there's any reason to open it at root. 
Opening it at the current working directory is more intuitive for developers using relative paths for input/output files. Previously "--input-ir foo.ll" would try and open "/foo.ll", which depending on the system is likely a permissions error, or a missing file, or even an unintended file. ----- --- libclc/utils/libclc-remangler/LibclcRemangler.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/libclc/utils/libclc-remangler/LibclcRemangler.cpp b/libclc/utils/libclc-remangler/LibclcRemangler.cpp index c9e92730c3e31..cdc0540221d31 100644 --- a/libclc/utils/libclc-remangler/LibclcRemangler.cpp +++ b/libclc/utils/libclc-remangler/LibclcRemangler.cpp @@ -777,12 +777,17 @@ class LibCLCRemangler : public ASTConsumer { void Initialize(ASTContext &C) override { ASTCtx = &C; - SMDiagnostic Err; std::unique_ptr const Buff = ExitOnErr( errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputIRFilename))); + + SMDiagnostic Err; std::unique_ptr const M = - ExitOnErr(Expected>( - parseIR(Buff.get()->getMemBufferRef(), Err, LLVMCtx))); + parseIR(Buff.get()->getMemBufferRef(), Err, LLVMCtx); + + if (!M) { + Err.print("libclc-remangler", errs()); + exit(1); + } handleModule(M.get()); } @@ -840,7 +845,7 @@ class LibCLCRemangler : public ASTConsumer { } bool remangleFunction(Function &Func, llvm::Module *M) { - if (!Func.getName().startswith("_Z")) + if (!Func.getName().starts_with("_Z")) return true; std::string const MangledName = Func.getName().str(); @@ -958,7 +963,7 @@ int main(int argc, const char **argv) { // Use a default Compilation DB instead of the build one, as it might contain // toolchain specific options, not compatible with clang. 
- FixedCompilationDatabase Compilations("/", std::vector()); + FixedCompilationDatabase Compilations(".", std::vector()); ClangTool Tool(Compilations, ExpectedParser->getSourcePathList()); LibCLCRemanglerActionFactory LRAF{}; From 597619c4229c1f1b147322c3c5ec64dac59124da Mon Sep 17 00:00:00 2001 From: LU-JOHN <111294400+LU-JOHN@users.noreply.github.com> Date: Thu, 21 Mar 2024 07:00:11 -0500 Subject: [PATCH 10/22] [SYCL][E2E] Ensuring lowering of llvm.bitreverse is functionally correct (#12774) Ensure that lowering of llvm.bitreverse* intrinsics by llvm-spirv is functionally correct. --------- Signed-off-by: Lu, John --- .../LLVMIntrinsicLowering/bitreverse.cpp | 236 ++++++++++++++++++ .../test-e2e/LLVMIntrinsicLowering/common.hpp | 27 ++ 2 files changed, 263 insertions(+) create mode 100644 sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp create mode 100644 sycl/test-e2e/LLVMIntrinsicLowering/common.hpp diff --git a/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp b/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp new file mode 100644 index 0000000000000..239b018a9f30f --- /dev/null +++ b/sycl/test-e2e/LLVMIntrinsicLowering/bitreverse.cpp @@ -0,0 +1,236 @@ +// Test that llvm.bitreverse is lowered correctly by llvm-spirv. + +// UNSUPPORTED: hip || cuda + +// Make dump directory. +// RUN: rm -rf %t.spvdir && mkdir %t.spvdir + +// Ensure that SPV_KHR_bit_instructions is disabled so that translator +// will lower llvm.bitreverse.* intrinsics instead of relying on SPIRV +// BitReverse instruction. +// Also build executable with SPV dump. +// RUN: %{build} -o %t.out -O2 -Xspirv-translator --spirv-ext=-SPV_KHR_bit_instructions -fsycl-dump-device-code=%t.spvdir + +// Rename SPV file to explicitly known filename. +// RUN: mv %t.spvdir/*.spv %t.spvdir/dump.spv + +// Convert to text. +// RUN: llvm-spirv -to-text %t.spvdir/dump.spv + +// Check that all lowerings are done by llvm-spirv. 
+// RUN: cat %t.spvdir/dump.spt | FileCheck %s --check-prefix CHECK-SPV --implicit-check-not=BitReverse + +// Execute to ensure lowering has correct functionality. +// RUN: %{run} %t.out + +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// TODO FIXME Change NOT_READY to RUN when llvm.bitreverse.* is supported. + +// Build without lowering explicitly disabled. +// NOT_READY: %{build} -o %t.bitinstructions.out + +// Execution should still be correct. +// NOT_READY: %{run} %t.bitinstructions.out + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i32" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i64" + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v2i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v2i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v2i32" + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v3i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v3i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v3i32" + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v4i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v4i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v4i32" + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v8i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v8i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v8i32" + +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v16i8" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v16i16" +// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v16i32" + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i16" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i32" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i64" Export + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v2i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v2i16" 
Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v2i32" Export + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v3i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v3i16" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v3i32" Export + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v4i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v4i16" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v4i32" Export + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v8i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v8i16" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v8i32" Export + +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i8" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i16" Export +// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i32" Export + +#include +#include +#include +#include "common.hpp" + +using namespace sycl; + +template +__attribute__((optnone, noinline)) TYPE reference_reverse(TYPE a, const int bitlength) { + TYPE ret = 0; + for (auto i = 0; i>=1; + } + return ret; +} + +template +__attribute__((noinline)) TYPE reverse(TYPE a, int bitlength) { + if (bitlength==8) { + // Avoid bug with __builtin_elementwise_bitreverse(a) on scalar 8-bit types. + a = ((0x55 & a) << 1) | (0x55 & (a >> 1)); + a = ((0x33 & a) << 2) | (0x33 & (a >> 2)); + return (a << 4) | (a >> 4); + } else if (bitlength==16) { + // Avoid bug with __builtin_elementwise_bitreverse(a) on scalar 16-bit types. 
+ a = ((0x5555 & a) << 1) | (0x5555 & (a >> 1)); + a = ((0x3333 & a) << 2) | (0x3333 & (a >> 2)); + a = ((0x0F0F & a) << 4) | (0x0F0F & (a >> 4)); + return (a << 8) | (a >> 8); + } else + return __builtin_elementwise_bitreverse(a); +} + +template class BitreverseTest; + +#define NUM_TESTS 1024 + +template +void do_scalar_bitreverse_test() { + queue q; + + TYPE *Input = (TYPE *) malloc_shared(sizeof(TYPE) * NUM_TESTS, q.get_device(), q.get_context()); + TYPE *Output = (TYPE *) malloc_shared(sizeof(TYPE) * NUM_TESTS, q.get_device(), q.get_context()); + + for (unsigned i=0; i(); + q.submit([=](handler &cgh) { + cgh.single_task> ([=]() { + for (unsigned i=0; i +void do_vector_bitreverse_test() { + queue q; + + VTYPE *Input = (VTYPE *) malloc_shared(sizeof(VTYPE) * NUM_TESTS, q.get_device(), q.get_context()); + VTYPE *Output = (VTYPE *) malloc_shared(sizeof(VTYPE) * NUM_TESTS, q.get_device(), q.get_context()); + + for (unsigned i=0; i::type>(); + + q.submit([=](handler &cgh) { + cgh.single_task> ([=]() { + for (unsigned i=0; i(); + do_scalar_bitreverse_test(); + do_scalar_bitreverse_test(); + do_scalar_bitreverse_test(); + + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + do_vector_bitreverse_test(); + + return 0; +} + diff --git a/sycl/test-e2e/LLVMIntrinsicLowering/common.hpp b/sycl/test-e2e/LLVMIntrinsicLowering/common.hpp new file mode 100644 index 0000000000000..45c0a99840d93 --- /dev/null +++ b/sycl/test-e2e/LLVMIntrinsicLowering/common.hpp @@ -0,0 +1,27 @@ +//==------- common.hpp - DPC++ ESIMD on-device test ------------------------==// +// +// Part of the LLVM Project, under 
the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +template class KernelID; + +template T get_rand() { + using Tuint = std::conditional_t< + sizeof(T) == 1, uint8_t, + std::conditional_t< + sizeof(T) == 2, uint16_t, + std::conditional_t>>>; + Tuint v = rand(); + if constexpr (sizeof(Tuint) > 4) + v = (v << 32) | rand(); + return sycl::bit_cast(v); +} From 98894fe5e520072b17da3cc34598f8ac790249ca Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Thu, 21 Mar 2024 06:50:57 -0700 Subject: [PATCH 11/22] [UR][L0] Fix Native Host memory usage on device with copy back sync (#13014) pre-commit PR for https://github.com/oneapi-src/unified-runtime/pull/1439 --------- Signed-off-by: Neil R. Spruit Co-authored-by: Kenneth Benzie (Benie) --- sycl/plugins/unified_runtime/CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index b4f01966d5418..32ebe3784bf2f 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -82,13 +82,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 5f4dd113824e90522d813420932c14072dc3049d - # Merge: ed1f8bf b551c77 - # Author: Ewan Crawford - # Date: Fri Mar 15 10:22:39 2024 +0000 - # Merge pull request #1447 from Bensuo/ewan/rocm_5_5_1 - # [HIP][CMDBUF] Require ROCm 5.5.1 for HIP command-buffers - set(UNIFIED_RUNTIME_TAG 5f4dd113824e90522d813420932c14072dc3049d) + # commit c98fdbcf1f43ce132fbae75336bda984e4ce2e78 + # Merge: 5f4dd113 9b3cf9d3 + # Author: Kenneth Benzie (Benie) + # Date: Thu Mar 21 10:51:45 2024 +0000 + # Merge pull request 
#1439 from nrspruit/fix_device_native_proxy_buffer + # [L0] Fix Native Host memory usage on device with copy back sync + set(UNIFIED_RUNTIME_TAG c98fdbcf1f43ce132fbae75336bda984e4ce2e78) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From 9e4768ca9849e7188221c0e2894282730e3b1bde Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 21 Mar 2024 13:56:19 +0000 Subject: [PATCH 12/22] [SYCL][libclc] Add generic addrspace overloads of math builtins (#13015) The generic implementations of the math builtins which take pointer arguments were using unqualified address spaces. This could either resolve to the generic address space or the private address space, depending on whether the target supports the generic address space or not. The newer unified OpenCL C specification is clearer in mandating that all targets must provide overloads on the explicitly qualified 'private' address space, as well as optionally defining ones on the (unqualified) generic address space. This meant that most of these math builtins were lacking one overload: either the private or generic one, depending on which target was compiling the builtins. One notable exception here is NVIDIA, which maps the private and generic address spaces to the same target address space. Thus declaring builtins overloaded on these two address spaces results in a mangling clash, which we can't have. Therefore we now say that NVIDIA targets don't support the generic address space for the purposes of these builtins. In reality, the builtins with the private address space are functionally equivalent to the generic ones, so users won't notice. For the sake of code clarity, although the 'generic' keyword is technically reserved, we know that clang defines it to be the corresponding unqualified generic address space, so we use that to be explicit. We always compile with clang so it shouldn't be a problem with portability. 
With this we can also enable a LIT test for HIP, which was previously failing as it couldn't find the generic address space overloads of the fract and lgamma_r builtins. There are other builtins that this treatment (may) need applied to, such as the vload and vstore variants. Those will be handled in a subsequent patch. --- libclc/CMakeLists.txt | 15 +- libclc/generic/include/clc/math/fract.inc | 5 + libclc/generic/include/clc/math/frexp.inc | 5 + libclc/generic/include/clc/math/modf.inc | 5 + libclc/generic/include/clc/math/remquo.h | 9 + libclc/generic/include/clc/math/sincos.inc | 5 + libclc/generic/include/spirv/spirv_builtins.h | 579 +++++++++++++----- libclc/generic/lib/math/fract.inc | 11 +- libclc/generic/lib/math/frexp.cl | 9 + libclc/generic/lib/math/modf.inc | 9 +- libclc/generic/lib/math/remquo.cl | 9 + libclc/generic/lib/math/remquo.inc | 6 +- libclc/generic/lib/math/sincos.inc | 5 + libclc/generic/libspirv/math/fract.inc | 7 +- libclc/generic/libspirv/math/frexp.cl | 9 + libclc/generic/libspirv/math/lgamma_r.cl | 9 + libclc/generic/libspirv/math/lgamma_r.inc | 2 +- libclc/generic/libspirv/math/modf.inc | 10 +- libclc/generic/libspirv/math/remquo.cl | 9 + libclc/generic/libspirv/math/remquo.inc | 6 +- libclc/generic/libspirv/math/sincos.inc | 6 + sycl/test-e2e/USM/math.cpp | 1 - 22 files changed, 561 insertions(+), 170 deletions(-) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 0c7a49b3f9dcd..6d3d1502d9d24 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -400,6 +400,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) endif() message( " DEVICE: ${d} ( ${${d}_aliases} )" ) + set ( supports_generic_addrspace TRUE ) if ( ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" ) if( ${ARCH} STREQUAL "spirv" ) set( t "spir--" ) @@ -416,6 +417,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) elseif( ${ARCH} STREQUAL "nvptx" OR ${ARCH} STREQUAL "nvptx64" ) set( build_flags ) set( opt_flags -O3 "--nvvm-reflect-enable=false" ) + # Note: 
when declaring builtins, we don't consider NVIDIA as supporting + # the generic address space. This is because it maps to the same target + # address space as the private address space, resulting in a mangling + # clash. + # Since we can't declare builtins overloaded on both address spaces + # simultaneously, we choose to declare the builtins using the private space, + # which will also work for the generic address space. + set( supports_generic_addrspace FALSE ) elseif( ${ARCH} STREQUAL "clspv64" ) set( t "spir64--" ) set( build_flags "-Wno-unknown-assumption") @@ -437,8 +446,10 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) "+cl_khr_fp16," "+__opencl_c_3d_image_writes," "+__opencl_c_images," - "+cl_khr_3d_image_writes," - "+__opencl_c_generic_address_space") + "+cl_khr_3d_image_writes") + if(supports_generic_addrspace) + string( APPEND CL_3_0_EXTENSIONS ",+__opencl_c_generic_address_space" ) + endif() list( APPEND flags ${CL_3_0_EXTENSIONS}) # Add platform specific flags diff --git a/libclc/generic/include/clc/math/fract.inc b/libclc/generic/include/clc/math/fract.inc index 71e6e8a921121..a85b30ce28d9f 100644 --- a/libclc/generic/include/clc/math/fract.inc +++ b/libclc/generic/include/clc/math/fract.inc @@ -23,3 +23,8 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, global __CLC_GENTYPE *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, local __CLC_GENTYPE *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, generic __CLC_GENTYPE *iptr); +#endif diff --git a/libclc/generic/include/clc/math/frexp.inc b/libclc/generic/include/clc/math/frexp.inc index 2a6f7f5823969..f68206e67df76 100644 --- a/libclc/generic/include/clc/math/frexp.inc +++ b/libclc/generic/include/clc/math/frexp.inc @@ -1,3 +1,8 @@ 
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, global __CLC_INTN *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, local __CLC_INTN *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, private __CLC_INTN *iptr); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, generic __CLC_INTN *iptr); +#endif diff --git a/libclc/generic/include/clc/math/modf.inc b/libclc/generic/include/clc/math/modf.inc index 42bcf625686d2..558376570812d 100644 --- a/libclc/generic/include/clc/math/modf.inc +++ b/libclc/generic/include/clc/math/modf.inc @@ -23,3 +23,8 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, global __CLC_GENTYPE *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, local __CLC_GENTYPE *iptr); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, generic __CLC_GENTYPE *iptr); +#endif diff --git a/libclc/generic/include/clc/math/remquo.h b/libclc/generic/include/clc/math/remquo.h index 7daf82fc34b33..a367208894fe6 100644 --- a/libclc/generic/include/clc/math/remquo.h +++ b/libclc/generic/include/clc/math/remquo.h @@ -15,4 +15,13 @@ #include #undef __CLC_ADDRESS_SPACE +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE generic +#include +#undef __CLC_ADDRESS_SPACE +#endif + #undef __CLC_FUNCTION diff --git a/libclc/generic/include/clc/math/sincos.inc b/libclc/generic/include/clc/math/sincos.inc index 423b25fb7534b..4a1b74cc7ac2c 100644 --- 
a/libclc/generic/include/clc/math/sincos.inc +++ b/libclc/generic/include/clc/math/sincos.inc @@ -1,3 +1,8 @@ _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, global __CLC_GENTYPE * cosval); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, local __CLC_GENTYPE * cosval); _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, private __CLC_GENTYPE * cosval); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) + _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, generic __CLC_GENTYPE * cosval); +#endif diff --git a/libclc/generic/include/spirv/spirv_builtins.h b/libclc/generic/include/spirv/spirv_builtins.h index ca15fdf3c6547..2de6f72cfb077 100644 --- a/libclc/generic/include/spirv/spirv_builtins.h +++ b/libclc/generic/include/spirv/spirv_builtins.h @@ -14361,76 +14361,76 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t __spirv_ocl_fmod(__clc_vec16_fp16_t, __clc_vec16_fp16_t); #endif -_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_fract(__clc_fp32_t, - __clc_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_fract(__clc_fp32_t, __clc_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_fract(__clc_fp32_t, __clc_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_fract(__clc_fp32_t, __clc_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t -__spirv_ocl_fract(__clc_vec2_fp32_t, __clc_vec2_fp32_t *); +__spirv_ocl_fract(__clc_vec2_fp32_t, __clc_vec2_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_fract(__clc_vec2_fp32_t, __clc_vec2_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_fract(__clc_vec2_fp32_t, __clc_vec2_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t -__spirv_ocl_fract(__clc_vec3_fp32_t, __clc_vec3_fp32_t *); +__spirv_ocl_fract(__clc_vec3_fp32_t, __clc_vec3_fp32_t __private *); _CLC_OVERLOAD 
_CLC_DECL __clc_vec3_fp32_t __spirv_ocl_fract(__clc_vec3_fp32_t, __clc_vec3_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_fract(__clc_vec3_fp32_t, __clc_vec3_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t -__spirv_ocl_fract(__clc_vec4_fp32_t, __clc_vec4_fp32_t *); +__spirv_ocl_fract(__clc_vec4_fp32_t, __clc_vec4_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_fract(__clc_vec4_fp32_t, __clc_vec4_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_fract(__clc_vec4_fp32_t, __clc_vec4_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t -__spirv_ocl_fract(__clc_vec8_fp32_t, __clc_vec8_fp32_t *); +__spirv_ocl_fract(__clc_vec8_fp32_t, __clc_vec8_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_fract(__clc_vec8_fp32_t, __clc_vec8_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_fract(__clc_vec8_fp32_t, __clc_vec8_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t -__spirv_ocl_fract(__clc_vec16_fp32_t, __clc_vec16_fp32_t *); +__spirv_ocl_fract(__clc_vec16_fp32_t, __clc_vec16_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_fract(__clc_vec16_fp32_t, __clc_vec16_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_fract(__clc_vec16_fp32_t, __clc_vec16_fp32_t __global *); #ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_fract(__clc_fp64_t, - __clc_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_fract(__clc_fp64_t, __clc_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_fract(__clc_fp64_t, __clc_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_fract(__clc_fp64_t, __clc_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t -__spirv_ocl_fract(__clc_vec2_fp64_t, __clc_vec2_fp64_t *); +__spirv_ocl_fract(__clc_vec2_fp64_t, __clc_vec2_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t 
__spirv_ocl_fract(__clc_vec2_fp64_t, __clc_vec2_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_fract(__clc_vec2_fp64_t, __clc_vec2_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t -__spirv_ocl_fract(__clc_vec3_fp64_t, __clc_vec3_fp64_t *); +__spirv_ocl_fract(__clc_vec3_fp64_t, __clc_vec3_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_fract(__clc_vec3_fp64_t, __clc_vec3_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_fract(__clc_vec3_fp64_t, __clc_vec3_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t -__spirv_ocl_fract(__clc_vec4_fp64_t, __clc_vec4_fp64_t *); +__spirv_ocl_fract(__clc_vec4_fp64_t, __clc_vec4_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_fract(__clc_vec4_fp64_t, __clc_vec4_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_fract(__clc_vec4_fp64_t, __clc_vec4_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t -__spirv_ocl_fract(__clc_vec8_fp64_t, __clc_vec8_fp64_t *); +__spirv_ocl_fract(__clc_vec8_fp64_t, __clc_vec8_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_fract(__clc_vec8_fp64_t, __clc_vec8_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_fract(__clc_vec8_fp64_t, __clc_vec8_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t -__spirv_ocl_fract(__clc_vec16_fp64_t, __clc_vec16_fp64_t *); +__spirv_ocl_fract(__clc_vec16_fp64_t, __clc_vec16_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_fract(__clc_vec16_fp64_t, __clc_vec16_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t @@ -14438,114 +14438,162 @@ __spirv_ocl_fract(__clc_vec16_fp64_t, __clc_vec16_fp64_t __global *); #endif #ifdef cl_khr_fp16 -_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_fract(__clc_fp16_t, - __clc_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_fract(__clc_fp16_t, __clc_fp16_t __private *); 
_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_fract(__clc_fp16_t, __clc_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_fract(__clc_fp16_t, __clc_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t -__spirv_ocl_fract(__clc_vec2_fp16_t, __clc_vec2_fp16_t *); +__spirv_ocl_fract(__clc_vec2_fp16_t, __clc_vec2_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_fract(__clc_vec2_fp16_t, __clc_vec2_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_fract(__clc_vec2_fp16_t, __clc_vec2_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t -__spirv_ocl_fract(__clc_vec3_fp16_t, __clc_vec3_fp16_t *); +__spirv_ocl_fract(__clc_vec3_fp16_t, __clc_vec3_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_fract(__clc_vec3_fp16_t, __clc_vec3_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_fract(__clc_vec3_fp16_t, __clc_vec3_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t -__spirv_ocl_fract(__clc_vec4_fp16_t, __clc_vec4_fp16_t *); +__spirv_ocl_fract(__clc_vec4_fp16_t, __clc_vec4_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_fract(__clc_vec4_fp16_t, __clc_vec4_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_fract(__clc_vec4_fp16_t, __clc_vec4_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t -__spirv_ocl_fract(__clc_vec8_fp16_t, __clc_vec8_fp16_t *); +__spirv_ocl_fract(__clc_vec8_fp16_t, __clc_vec8_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_fract(__clc_vec8_fp16_t, __clc_vec8_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_fract(__clc_vec8_fp16_t, __clc_vec8_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t -__spirv_ocl_fract(__clc_vec16_fp16_t, __clc_vec16_fp16_t *); +__spirv_ocl_fract(__clc_vec16_fp16_t, __clc_vec16_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t 
__spirv_ocl_fract(__clc_vec16_fp16_t, __clc_vec16_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_fract(__clc_vec16_fp16_t, __clc_vec16_fp16_t __global *); #endif -_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_frexp(__clc_fp32_t, - __clc_int32_t *); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_fract(__clc_fp32_t, __clc_fp32_t __generic *); + +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_fract(__clc_vec2_fp32_t, __clc_vec2_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_fract(__clc_vec3_fp32_t, __clc_vec3_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_fract(__clc_vec4_fp32_t, __clc_vec4_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_fract(__clc_vec8_fp32_t, __clc_vec8_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t +__spirv_ocl_fract(__clc_vec16_fp32_t, __clc_vec16_fp32_t __generic *); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_fract(__clc_fp64_t, __clc_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_fract(__clc_vec2_fp64_t, __clc_vec2_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_fract(__clc_vec3_fp64_t, __clc_vec3_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_fract(__clc_vec4_fp64_t, __clc_vec4_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_fract(__clc_vec8_fp64_t, __clc_vec8_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t +__spirv_ocl_fract(__clc_vec16_fp64_t, __clc_vec16_fp64_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_fract(__clc_fp16_t, __clc_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_fract(__clc_vec2_fp16_t, 
__clc_vec2_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_fract(__clc_vec3_fp16_t, __clc_vec3_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_fract(__clc_vec4_fp16_t, __clc_vec4_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_fract(__clc_vec8_fp16_t, __clc_vec8_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t +__spirv_ocl_fract(__clc_vec16_fp16_t, __clc_vec16_fp16_t __generic *); +#endif +#endif + +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_frexp(__clc_fp32_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_frexp(__clc_fp32_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_frexp(__clc_fp32_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t -__spirv_ocl_frexp(__clc_vec2_fp32_t, __clc_vec2_int32_t *); +__spirv_ocl_frexp(__clc_vec2_fp32_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_frexp(__clc_vec2_fp32_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_frexp(__clc_vec2_fp32_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t -__spirv_ocl_frexp(__clc_vec3_fp32_t, __clc_vec3_int32_t *); +__spirv_ocl_frexp(__clc_vec3_fp32_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_frexp(__clc_vec3_fp32_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_frexp(__clc_vec3_fp32_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t -__spirv_ocl_frexp(__clc_vec4_fp32_t, __clc_vec4_int32_t *); +__spirv_ocl_frexp(__clc_vec4_fp32_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_frexp(__clc_vec4_fp32_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_frexp(__clc_vec4_fp32_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD 
_CLC_DECL __clc_vec8_fp32_t -__spirv_ocl_frexp(__clc_vec8_fp32_t, __clc_vec8_int32_t *); +__spirv_ocl_frexp(__clc_vec8_fp32_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_frexp(__clc_vec8_fp32_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_frexp(__clc_vec8_fp32_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t -__spirv_ocl_frexp(__clc_vec16_fp32_t, __clc_vec16_int32_t *); +__spirv_ocl_frexp(__clc_vec16_fp32_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_frexp(__clc_vec16_fp32_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_frexp(__clc_vec16_fp32_t, __clc_vec16_int32_t __global *); #ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_frexp(__clc_fp64_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_frexp(__clc_fp64_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_frexp(__clc_fp64_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_frexp(__clc_fp64_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t -__spirv_ocl_frexp(__clc_vec2_fp64_t, __clc_vec2_int32_t *); +__spirv_ocl_frexp(__clc_vec2_fp64_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_frexp(__clc_vec2_fp64_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_frexp(__clc_vec2_fp64_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t -__spirv_ocl_frexp(__clc_vec3_fp64_t, __clc_vec3_int32_t *); +__spirv_ocl_frexp(__clc_vec3_fp64_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_frexp(__clc_vec3_fp64_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_frexp(__clc_vec3_fp64_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL 
__clc_vec4_fp64_t -__spirv_ocl_frexp(__clc_vec4_fp64_t, __clc_vec4_int32_t *); +__spirv_ocl_frexp(__clc_vec4_fp64_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_frexp(__clc_vec4_fp64_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_frexp(__clc_vec4_fp64_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t -__spirv_ocl_frexp(__clc_vec8_fp64_t, __clc_vec8_int32_t *); +__spirv_ocl_frexp(__clc_vec8_fp64_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_frexp(__clc_vec8_fp64_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_frexp(__clc_vec8_fp64_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t -__spirv_ocl_frexp(__clc_vec16_fp64_t, __clc_vec16_int32_t *); +__spirv_ocl_frexp(__clc_vec16_fp64_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_frexp(__clc_vec16_fp64_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t @@ -14553,44 +14601,91 @@ __spirv_ocl_frexp(__clc_vec16_fp64_t, __clc_vec16_int32_t __global *); #endif #ifdef cl_khr_fp16 -_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_frexp(__clc_fp16_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_frexp(__clc_fp16_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_frexp(__clc_fp16_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_frexp(__clc_fp16_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t -__spirv_ocl_frexp(__clc_vec2_fp16_t, __clc_vec2_int32_t *); +__spirv_ocl_frexp(__clc_vec2_fp16_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_frexp(__clc_vec2_fp16_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_frexp(__clc_vec2_fp16_t, __clc_vec2_int32_t __global *); 
_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t -__spirv_ocl_frexp(__clc_vec3_fp16_t, __clc_vec3_int32_t *); +__spirv_ocl_frexp(__clc_vec3_fp16_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_frexp(__clc_vec3_fp16_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_frexp(__clc_vec3_fp16_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t -__spirv_ocl_frexp(__clc_vec4_fp16_t, __clc_vec4_int32_t *); +__spirv_ocl_frexp(__clc_vec4_fp16_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_frexp(__clc_vec4_fp16_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_frexp(__clc_vec4_fp16_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t -__spirv_ocl_frexp(__clc_vec8_fp16_t, __clc_vec8_int32_t *); +__spirv_ocl_frexp(__clc_vec8_fp16_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_frexp(__clc_vec8_fp16_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_frexp(__clc_vec8_fp16_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t -__spirv_ocl_frexp(__clc_vec16_fp16_t, __clc_vec16_int32_t *); +__spirv_ocl_frexp(__clc_vec16_fp16_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_frexp(__clc_vec16_fp16_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_frexp(__clc_vec16_fp16_t, __clc_vec16_int32_t __global *); #endif +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_frexp(__clc_fp32_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_frexp(__clc_vec2_fp32_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t 
+__spirv_ocl_frexp(__clc_vec3_fp32_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_frexp(__clc_vec4_fp32_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_frexp(__clc_vec8_fp32_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t +__spirv_ocl_frexp(__clc_vec16_fp32_t, __clc_vec16_int32_t __generic *); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_frexp(__clc_fp64_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_frexp(__clc_vec2_fp64_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_frexp(__clc_vec3_fp64_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_frexp(__clc_vec4_fp64_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_frexp(__clc_vec8_fp64_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t +__spirv_ocl_frexp(__clc_vec16_fp64_t, __clc_vec16_int32_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_frexp(__clc_fp16_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_frexp(__clc_vec2_fp16_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_frexp(__clc_vec3_fp16_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_frexp(__clc_vec4_fp16_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_frexp(__clc_vec8_fp16_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t +__spirv_ocl_frexp(__clc_vec16_fp16_t, __clc_vec16_int32_t __generic *); +#endif +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_half_cos(__clc_fp32_t); _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t @@ -15012,76 +15107,76 @@ 
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t __spirv_ocl_lgamma(__clc_vec16_fp16_t); #endif -_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_lgamma_r(__clc_fp32_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_lgamma_r(__clc_fp32_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_lgamma_r(__clc_fp32_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_lgamma_r(__clc_fp32_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t -__spirv_ocl_lgamma_r(__clc_vec2_fp32_t, __clc_vec2_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec2_fp32_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_lgamma_r(__clc_vec2_fp32_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_lgamma_r(__clc_vec2_fp32_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t -__spirv_ocl_lgamma_r(__clc_vec3_fp32_t, __clc_vec3_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec3_fp32_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_lgamma_r(__clc_vec3_fp32_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_lgamma_r(__clc_vec3_fp32_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t -__spirv_ocl_lgamma_r(__clc_vec4_fp32_t, __clc_vec4_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec4_fp32_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_lgamma_r(__clc_vec4_fp32_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_lgamma_r(__clc_vec4_fp32_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t -__spirv_ocl_lgamma_r(__clc_vec8_fp32_t, __clc_vec8_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec8_fp32_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_lgamma_r(__clc_vec8_fp32_t, __clc_vec8_int32_t 
__local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_lgamma_r(__clc_vec8_fp32_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t -__spirv_ocl_lgamma_r(__clc_vec16_fp32_t, __clc_vec16_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec16_fp32_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_lgamma_r(__clc_vec16_fp32_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_lgamma_r(__clc_vec16_fp32_t, __clc_vec16_int32_t __global *); #ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_lgamma_r(__clc_fp64_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_lgamma_r(__clc_fp64_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_lgamma_r(__clc_fp64_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_lgamma_r(__clc_fp64_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t -__spirv_ocl_lgamma_r(__clc_vec2_fp64_t, __clc_vec2_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec2_fp64_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_lgamma_r(__clc_vec2_fp64_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_lgamma_r(__clc_vec2_fp64_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t -__spirv_ocl_lgamma_r(__clc_vec3_fp64_t, __clc_vec3_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec3_fp64_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_lgamma_r(__clc_vec3_fp64_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_lgamma_r(__clc_vec3_fp64_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t -__spirv_ocl_lgamma_r(__clc_vec4_fp64_t, __clc_vec4_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec4_fp64_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t 
__spirv_ocl_lgamma_r(__clc_vec4_fp64_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_lgamma_r(__clc_vec4_fp64_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t -__spirv_ocl_lgamma_r(__clc_vec8_fp64_t, __clc_vec8_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec8_fp64_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_lgamma_r(__clc_vec8_fp64_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_lgamma_r(__clc_vec8_fp64_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t -__spirv_ocl_lgamma_r(__clc_vec16_fp64_t, __clc_vec16_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec16_fp64_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_lgamma_r(__clc_vec16_fp64_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t @@ -15089,44 +15184,90 @@ __spirv_ocl_lgamma_r(__clc_vec16_fp64_t, __clc_vec16_int32_t __global *); #endif #ifdef cl_khr_fp16 -_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_lgamma_r(__clc_fp16_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_lgamma_r(__clc_fp16_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_lgamma_r(__clc_fp16_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_lgamma_r(__clc_fp16_t, __clc_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t -__spirv_ocl_lgamma_r(__clc_vec2_fp16_t, __clc_vec2_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec2_fp16_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_lgamma_r(__clc_vec2_fp16_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_lgamma_r(__clc_vec2_fp16_t, __clc_vec2_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t -__spirv_ocl_lgamma_r(__clc_vec3_fp16_t, __clc_vec3_int32_t *); 
+__spirv_ocl_lgamma_r(__clc_vec3_fp16_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_lgamma_r(__clc_vec3_fp16_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_lgamma_r(__clc_vec3_fp16_t, __clc_vec3_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t -__spirv_ocl_lgamma_r(__clc_vec4_fp16_t, __clc_vec4_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec4_fp16_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_lgamma_r(__clc_vec4_fp16_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_lgamma_r(__clc_vec4_fp16_t, __clc_vec4_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t -__spirv_ocl_lgamma_r(__clc_vec8_fp16_t, __clc_vec8_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec8_fp16_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_lgamma_r(__clc_vec8_fp16_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_lgamma_r(__clc_vec8_fp16_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t -__spirv_ocl_lgamma_r(__clc_vec16_fp16_t, __clc_vec16_int32_t *); +__spirv_ocl_lgamma_r(__clc_vec16_fp16_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_lgamma_r(__clc_vec16_fp16_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_lgamma_r(__clc_vec16_fp16_t, __clc_vec16_int32_t __global *); #endif +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_lgamma_r(__clc_fp32_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_lgamma_r(__clc_vec2_fp32_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_lgamma_r(__clc_vec3_fp32_t, 
__clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_lgamma_r(__clc_vec4_fp32_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_lgamma_r(__clc_vec8_fp32_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t +__spirv_ocl_lgamma_r(__clc_vec16_fp32_t, __clc_vec16_int32_t __generic *); +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_lgamma_r(__clc_fp64_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_lgamma_r(__clc_vec2_fp64_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_lgamma_r(__clc_vec3_fp64_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_lgamma_r(__clc_vec4_fp64_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_lgamma_r(__clc_vec8_fp64_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t +__spirv_ocl_lgamma_r(__clc_vec16_fp64_t, __clc_vec16_int32_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_lgamma_r(__clc_fp16_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_lgamma_r(__clc_vec2_fp16_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_lgamma_r(__clc_vec3_fp16_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_lgamma_r(__clc_vec4_fp16_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_lgamma_r(__clc_vec8_fp16_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t +__spirv_ocl_lgamma_r(__clc_vec16_fp16_t, __clc_vec16_int32_t __generic *); +#endif +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_log(__clc_fp32_t); _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t 
__spirv_ocl_log(__clc_vec2_fp32_t); @@ -15518,37 +15659,37 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t #endif _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_modf(__clc_fp32_t, - __clc_fp32_t *); + __clc_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_modf(__clc_fp32_t, __clc_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_modf(__clc_fp32_t, __clc_fp32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_modf(__clc_vec2_fp32_t, - __clc_vec2_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_modf(__clc_vec2_fp32_t, __clc_vec2_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_modf(__clc_vec2_fp32_t, __clc_vec2_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_modf(__clc_vec2_fp32_t, __clc_vec2_fp32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_modf(__clc_vec3_fp32_t, - __clc_vec3_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_modf(__clc_vec3_fp32_t, __clc_vec3_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_modf(__clc_vec3_fp32_t, __clc_vec3_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_modf(__clc_vec3_fp32_t, __clc_vec3_fp32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_modf(__clc_vec4_fp32_t, - __clc_vec4_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_modf(__clc_vec4_fp32_t, __clc_vec4_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_modf(__clc_vec4_fp32_t, __clc_vec4_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_modf(__clc_vec4_fp32_t, __clc_vec4_fp32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_modf(__clc_vec8_fp32_t, - __clc_vec8_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_modf(__clc_vec8_fp32_t, __clc_vec8_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t 
__spirv_ocl_modf(__clc_vec8_fp32_t, __clc_vec8_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_modf(__clc_vec8_fp32_t, __clc_vec8_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t -__spirv_ocl_modf(__clc_vec16_fp32_t, __clc_vec16_fp32_t *); +__spirv_ocl_modf(__clc_vec16_fp32_t, __clc_vec16_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_modf(__clc_vec16_fp32_t, __clc_vec16_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t @@ -15556,37 +15697,37 @@ __spirv_ocl_modf(__clc_vec16_fp32_t, __clc_vec16_fp32_t __global *); #ifdef cl_khr_fp64 _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_modf(__clc_fp64_t, - __clc_fp64_t *); + __clc_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_modf(__clc_fp64_t, __clc_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_modf(__clc_fp64_t, __clc_fp64_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_modf(__clc_vec2_fp64_t, - __clc_vec2_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_modf(__clc_vec2_fp64_t, __clc_vec2_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_modf(__clc_vec2_fp64_t, __clc_vec2_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_modf(__clc_vec2_fp64_t, __clc_vec2_fp64_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_modf(__clc_vec3_fp64_t, - __clc_vec3_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_modf(__clc_vec3_fp64_t, __clc_vec3_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_modf(__clc_vec3_fp64_t, __clc_vec3_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_modf(__clc_vec3_fp64_t, __clc_vec3_fp64_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_modf(__clc_vec4_fp64_t, - __clc_vec4_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_modf(__clc_vec4_fp64_t, __clc_vec4_fp64_t __private *); 
_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_modf(__clc_vec4_fp64_t, __clc_vec4_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_modf(__clc_vec4_fp64_t, __clc_vec4_fp64_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_modf(__clc_vec8_fp64_t, - __clc_vec8_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_modf(__clc_vec8_fp64_t, __clc_vec8_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_modf(__clc_vec8_fp64_t, __clc_vec8_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_modf(__clc_vec8_fp64_t, __clc_vec8_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t -__spirv_ocl_modf(__clc_vec16_fp64_t, __clc_vec16_fp64_t *); +__spirv_ocl_modf(__clc_vec16_fp64_t, __clc_vec16_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_modf(__clc_vec16_fp64_t, __clc_vec16_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t @@ -15595,43 +15736,90 @@ __spirv_ocl_modf(__clc_vec16_fp64_t, __clc_vec16_fp64_t __global *); #ifdef cl_khr_fp16 _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_modf(__clc_fp16_t, - __clc_fp16_t *); + __clc_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_modf(__clc_fp16_t, __clc_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_modf(__clc_fp16_t, __clc_fp16_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_modf(__clc_vec2_fp16_t, - __clc_vec2_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_modf(__clc_vec2_fp16_t, __clc_vec2_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_modf(__clc_vec2_fp16_t, __clc_vec2_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_modf(__clc_vec2_fp16_t, __clc_vec2_fp16_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_modf(__clc_vec3_fp16_t, - __clc_vec3_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t 
+__spirv_ocl_modf(__clc_vec3_fp16_t, __clc_vec3_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_modf(__clc_vec3_fp16_t, __clc_vec3_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_modf(__clc_vec3_fp16_t, __clc_vec3_fp16_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_modf(__clc_vec4_fp16_t, - __clc_vec4_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_modf(__clc_vec4_fp16_t, __clc_vec4_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_modf(__clc_vec4_fp16_t, __clc_vec4_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_modf(__clc_vec4_fp16_t, __clc_vec4_fp16_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_modf(__clc_vec8_fp16_t, - __clc_vec8_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_modf(__clc_vec8_fp16_t, __clc_vec8_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_modf(__clc_vec8_fp16_t, __clc_vec8_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_modf(__clc_vec8_fp16_t, __clc_vec8_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t -__spirv_ocl_modf(__clc_vec16_fp16_t, __clc_vec16_fp16_t *); +__spirv_ocl_modf(__clc_vec16_fp16_t, __clc_vec16_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_modf(__clc_vec16_fp16_t, __clc_vec16_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_modf(__clc_vec16_fp16_t, __clc_vec16_fp16_t __global *); #endif +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_modf(__clc_fp32_t, + __clc_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_modf(__clc_vec2_fp32_t, __clc_vec2_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_modf(__clc_vec3_fp32_t, 
__clc_vec3_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_modf(__clc_vec4_fp32_t, __clc_vec4_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_modf(__clc_vec8_fp32_t, __clc_vec8_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t +__spirv_ocl_modf(__clc_vec16_fp32_t, __clc_vec16_fp32_t __generic *); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_modf(__clc_fp64_t, + __clc_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_modf(__clc_vec2_fp64_t, __clc_vec2_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_modf(__clc_vec3_fp64_t, __clc_vec3_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_modf(__clc_vec4_fp64_t, __clc_vec4_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_modf(__clc_vec8_fp64_t, __clc_vec8_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t +__spirv_ocl_modf(__clc_vec16_fp64_t, __clc_vec16_fp64_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_modf(__clc_fp16_t, + __clc_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_modf(__clc_vec2_fp16_t, __clc_vec2_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_modf(__clc_vec3_fp16_t, __clc_vec3_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_modf(__clc_vec4_fp16_t, __clc_vec4_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_modf(__clc_vec8_fp16_t, __clc_vec8_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t +__spirv_ocl_modf(__clc_vec16_fp16_t, __clc_vec16_fp16_t __generic *); +#endif +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_nan(__clc_int32_t); _CLC_OVERLOAD @@ -16457,78 +16645,76 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t __spirv_ocl_remainder(__clc_vec16_fp16_t, 
__clc_vec16_fp16_t); #endif -_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_remquo(__clc_fp32_t, - __clc_fp32_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_remquo(__clc_fp32_t, __clc_fp32_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_remquo(__clc_fp32_t, __clc_fp32_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_remquo(__clc_fp32_t, __clc_fp32_t, __clc_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t -__spirv_ocl_remquo(__clc_vec2_fp32_t, __clc_vec2_fp32_t, __clc_vec2_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_remquo( + __clc_vec2_fp32_t, __clc_vec2_fp32_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_remquo( __clc_vec2_fp32_t, __clc_vec2_fp32_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_remquo( __clc_vec2_fp32_t, __clc_vec2_fp32_t, __clc_vec2_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t -__spirv_ocl_remquo(__clc_vec3_fp32_t, __clc_vec3_fp32_t, __clc_vec3_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_remquo( + __clc_vec3_fp32_t, __clc_vec3_fp32_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_remquo( __clc_vec3_fp32_t, __clc_vec3_fp32_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_remquo( __clc_vec3_fp32_t, __clc_vec3_fp32_t, __clc_vec3_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t -__spirv_ocl_remquo(__clc_vec4_fp32_t, __clc_vec4_fp32_t, __clc_vec4_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_remquo( + __clc_vec4_fp32_t, __clc_vec4_fp32_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_remquo( __clc_vec4_fp32_t, __clc_vec4_fp32_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_remquo( __clc_vec4_fp32_t, 
__clc_vec4_fp32_t, __clc_vec4_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t -__spirv_ocl_remquo(__clc_vec8_fp32_t, __clc_vec8_fp32_t, __clc_vec8_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_remquo( + __clc_vec8_fp32_t, __clc_vec8_fp32_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_remquo( __clc_vec8_fp32_t, __clc_vec8_fp32_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_remquo( __clc_vec8_fp32_t, __clc_vec8_fp32_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_remquo( - __clc_vec16_fp32_t, __clc_vec16_fp32_t, __clc_vec16_int32_t *); + __clc_vec16_fp32_t, __clc_vec16_fp32_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_remquo( __clc_vec16_fp32_t, __clc_vec16_fp32_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_remquo( __clc_vec16_fp32_t, __clc_vec16_fp32_t, __clc_vec16_int32_t __global *); #ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_remquo(__clc_fp64_t, - __clc_fp64_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_remquo(__clc_fp64_t, __clc_fp64_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_remquo(__clc_fp64_t, __clc_fp64_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_remquo(__clc_fp64_t, __clc_fp64_t, __clc_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t -__spirv_ocl_remquo(__clc_vec2_fp64_t, __clc_vec2_fp64_t, __clc_vec2_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_remquo( + __clc_vec2_fp64_t, __clc_vec2_fp64_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_remquo( __clc_vec2_fp64_t, __clc_vec2_fp64_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_remquo( __clc_vec2_fp64_t, 
__clc_vec2_fp64_t, __clc_vec2_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t -__spirv_ocl_remquo(__clc_vec3_fp64_t, __clc_vec3_fp64_t, __clc_vec3_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_remquo( + __clc_vec3_fp64_t, __clc_vec3_fp64_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_remquo( __clc_vec3_fp64_t, __clc_vec3_fp64_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_remquo( __clc_vec3_fp64_t, __clc_vec3_fp64_t, __clc_vec3_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t -__spirv_ocl_remquo(__clc_vec4_fp64_t, __clc_vec4_fp64_t, __clc_vec4_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_remquo( + __clc_vec4_fp64_t, __clc_vec4_fp64_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_remquo( __clc_vec4_fp64_t, __clc_vec4_fp64_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_remquo( __clc_vec4_fp64_t, __clc_vec4_fp64_t, __clc_vec4_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t -__spirv_ocl_remquo(__clc_vec8_fp64_t, __clc_vec8_fp64_t, __clc_vec8_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_remquo( + __clc_vec8_fp64_t, __clc_vec8_fp64_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_remquo( __clc_vec8_fp64_t, __clc_vec8_fp64_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_remquo( __clc_vec8_fp64_t, __clc_vec8_fp64_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_remquo( - __clc_vec16_fp64_t, __clc_vec16_fp64_t, __clc_vec16_int32_t *); + __clc_vec16_fp64_t, __clc_vec16_fp64_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_remquo( __clc_vec16_fp64_t, __clc_vec16_fp64_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL 
__clc_vec16_fp64_t __spirv_ocl_remquo( @@ -16536,45 +16722,91 @@ _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_remquo( #endif #ifdef cl_khr_fp16 -_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_remquo(__clc_fp16_t, - __clc_fp16_t, - __clc_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_remquo(__clc_fp16_t, __clc_fp16_t, __clc_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_remquo(__clc_fp16_t, __clc_fp16_t, __clc_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_remquo(__clc_fp16_t, __clc_fp16_t, __clc_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t -__spirv_ocl_remquo(__clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_remquo( + __clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_remquo( __clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_remquo( __clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t -__spirv_ocl_remquo(__clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_remquo( + __clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_remquo( __clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_remquo( __clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t -__spirv_ocl_remquo(__clc_vec4_fp16_t, __clc_vec4_fp16_t, __clc_vec4_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_remquo( + __clc_vec4_fp16_t, __clc_vec4_fp16_t, __clc_vec4_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_remquo( __clc_vec4_fp16_t, 
__clc_vec4_fp16_t, __clc_vec4_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_remquo( __clc_vec4_fp16_t, __clc_vec4_fp16_t, __clc_vec4_int32_t __global *); -_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t -__spirv_ocl_remquo(__clc_vec8_fp16_t, __clc_vec8_fp16_t, __clc_vec8_int32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_remquo( + __clc_vec8_fp16_t, __clc_vec8_fp16_t, __clc_vec8_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_remquo( __clc_vec8_fp16_t, __clc_vec8_fp16_t, __clc_vec8_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_remquo( __clc_vec8_fp16_t, __clc_vec8_fp16_t, __clc_vec8_int32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_remquo( - __clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_int32_t *); + __clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_int32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_remquo( __clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_int32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_remquo( __clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_int32_t __global *); #endif +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_remquo(__clc_fp32_t, __clc_fp32_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_remquo( + __clc_vec2_fp32_t, __clc_vec2_fp32_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_remquo( + __clc_vec3_fp32_t, __clc_vec3_fp32_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_remquo( + __clc_vec4_fp32_t, __clc_vec4_fp32_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_remquo( + __clc_vec8_fp32_t, __clc_vec8_fp32_t, __clc_vec8_int32_t __generic *); 
+_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_remquo( + __clc_vec16_fp32_t, __clc_vec16_fp32_t, __clc_vec16_int32_t __generic *); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_remquo(__clc_fp64_t, __clc_fp64_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_remquo( + __clc_vec2_fp64_t, __clc_vec2_fp64_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_remquo( + __clc_vec3_fp64_t, __clc_vec3_fp64_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_remquo( + __clc_vec4_fp64_t, __clc_vec4_fp64_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_remquo( + __clc_vec8_fp64_t, __clc_vec8_fp64_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_remquo( + __clc_vec16_fp64_t, __clc_vec16_fp64_t, __clc_vec16_int32_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_remquo(__clc_fp16_t, __clc_fp16_t, __clc_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_remquo( + __clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_remquo( + __clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_remquo( + __clc_vec4_fp16_t, __clc_vec4_fp16_t, __clc_vec4_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_remquo( + __clc_vec8_fp16_t, __clc_vec8_fp16_t, __clc_vec8_int32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_remquo( + __clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_int32_t __generic *); +#endif +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_rint(__clc_fp32_t); _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t @@ -18679,76 +18911,76 @@ _CLC_OVERLOAD _CLC_DECL 
_CLC_CONSTFN __clc_vec16_fp16_t __spirv_ocl_sin(__clc_vec16_fp16_t); #endif -_CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_sincos(__clc_fp32_t, - __clc_fp32_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_sincos(__clc_fp32_t, __clc_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_sincos(__clc_fp32_t, __clc_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp32_t __spirv_ocl_sincos(__clc_fp32_t, __clc_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t -__spirv_ocl_sincos(__clc_vec2_fp32_t, __clc_vec2_fp32_t *); +__spirv_ocl_sincos(__clc_vec2_fp32_t, __clc_vec2_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_sincos(__clc_vec2_fp32_t, __clc_vec2_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t __spirv_ocl_sincos(__clc_vec2_fp32_t, __clc_vec2_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t -__spirv_ocl_sincos(__clc_vec3_fp32_t, __clc_vec3_fp32_t *); +__spirv_ocl_sincos(__clc_vec3_fp32_t, __clc_vec3_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_sincos(__clc_vec3_fp32_t, __clc_vec3_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t __spirv_ocl_sincos(__clc_vec3_fp32_t, __clc_vec3_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t -__spirv_ocl_sincos(__clc_vec4_fp32_t, __clc_vec4_fp32_t *); +__spirv_ocl_sincos(__clc_vec4_fp32_t, __clc_vec4_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_sincos(__clc_vec4_fp32_t, __clc_vec4_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t __spirv_ocl_sincos(__clc_vec4_fp32_t, __clc_vec4_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t -__spirv_ocl_sincos(__clc_vec8_fp32_t, __clc_vec8_fp32_t *); +__spirv_ocl_sincos(__clc_vec8_fp32_t, __clc_vec8_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t __spirv_ocl_sincos(__clc_vec8_fp32_t, __clc_vec8_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t 
__spirv_ocl_sincos(__clc_vec8_fp32_t, __clc_vec8_fp32_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t -__spirv_ocl_sincos(__clc_vec16_fp32_t, __clc_vec16_fp32_t *); +__spirv_ocl_sincos(__clc_vec16_fp32_t, __clc_vec16_fp32_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_sincos(__clc_vec16_fp32_t, __clc_vec16_fp32_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t __spirv_ocl_sincos(__clc_vec16_fp32_t, __clc_vec16_fp32_t __global *); #ifdef cl_khr_fp64 -_CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_sincos(__clc_fp64_t, - __clc_fp64_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_sincos(__clc_fp64_t, __clc_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_sincos(__clc_fp64_t, __clc_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp64_t __spirv_ocl_sincos(__clc_fp64_t, __clc_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t -__spirv_ocl_sincos(__clc_vec2_fp64_t, __clc_vec2_fp64_t *); +__spirv_ocl_sincos(__clc_vec2_fp64_t, __clc_vec2_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_sincos(__clc_vec2_fp64_t, __clc_vec2_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t __spirv_ocl_sincos(__clc_vec2_fp64_t, __clc_vec2_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t -__spirv_ocl_sincos(__clc_vec3_fp64_t, __clc_vec3_fp64_t *); +__spirv_ocl_sincos(__clc_vec3_fp64_t, __clc_vec3_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_sincos(__clc_vec3_fp64_t, __clc_vec3_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t __spirv_ocl_sincos(__clc_vec3_fp64_t, __clc_vec3_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t -__spirv_ocl_sincos(__clc_vec4_fp64_t, __clc_vec4_fp64_t *); +__spirv_ocl_sincos(__clc_vec4_fp64_t, __clc_vec4_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t __spirv_ocl_sincos(__clc_vec4_fp64_t, __clc_vec4_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t 
__spirv_ocl_sincos(__clc_vec4_fp64_t, __clc_vec4_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t -__spirv_ocl_sincos(__clc_vec8_fp64_t, __clc_vec8_fp64_t *); +__spirv_ocl_sincos(__clc_vec8_fp64_t, __clc_vec8_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_sincos(__clc_vec8_fp64_t, __clc_vec8_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t __spirv_ocl_sincos(__clc_vec8_fp64_t, __clc_vec8_fp64_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t -__spirv_ocl_sincos(__clc_vec16_fp64_t, __clc_vec16_fp64_t *); +__spirv_ocl_sincos(__clc_vec16_fp64_t, __clc_vec16_fp64_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t __spirv_ocl_sincos(__clc_vec16_fp64_t, __clc_vec16_fp64_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t @@ -18756,44 +18988,91 @@ __spirv_ocl_sincos(__clc_vec16_fp64_t, __clc_vec16_fp64_t __global *); #endif #ifdef cl_khr_fp16 -_CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_sincos(__clc_fp16_t, - __clc_fp16_t *); +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_sincos(__clc_fp16_t, __clc_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_sincos(__clc_fp16_t, __clc_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_fp16_t __spirv_ocl_sincos(__clc_fp16_t, __clc_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t -__spirv_ocl_sincos(__clc_vec2_fp16_t, __clc_vec2_fp16_t *); +__spirv_ocl_sincos(__clc_vec2_fp16_t, __clc_vec2_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_sincos(__clc_vec2_fp16_t, __clc_vec2_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t __spirv_ocl_sincos(__clc_vec2_fp16_t, __clc_vec2_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t -__spirv_ocl_sincos(__clc_vec3_fp16_t, __clc_vec3_fp16_t *); +__spirv_ocl_sincos(__clc_vec3_fp16_t, __clc_vec3_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t __spirv_ocl_sincos(__clc_vec3_fp16_t, __clc_vec3_fp16_t __local *); _CLC_OVERLOAD 
_CLC_DECL __clc_vec3_fp16_t __spirv_ocl_sincos(__clc_vec3_fp16_t, __clc_vec3_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t -__spirv_ocl_sincos(__clc_vec4_fp16_t, __clc_vec4_fp16_t *); +__spirv_ocl_sincos(__clc_vec4_fp16_t, __clc_vec4_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_sincos(__clc_vec4_fp16_t, __clc_vec4_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t __spirv_ocl_sincos(__clc_vec4_fp16_t, __clc_vec4_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t -__spirv_ocl_sincos(__clc_vec8_fp16_t, __clc_vec8_fp16_t *); +__spirv_ocl_sincos(__clc_vec8_fp16_t, __clc_vec8_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_sincos(__clc_vec8_fp16_t, __clc_vec8_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t __spirv_ocl_sincos(__clc_vec8_fp16_t, __clc_vec8_fp16_t __global *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t -__spirv_ocl_sincos(__clc_vec16_fp16_t, __clc_vec16_fp16_t *); +__spirv_ocl_sincos(__clc_vec16_fp16_t, __clc_vec16_fp16_t __private *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_sincos(__clc_vec16_fp16_t, __clc_vec16_fp16_t __local *); _CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t __spirv_ocl_sincos(__clc_vec16_fp16_t, __clc_vec16_fp16_t __global *); #endif +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +_CLC_OVERLOAD _CLC_DECL __clc_fp32_t +__spirv_ocl_sincos(__clc_fp32_t, __clc_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp32_t +__spirv_ocl_sincos(__clc_vec2_fp32_t, __clc_vec2_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp32_t +__spirv_ocl_sincos(__clc_vec3_fp32_t, __clc_vec3_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp32_t +__spirv_ocl_sincos(__clc_vec4_fp32_t, __clc_vec4_fp32_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp32_t +__spirv_ocl_sincos(__clc_vec8_fp32_t, __clc_vec8_fp32_t __generic *); 
+_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp32_t +__spirv_ocl_sincos(__clc_vec16_fp32_t, __clc_vec16_fp32_t __generic *); + +#ifdef cl_khr_fp64 +_CLC_OVERLOAD _CLC_DECL __clc_fp64_t +__spirv_ocl_sincos(__clc_fp64_t, __clc_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp64_t +__spirv_ocl_sincos(__clc_vec2_fp64_t, __clc_vec2_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp64_t +__spirv_ocl_sincos(__clc_vec3_fp64_t, __clc_vec3_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp64_t +__spirv_ocl_sincos(__clc_vec4_fp64_t, __clc_vec4_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp64_t +__spirv_ocl_sincos(__clc_vec8_fp64_t, __clc_vec8_fp64_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp64_t +__spirv_ocl_sincos(__clc_vec16_fp64_t, __clc_vec16_fp64_t __generic *); +#endif + +#ifdef cl_khr_fp16 +_CLC_OVERLOAD _CLC_DECL __clc_fp16_t +__spirv_ocl_sincos(__clc_fp16_t, __clc_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec2_fp16_t +__spirv_ocl_sincos(__clc_vec2_fp16_t, __clc_vec2_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec3_fp16_t +__spirv_ocl_sincos(__clc_vec3_fp16_t, __clc_vec3_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec4_fp16_t +__spirv_ocl_sincos(__clc_vec4_fp16_t, __clc_vec4_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec8_fp16_t +__spirv_ocl_sincos(__clc_vec8_fp16_t, __clc_vec8_fp16_t __generic *); +_CLC_OVERLOAD _CLC_DECL __clc_vec16_fp16_t +__spirv_ocl_sincos(__clc_vec16_fp16_t, __clc_vec16_fp16_t __generic *); +#endif +#endif + _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_sinh(__clc_fp32_t); _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t diff --git a/libclc/generic/lib/math/fract.inc b/libclc/generic/lib/math/fract.inc index 9db5657bb45c5..84db0971b452c 100644 --- a/libclc/generic/lib/math/fract.inc +++ b/libclc/generic/lib/math/fract.inc @@ -31,18 +31,19 @@ #define ZERO 0.0h #endif -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, private 
__CLC_GENTYPE *iptr) { - return __spirv_ocl_fract(x, iptr); -} - - #define FRACT_DEF(addrspace) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ return __spirv_ocl_fract(x, iptr); \ } +FRACT_DEF(private); FRACT_DEF(local); FRACT_DEF(global); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +FRACT_DEF(generic); +#endif #undef MIN_CONSTANT #undef ZERO diff --git a/libclc/generic/lib/math/frexp.cl b/libclc/generic/lib/math/frexp.cl index cd2c717815112..7a1651d270ec4 100644 --- a/libclc/generic/lib/math/frexp.cl +++ b/libclc/generic/lib/math/frexp.cl @@ -15,3 +15,12 @@ #define __CLC_ADDRESS_SPACE local #include #undef __CLC_ADDRESS_SPACE + +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE generic +#include +#undef __CLC_ADDRESS_SPACE +#endif diff --git a/libclc/generic/lib/math/modf.inc b/libclc/generic/lib/math/modf.inc index 1ffc6d9e851bd..4a055a2998aca 100644 --- a/libclc/generic/lib/math/modf.inc +++ b/libclc/generic/lib/math/modf.inc @@ -28,14 +28,14 @@ #define ZERO 0.0h #endif -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) { +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) { *iptr = trunc(x); return copysign(isinf(x) ? 
ZERO : x - *iptr, x); } #define MODF_DEF(addrspace) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ - __CLC_GENTYPE private_iptr; \ + private __CLC_GENTYPE private_iptr; \ __CLC_GENTYPE ret = modf(x, &private_iptr); \ *iptr = private_iptr; \ return ret; \ @@ -43,5 +43,10 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) MODF_DEF(local); MODF_DEF(global); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +MODF_DEF(generic); +#endif #undef ZERO diff --git a/libclc/generic/lib/math/remquo.cl b/libclc/generic/lib/math/remquo.cl index fc29b366b36ed..e022b737630d5 100644 --- a/libclc/generic/lib/math/remquo.cl +++ b/libclc/generic/lib/math/remquo.cl @@ -15,3 +15,12 @@ #define __CLC_ADDRESS_SPACE private #include #undef __CLC_ADDRESS_SPACE + +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE generic +#include +#undef __CLC_ADDRESS_SPACE +#endif diff --git a/libclc/generic/lib/math/remquo.inc b/libclc/generic/lib/math/remquo.inc index c33b5ddab3112..32bd41da37ddc 100644 --- a/libclc/generic/lib/math/remquo.inc +++ b/libclc/generic/lib/math/remquo.inc @@ -1,9 +1,9 @@ // TODO: Enable half precision when the sw routine is implemented #if __CLC_FPSIZE > 16 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) { - __CLC_INTN local_q; - __CLC_GENTYPE ret = __clc_remquo(x, y, &local_q); - *q = local_q; + private __CLC_INTN private_q; + __CLC_GENTYPE ret = __clc_remquo(x, y, &private_q); + *q = private_q; return ret; } #endif diff --git a/libclc/generic/lib/math/sincos.inc b/libclc/generic/lib/math/sincos.inc index 05135d1b3290b..e56378ba0dff7 100644 --- a/libclc/generic/lib/math/sincos.inc +++ 
b/libclc/generic/lib/math/sincos.inc @@ -8,6 +8,11 @@ __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +__CLC_DECLARE_SINCOS(generic, __CLC_GENTYPE) +#endif #undef __CLC_DECLARE_SINCOS #endif diff --git a/libclc/generic/libspirv/math/fract.inc b/libclc/generic/libspirv/math/fract.inc index 55fbe4edaad39..8da1c2fe39627 100644 --- a/libclc/generic/libspirv/math/fract.inc +++ b/libclc/generic/libspirv/math/fract.inc @@ -38,7 +38,7 @@ __spirv_ocl_fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) { #define FRACT_DEF(addrspace) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_fract( \ __CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ - __CLC_GENTYPE private_iptr; \ + private __CLC_GENTYPE private_iptr; \ __CLC_GENTYPE ret = __spirv_ocl_fract(x, &private_iptr); \ *iptr = private_iptr; \ return ret; \ @@ -46,6 +46,11 @@ __spirv_ocl_fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) { FRACT_DEF(local); FRACT_DEF(global); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +FRACT_DEF(generic); +#endif #undef MIN_CONSTANT #undef ZERO diff --git a/libclc/generic/libspirv/math/frexp.cl b/libclc/generic/libspirv/math/frexp.cl index 314befe6d4f76..51bee881af259 100644 --- a/libclc/generic/libspirv/math/frexp.cl +++ b/libclc/generic/libspirv/math/frexp.cl @@ -23,3 +23,12 @@ #define __CLC_ADDRESS_SPACE local #include #undef __CLC_ADDRESS_SPACE + +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE generic +#include +#undef __CLC_ADDRESS_SPACE +#endif diff --git a/libclc/generic/libspirv/math/lgamma_r.cl 
b/libclc/generic/libspirv/math/lgamma_r.cl index 6f23c50c0607a..fbff9ddf3191e 100644 --- a/libclc/generic/libspirv/math/lgamma_r.cl +++ b/libclc/generic/libspirv/math/lgamma_r.cl @@ -658,3 +658,12 @@ _CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __spirv_ocl_lgamma_r, half, #define __CLC_BODY #include #undef __CLC_ADDRSPACE + +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_ADDRSPACE generic +#define __CLC_BODY +#include +#undef __CLC_ADDRSPACE +#endif diff --git a/libclc/generic/libspirv/math/lgamma_r.inc b/libclc/generic/libspirv/math/lgamma_r.inc index 3d697814f221f..4a90b8812b3c5 100644 --- a/libclc/generic/libspirv/math/lgamma_r.inc +++ b/libclc/generic/libspirv/math/lgamma_r.inc @@ -8,7 +8,7 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) { - __CLC_INTN private_iptr; + private __CLC_INTN private_iptr; __CLC_GENTYPE ret = __spirv_ocl_lgamma_r(x, &private_iptr); *iptr = private_iptr; return ret; diff --git a/libclc/generic/libspirv/math/modf.inc b/libclc/generic/libspirv/math/modf.inc index edd4171ac859c..8b276da7cb1b9 100644 --- a/libclc/generic/libspirv/math/modf.inc +++ b/libclc/generic/libspirv/math/modf.inc @@ -23,7 +23,7 @@ #endif _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf(__CLC_GENTYPE x, - __CLC_GENTYPE *iptr) { + private __CLC_GENTYPE *iptr) { *iptr = __spirv_ocl_trunc(x); return __spirv_ocl_copysign( __CLC_CONVERT_NATN(__spirv_IsInf(x)) ? 
ZERO : x - *iptr, x); @@ -32,7 +32,7 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf(__CLC_GENTYPE x, #define MODF_DEF(addrspace) \ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf( \ __CLC_GENTYPE x, addrspace __CLC_GENTYPE *iptr) { \ - __CLC_GENTYPE private_iptr; \ + private __CLC_GENTYPE private_iptr; \ __CLC_GENTYPE ret = __spirv_ocl_modf(x, &private_iptr); \ *iptr = private_iptr; \ return ret; \ @@ -41,5 +41,11 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_modf(__CLC_GENTYPE x, MODF_DEF(local); MODF_DEF(global); +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +MODF_DEF(generic); +#endif + #undef __CLC_CONVERT_NATN #undef ZERO diff --git a/libclc/generic/libspirv/math/remquo.cl b/libclc/generic/libspirv/math/remquo.cl index 3c12d082b4614..5475604ac4ab2 100644 --- a/libclc/generic/libspirv/math/remquo.cl +++ b/libclc/generic/libspirv/math/remquo.cl @@ -24,3 +24,12 @@ #define __CLC_ADDRESS_SPACE private #include #undef __CLC_ADDRESS_SPACE + +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +#define __CLC_BODY +#define __CLC_ADDRESS_SPACE generic +#include +#undef __CLC_ADDRESS_SPACE +#endif diff --git a/libclc/generic/libspirv/math/remquo.inc b/libclc/generic/libspirv/math/remquo.inc index f94fb6586b621..074a4d8c6faeb 100644 --- a/libclc/generic/libspirv/math/remquo.inc +++ b/libclc/generic/libspirv/math/remquo.inc @@ -8,8 +8,8 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __spirv_ocl_remquo( __CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) { - __CLC_INTN local_q; - __CLC_GENTYPE ret = __clc_remquo(x, y, &local_q); - *q = local_q; + private __CLC_INTN private_q; + __CLC_GENTYPE ret = __clc_remquo(x, y, &private_q); + *q = private_q; return ret; } diff --git a/libclc/generic/libspirv/math/sincos.inc b/libclc/generic/libspirv/math/sincos.inc index 
33f964be33768..f92f32e42c3f6 100644 --- a/libclc/generic/libspirv/math/sincos.inc +++ b/libclc/generic/libspirv/math/sincos.inc @@ -16,4 +16,10 @@ __CLC_DECLARE_SINCOS(global, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE) __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE) +#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \ + defined(__opencl_c_generic_address_space)) +__CLC_DECLARE_SINCOS(generic, __CLC_GENTYPE) +#endif + #undef __CLC_DECLARE_SINCOS diff --git a/sycl/test-e2e/USM/math.cpp b/sycl/test-e2e/USM/math.cpp index 4baa2560b4b46..c9e063cba7248 100644 --- a/sycl/test-e2e/USM/math.cpp +++ b/sycl/test-e2e/USM/math.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: hip // RUN: %{build} -o %t.out // RUN: %{run} %t.out From 2d0ee9915c637d74437391aee96778413163d538 Mon Sep 17 00:00:00 2001 From: Marcos Maronas Date: Thu, 21 Mar 2024 07:37:46 -0700 Subject: [PATCH 13/22] [SYCL][E2E] Stop skipping image_selection.cpp test. (#13086) Using `,` in requires is equivalent to `&&`, and a device can't support simultaneously `opencl` and `level-zero`, so this test was always skipped. This patch corrects this by requiring either `opencl` or `level-zero`, but not both simultaneously. 
Signed-off-by: Marcos Maronas --- sycl/test-e2e/SpecConstants/2020/image_selection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/SpecConstants/2020/image_selection.cpp b/sycl/test-e2e/SpecConstants/2020/image_selection.cpp index 8d0d424d2fa8a..feb4579ea5526 100644 --- a/sycl/test-e2e/SpecConstants/2020/image_selection.cpp +++ b/sycl/test-e2e/SpecConstants/2020/image_selection.cpp @@ -1,4 +1,4 @@ -// REQUIRES: opencl, level-zero, gpu, ocloc +// REQUIRES: (opencl || level-zero) && gpu && ocloc // Check the case when -fsycl-add-default-spec-consts-image option is used which // results in generation of two types of images: where specialization constants From 445893a7e83fa1f65e47e805383b354a8d8a34e6 Mon Sep 17 00:00:00 2001 From: Udit Agarwal <16324601+uditagarwal97@users.noreply.github.com> Date: Thu, 21 Mar 2024 07:49:14 -0700 Subject: [PATCH 14/22] [CI] Enable check-sycl-unittests in CI (#12858) This PR enables testing unittests in CI. --- .github/workflows/sycl-linux-build.yml | 6 ++++ .github/workflows/sycl-windows-build.yml | 4 +++ sycl/cmake/modules/AddSYCLUnitTest.cmake | 40 ++++++++++++++++------ sycl/unittests/xpti_trace/QueueIDCheck.cpp | 4 ++- 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/.github/workflows/sycl-linux-build.yml b/.github/workflows/sycl-linux-build.yml index 6c64e0c56c409..b9718b390bd99 100644 --- a/.github/workflows/sycl-linux-build.yml +++ b/.github/workflows/sycl-linux-build.yml @@ -186,6 +186,12 @@ jobs: # TODO consider moving this to Dockerfile. export LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH cmake --build $GITHUB_WORKSPACE/build --target check-sycl + - name: check-sycl-unittests + if: always() && !cancelled() && contains(inputs.changes, 'sycl') + run: | + # TODO consider moving this to Dockerfile. 
+ export LD_LIBRARY_PATH=/usr/local/cuda/compat/:/usr/local/cuda/lib64:$LD_LIBRARY_PATH + cmake --build $GITHUB_WORKSPACE/build --target check-sycl-unittests - name: check-llvm-spirv if: always() && !cancelled() && contains(inputs.changes, 'llvm_spirv') run: | diff --git a/.github/workflows/sycl-windows-build.yml b/.github/workflows/sycl-windows-build.yml index 64e62b7ef58fd..47014318c16de 100644 --- a/.github/workflows/sycl-windows-build.yml +++ b/.github/workflows/sycl-windows-build.yml @@ -109,6 +109,10 @@ jobs: if: always() && !cancelled() && contains(inputs.changes, 'sycl') run: | cmake --build build --target check-sycl + - name: check-sycl-unittests + if: always() && !cancelled() && contains(inputs.changes, 'sycl') + run: | + cmake --build build --target check-sycl-unittests - name: check-llvm-spirv if: always() && !cancelled() && contains(inputs.changes, 'llvm_spirv') run: | diff --git a/sycl/cmake/modules/AddSYCLUnitTest.cmake b/sycl/cmake/modules/AddSYCLUnitTest.cmake index ea8135be0f09e..9571d43cc07c9 100644 --- a/sycl/cmake/modules/AddSYCLUnitTest.cmake +++ b/sycl/cmake/modules/AddSYCLUnitTest.cmake @@ -37,16 +37,34 @@ macro(add_sycl_unittest test_dirname link_variant) ) endif() - add_custom_target(check-sycl-${test_dirname} - ${CMAKE_COMMAND} -E env - LLVM_PROFILE_FILE="${SYCL_COVERAGE_PATH}/${test_dirname}.profraw" - env SYCL_CONFIG_FILE_NAME=null.cfg - env SYCL_DEVICELIB_NO_FALLBACK=1 - env SYCL_CACHE_DIR="${CMAKE_BINARY_DIR}/sycl_cache" - ${CMAKE_CURRENT_BINARY_DIR}/${test_dirname} - DEPENDS - ${test_dirname} - ) + # check-sycl-unittests was using an old sycl library. So, to get + # around this problem, we add the new sycl library to the PATH and + # LD_LIBRARY_PATH on Windows and Linux respectively. 
+ if (WIN32) + add_custom_target(check-sycl-${test_dirname} + ${CMAKE_COMMAND} -E env + LLVM_PROFILE_FILE="${SYCL_COVERAGE_PATH}/${test_dirname}.profraw" + SYCL_CONFIG_FILE_NAME=null.cfg + SYCL_DEVICELIB_NO_FALLBACK=1 + SYCL_CACHE_DIR="${CMAKE_BINARY_DIR}/sycl_cache" + "PATH=${CMAKE_BINARY_DIR}/bin;$ENV{PATH}" + ${CMAKE_CURRENT_BINARY_DIR}/${test_dirname} + DEPENDS + ${test_dirname} + ) + else() + add_custom_target(check-sycl-${test_dirname} + ${CMAKE_COMMAND} -E env + LLVM_PROFILE_FILE="${SYCL_COVERAGE_PATH}/${test_dirname}.profraw" + SYCL_CONFIG_FILE_NAME=null.cfg + SYCL_DEVICELIB_NO_FALLBACK=1 + SYCL_CACHE_DIR="${CMAKE_BINARY_DIR}/sycl_cache" + "LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/lib:$ENV{LD_LIBRARY_PATH}" + ${CMAKE_CURRENT_BINARY_DIR}/${test_dirname} + DEPENDS + ${test_dirname} + ) + endif() add_dependencies(check-sycl-unittests check-sycl-${test_dirname}) @@ -60,7 +78,7 @@ macro(add_sycl_unittest test_dirname link_variant) if(SYCL_ENABLE_KERNEL_FUSION) target_link_libraries(${test_dirname} PRIVATE sycl-fusion) endif(SYCL_ENABLE_KERNEL_FUSION) - + target_include_directories(${test_dirname} PRIVATE SYSTEM ${sycl_inc_dir} diff --git a/sycl/unittests/xpti_trace/QueueIDCheck.cpp b/sycl/unittests/xpti_trace/QueueIDCheck.cpp index c57c7091c1fe9..1baf72b87a59a 100644 --- a/sycl/unittests/xpti_trace/QueueIDCheck.cpp +++ b/sycl/unittests/xpti_trace/QueueIDCheck.cpp @@ -136,7 +136,9 @@ TEST_F(QueueID, QueueCreationAndKernelWithDeps) { checkTaskBeginEnd(QueueIDSTr); } -TEST_F(QueueID, QueueCreationUSMOperations) { +// Re-enable this test after fixing +// https://github.com/intel/llvm/issues/12963 +TEST_F(QueueID, DISABLED_QueueCreationUSMOperations) { sycl::queue Q0; auto Queue0ImplPtr = sycl::detail::getSyclObjImpl(Q0); auto QueueIDSTr = std::to_string(Queue0ImplPtr->getQueueID()); From 710fd87d840cbfb669b871e1f3fd4f909907f4c6 Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Thu, 21 Mar 2024 16:15:02 +0100 Subject: [PATCH 15/22] [SYCL][NATIVECPU] Remove run lines for 
multi targets Native CPU tests (#13087) The tests assume that an OpenCL implementation is available, but it's not always the case. --- sycl/test/native_cpu/multi-devices-swap.cpp | 1 - sycl/test/native_cpu/multi-devices.cpp | 1 - sycl/test/native_cpu/vector-add-pointers.cpp | 0 3 files changed, 2 deletions(-) mode change 100755 => 100644 sycl/test/native_cpu/vector-add-pointers.cpp diff --git a/sycl/test/native_cpu/multi-devices-swap.cpp b/sycl/test/native_cpu/multi-devices-swap.cpp index 282b89ba23e41..9b2971cf94fe1 100644 --- a/sycl/test/native_cpu/multi-devices-swap.cpp +++ b/sycl/test/native_cpu/multi-devices-swap.cpp @@ -2,7 +2,6 @@ // REQUIRES: opencl_be // RUN: %clangxx -fsycl -fsycl-targets=native_cpu,spir64 %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t -// RUN: env ONEAPI_DEVICE_SELECTOR="opencl:cpu" %t #include diff --git a/sycl/test/native_cpu/multi-devices.cpp b/sycl/test/native_cpu/multi-devices.cpp index 6a3cba9ec5307..1e74020c63730 100644 --- a/sycl/test/native_cpu/multi-devices.cpp +++ b/sycl/test/native_cpu/multi-devices.cpp @@ -2,7 +2,6 @@ // REQUIRES: opencl_be // RUN: %clangxx -fsycl -fsycl-targets=spir64,native_cpu %s -o %t // RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t -// RUN: env ONEAPI_DEVICE_SELECTOR="opencl:cpu" %t #include diff --git a/sycl/test/native_cpu/vector-add-pointers.cpp b/sycl/test/native_cpu/vector-add-pointers.cpp old mode 100755 new mode 100644 From b7501aea0f0b7ab82160b262965e08dbdbec4ecf Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 21 Mar 2024 14:10:29 -0700 Subject: [PATCH 16/22] [NFCI] Introduce <sycl/detail/core.hpp> include (#12890) The idea is to have a thinner version of `<sycl/sycl.hpp>` that allows running basic tests and enables fine-grained includes in our e2e tests. Hopefully that can increase CI efficiency. 
--- sycl/include/sycl/detail/core.hpp | 23 +++++++++++++++++++ .../ext/oneapi/experimental/root_group.hpp | 1 - sycl/include/sycl/sycl.hpp | 5 ++-- .../assert_in_simultaneously_multiple_tus.cpp | 2 +- .../AsyncHandler/default_async_handler.cpp | 2 +- .../BFloat16/bfloat16_conversions.cpp | 2 +- sycl/test-e2e/BFloat16/bfloat_hw.cpp | 2 +- sycl/test-e2e/Basic/AMX_aspect.cpp | 2 +- sycl/test-e2e/Basic/access_to_subset.cpp | 2 +- .../Basic/accessor/empty_acc_host_task.cpp | 2 +- .../accessor/empty_zero_dim_accessor.cpp | 2 +- .../Basic/alloc_pinned_host_memory.cpp | 2 +- sycl/test-e2e/Basic/aspects.cpp | 2 +- sycl/test-e2e/Basic/bit_cast/bit_cast.cpp | 2 +- sycl/test-e2e/Basic/boolean.cpp | 5 +++- sycl/test-e2e/Basic/buffer/buffer.cpp | 2 +- .../Basic/buffer/buffer_allocator.cpp | 2 +- .../Basic/buffer/buffer_container.cpp | 2 +- sycl/test-e2e/Basic/buffer/buffer_create.cpp | 2 +- .../Basic/buffer/buffer_dev_to_dev.cpp | 2 +- .../Basic/buffer/buffer_full_copy.cpp | 2 +- sycl/test-e2e/Basic/buffer/buffer_migrate.cpp | 2 +- sycl/test-e2e/Basic/buffer/reinterpret.cpp | 2 +- sycl/test-e2e/Basic/buffer/subbuffer.cpp | 2 +- .../Basic/buffer/subbuffer_overlap.cpp | 2 +- sycl/test-e2e/Basic/built-ins/helpers.hpp | 4 +++- sycl/test-e2e/Basic/built-ins/marray_math.cpp | 4 +++- sycl/test-e2e/Basic/built-ins/vec_common.cpp | 4 +++- .../Basic/built-ins/vec_geometric.cpp | 4 +++- sycl/test-e2e/Basic/built-ins/vec_math.cpp | 4 +++- .../Basic/built-ins/vec_relational.cpp | 4 +++- .../Basic/compare_exchange_strong.cpp | 2 +- sycl/test-e2e/Basic/context_platforms.cpp | 2 +- sycl/test-e2e/Basic/default_device.cpp | 2 +- sycl/test-e2e/Basic/default_platform.cpp | 2 +- sycl/test-e2e/Basic/device-selectors.cpp | 2 +- sycl/test-e2e/Basic/device.cpp | 2 +- sycl/test-e2e/Basic/device_equality.cpp | 2 +- .../Basic/diagnostics/device-check.cpp | 2 +- sycl/test-e2e/Basic/diagnostics/handler.cpp | 2 +- sycl/test-e2e/Basic/enqueue_barrier.cpp | 2 +- sycl/test-e2e/Basic/event.cpp | 2 +- 
sycl/test-e2e/Basic/event_async_exception.cpp | 2 +- sycl/test-e2e/Basic/fill_accessor.cpp | 2 +- sycl/test-e2e/Basic/fill_accessor_pi.cpp | 2 +- sycl/test-e2e/Basic/fpga_tests/pipes_info.cpp | 2 +- sycl/test-e2e/Basic/get_backend.cpp | 2 +- sycl/test-e2e/Basic/get_info_aspect.cpp | 2 +- sycl/test-e2e/Basic/half_builtins.cpp | 4 +++- sycl/test-e2e/Basic/half_type.cpp | 2 +- .../handler/handler_copy_with_offset.cpp | 2 +- .../handler_generic_integral_lambda.cpp | 2 +- .../test-e2e/Basic/handler/handler_mem_op.cpp | 2 +- sycl/test-e2e/Basic/host-task-dependency.cpp | 2 +- sycl/test-e2e/Basic/host_defer_copy.cpp | 2 +- sycl/test-e2e/Basic/host_write_back.cpp | 2 +- .../Basic/image/image_constructors.cpp | 2 +- .../Basic/image/srgba-aspect-check.cpp | 2 +- sycl/test-e2e/Basic/info.cpp | 2 +- .../Basic/interop/construction_ze.cpp | 2 +- .../Basic/interop/ze_context_device.cpp | 2 +- sycl/test-e2e/Basic/library_loading.cpp | 2 +- sycl/test-e2e/Basic/linear-sub_group.cpp | 2 +- sycl/test-e2e/Basic/local_mem_type.cpp | 2 +- sycl/test-e2e/Basic/multisource.cpp | 2 +- sycl/test-e2e/Basic/multisource_spv_obj.cpp | 2 +- .../Basic/offset-accessor-get_pointer.cpp | 2 +- .../parallel_for_disable_range_roundup.cpp | 2 +- .../Basic/parallel_for_offset_integral_t.cpp | 2 +- sycl/test-e2e/Basic/partition_supported.cpp | 2 +- .../queue_select_device_not_in_context.cpp | 2 +- .../Basic/queue/queue_shortcut_functions.cpp | 2 +- sycl/test-e2e/Basic/queue/release.cpp | 2 +- .../reqd_work_group_size_check_exception.cpp | 2 +- .../test-e2e/Basic/spirv_device_obj_smoke.cpp | 2 +- sycl/test-e2e/Basic/submit_barrier.cpp | 2 +- sycl/test-e2e/Basic/swizzle_op.cpp | 2 +- sycl/test-e2e/Basic/sycl-namespace.cpp | 2 +- sycl/test-e2e/Basic/unused_pointer.cpp | 2 +- sycl/test-e2e/Basic/vector/bool.cpp | 2 +- sycl/test-e2e/Basic/vector/byte.cpp | 2 +- sycl/test-e2e/Basic/vector/int-convert.cpp | 2 +- sycl/test-e2e/Basic/vector/operators.cpp | 2 +- .../Complex/sycl_complex_include_order.cpp | 2 +- 
sycl/test-e2e/Config/config.cpp | 2 +- sycl/test-e2e/Config/default-context.cpp | 2 +- sycl/test-e2e/Config/device_selector.cpp | 2 +- sycl/test-e2e/Config/env_vars.cpp | 2 +- sycl/test-e2e/Config/kernel_from_file.cpp | 2 +- sycl/test-e2e/Config/select_device.cpp | 2 +- .../DeprecatedFeatures/deprecated.cpp | 2 +- .../deprecated_sycl_device_filter.cpp | 2 +- sycl/test-e2e/DeprecatedFeatures/platform.cpp | 2 +- .../DeprecatedFeatures/queue_old_interop.cpp | 2 +- .../DeprecatedFeatures/sycl_device_filter.cpp | 2 +- .../device_architecture_on_host.cpp | 2 +- .../DeviceCodeSplit/split-per-kernel.cpp | 2 +- .../DeviceLib/built-ins/fast-math-flag.cpp | 4 +++- .../test-e2e/DeviceLib/built-ins/ftz-flag.cpp | 5 +++- .../DeviceLib/built-ins/marray_integer.cpp | 4 +++- sycl/test-e2e/DeviceLib/built-ins/nan.cpp | 6 +++-- .../DeviceLib/built-ins/scalar_common.cpp | 4 +++- .../DeviceLib/built-ins/scalar_geometric.cpp | 4 +++- .../DeviceLib/built-ins/scalar_integer.cpp | 4 +++- .../DeviceLib/built-ins/scalar_math.cpp | 4 +++- .../DeviceLib/built-ins/scalar_math_2.cpp | 4 +++- .../DeviceLib/built-ins/scalar_relational.cpp | 4 +++- .../DeviceLib/built-ins/vector_common.cpp | 4 +++- .../DeviceLib/built-ins/vector_geometric.cpp | 4 +++- .../DeviceLib/built-ins/vector_integer.cpp | 4 +++- .../DeviceLib/built-ins/vector_math.cpp | 4 +++- .../DeviceLib/built-ins/vector_relational.cpp | 4 +++- sycl/test-e2e/DeviceLib/cmath_test.cpp | 2 +- .../imf_bfloat16_integeral_convesions.cpp | 2 +- .../DeviceLib/imf_simd_emulate_test.cpp | 2 +- sycl/test-e2e/DeviceLib/math_fp64_test.cpp | 2 +- .../test-e2e/DeviceLib/math_override_test.cpp | 2 +- sycl/test-e2e/DeviceLib/math_test.cpp | 2 +- .../DotProduct/dot_product_int_test.cpp | 2 +- .../DotProduct/dot_product_vec_test.cpp | 2 +- .../ESIMD/regression/double_conversion.cpp | 2 +- .../regression/fmod_compatibility_test.cpp | 2 +- .../ESIMD/regression/half_conversion_test.cpp | 2 +- .../regression/sycl_esimd_mixed_unnamed.cpp | 2 +- 
.../test-e2e/ESIMD/regression/unused_load.cpp | 2 +- .../select_device_level_zero.cpp | 2 +- sycl/test-e2e/Functor/functor_inheritance.cpp | 2 +- sycl/test-e2e/Functor/kernel_functor.cpp | 2 +- sycl/test-e2e/HierPar/hier_par_basic.cpp | 4 ++-- .../HostInteropTask/host-task-dependency2.cpp | 2 +- .../HostInteropTask/host-task-dependency3.cpp | 2 +- .../HostInteropTask/host-task-dependency4.cpp | 2 +- .../HostInteropTask/host-task-failure.cpp | 2 +- sycl/test-e2e/HostInteropTask/host-task.cpp | 2 +- .../InlineAsm/Negative/asm_bad_opcode.cpp | 2 +- .../Negative/asm_bad_operand_syntax.cpp | 2 +- .../Negative/asm_duplicate_label.cpp | 2 +- .../Negative/asm_illegal_exec_size.cpp | 2 +- .../InlineAsm/Negative/asm_missing_label.cpp | 2 +- .../InlineAsm/Negative/asm_missing_region.cpp | 2 +- .../InlineAsm/Negative/asm_simple.cpp | 2 +- .../InlineAsm/Negative/asm_undefined_decl.cpp | 2 +- .../InlineAsm/Negative/asm_undefined_pred.cpp | 2 +- .../InlineAsm/Negative/asm_wrong_declare.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_16_empty.cpp | 2 +- .../test-e2e/InlineAsm/asm_16_matrix_mult.cpp | 2 +- .../InlineAsm/asm_16_no_input_int.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_16_no_opts.cpp | 2 +- .../InlineAsm/asm_arbitrary_ops_order.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_decl_in_scope.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_float_add.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_float_imm_arg.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_float_neg.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_if.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_imm_arg.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_loop.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_mul.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_no_operands.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_no_output.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_plus_mod.cpp | 2 +- sycl/test-e2e/InlineAsm/asm_switch.cpp | 2 +- .../Spec/clang_run_error/ESIMD_to_SPMD.cpp | 2 +- .../Spec/clang_run_error/recurs.cpp | 2 +- .../clang_run_error/sycl_marray_argument.cpp | 2 +- 
.../clang_run_error/sycl_vec_argument.cpp | 2 +- sycl/test-e2e/KernelAndProgram/build-log.cpp | 2 +- .../KernelAndProgram/cache-build-result.cpp | 2 +- .../kernel-bundle-get-kernel.cpp | 2 +- .../level-zero-link-flags.cpp | 2 +- .../level-zero-static-link-flow.cpp | 2 +- .../multiple-kernel-linking.cpp | 2 +- .../KernelAndProgram/target_compile_fast.cpp | 2 +- .../KernelAndProgram/test_cache_jit_aot.cpp | 2 +- .../KernelAndProgram/undefined-symbol.cpp | 2 +- .../array-kernel-param-nested-run.cpp | 2 +- .../KernelParams/array-kernel-param-run.cpp | 2 +- .../KernelParams/non-standard-layout.cpp | 2 +- .../KernelParams/struct_kernel_param.cpp | 2 +- .../KernelParams/union_kernel_param.cpp | 2 +- .../NonUniformGroups/is_fixed_topology.cpp | 2 +- .../OneapiDeviceSelector/level_zero_top.cpp | 2 +- .../OneapiDeviceSelector/sub-devices.cpp | 2 +- .../OnlineCompiler/online_compiler_L0.cpp | 2 +- .../test-e2e/OptionalKernelFeatures/esimd.cpp | 2 +- .../OptionalKernelFeatures/fp64_relaxed.cpp | 2 +- .../is_compatible_with_aspects.cpp | 2 +- .../no-speculative-compilation.cpp | 2 +- .../sycl-external-with-optional-features.cpp | 4 ++-- .../Plugin/enqueue-arg-order-buffer.cpp | 4 ++-- .../Plugin/enqueue-arg-order-image.cpp | 2 +- .../interop-level-zero-buffer-multi-dim.cpp | 2 +- .../interop-level-zero-buffer-ownership.cpp | 2 +- .../Plugin/interop-level-zero-buffer.cpp | 2 +- .../interop-level-zero-get-native-mem.cpp | 2 +- .../interop-level-zero-keep-ownership.cpp | 2 +- sycl/test-e2e/Plugin/interop-level-zero.cpp | 2 +- .../Plugin/level-zero-usm-capabilities.cpp | 2 +- .../Plugin/level_zero_batch_event_status.cpp | 2 +- ...device_memory_clock_rate_and_bus_width.cpp | 2 +- .../Plugin/level_zero_device_scope_events.cpp | 2 +- .../level_zero_imm_cmdlist_per_thread.cpp | 2 +- .../Plugin/level_zero_queue_create.cpp | 2 +- ...evel_zero_track_indirect_access_memory.cpp | 2 +- sycl/test-e2e/Plugin/sycl-partition-info.cpp | 2 +- .../preview_lib_marker.cpp | 2 +- 
.../sycl-opt-level-level-zero.cpp | 2 +- .../sycl-opt-level-opencl.cpp | 2 +- sycl/test-e2e/README.md | 13 +++++++++++ .../Reduction/reduction_dynamic_span.cpp | 2 +- .../Regression/DAE-separate-compile.cpp | 2 +- sycl/test-e2e/Regression/atomic_load.cpp | 2 +- sycl/test-e2e/Regression/cache_test.cpp | 2 +- sycl/test-e2e/Regression/device_num.cpp | 2 +- .../device_pci_address_bdf_format.cpp | 2 +- .../Regression/empty_accessor_use.cpp | 2 +- .../Regression/fp16-with-unnamed-lambda.cpp | 2 +- .../Regression/fsycl-host-compiler.cpp | 2 +- .../Regression/get_subgroup_sizes.cpp | 2 +- sycl/test-e2e/Regression/global_queue.cpp | 2 +- .../Regression/host_unified_memory.cpp | 2 +- .../Regression/implicit_atomic_conversion.cpp | 2 +- sycl/test-e2e/Regression/isordered.cpp | 4 +++- .../kernel_bundle_ignore_sycl_external.cpp | 2 +- .../test-e2e/Regression/kernel_name_class.cpp | 2 +- sycl/test-e2e/Regression/kernel_unnamed.cpp | 2 +- sycl/test-e2e/Regression/mad_sat.cpp | 4 +++- .../nontrivial_device_copyable_value.cpp | 2 +- .../optimization_level_debug_info_intopt.cpp | 2 +- .../optimization_level_debug_info_specopt.cpp | 2 +- sycl/test-e2e/Regression/pi_release.cpp | 2 +- .../Regression/private_array_init_test.cpp | 2 +- .../Regression/range-rounding-this-id.cpp | 2 +- .../Regression/same_unnamed_kernels.cpp | 2 +- .../Regression/static-buffer-dtor.cpp | 2 +- .../test-e2e/Regression/subalign_no_alloc.cpp | 2 +- .../Regression/vec_rel_swizzle_ops.cpp | 2 +- .../Scheduler/BasicSchedulerTests.cpp | 2 +- .../Scheduler/CommandCleanupThreadSafety.cpp | 2 +- sycl/test-e2e/Scheduler/DataMovement.cpp | 2 +- .../test-e2e/Scheduler/HostAccDestruction.cpp | 2 +- sycl/test-e2e/Scheduler/MemObjRemapping.cpp | 2 +- sycl/test-e2e/Scheduler/MultipleDevices.cpp | 2 +- .../Scheduler/ReleaseResourcesTest.cpp | 2 +- .../test-e2e/Scheduler/SubBufferRemapping.cpp | 2 +- sycl/test-e2e/SeparateCompile/same-kernel.cpp | 2 +- .../SeparateCompile/sycl-external.cpp | 2 +- 
sycl/test-e2e/SeparateCompile/test.cpp | 4 ++-- sycl/test-e2e/SharedLib/use_when_link.cpp | 2 +- .../SharedLib/use_when_link_verify_cache.cpp | 2 +- sycl/test-e2e/SharedLib/use_with_dlopen.cpp | 2 +- .../use_with_dlopen_verify_cache.cpp | 2 +- sycl/test-e2e/Tracing/buffer_printers.cpp | 2 +- sycl/test-e2e/Tracing/pi_tracing_test.cpp | 2 +- sycl/test-e2e/XPTI/buffer/host_array.cpp | 2 +- sycl/test-e2e/XPTI/buffer/in_cycle.cpp | 2 +- .../test-e2e/XPTI/buffer/multiple_buffers.cpp | 2 +- sycl/test-e2e/XPTI/buffer/recursion.cpp | 2 +- sycl/test-e2e/XPTI/buffer/sub_buffer.cpp | 2 +- sycl/test-e2e/XPTI/buffer/use_host_ptr.cpp | 2 +- sycl/test-e2e/XPTI/image/accessors.cpp | 2 +- .../syclcompat/atomic/atomic_arith.cpp | 2 +- .../syclcompat/atomic/atomic_bitwise.cpp | 2 +- .../atomic/atomic_comp_exchange.cpp | 2 +- .../atomic/atomic_memory_acq_rel.cpp | 2 +- .../syclcompat/atomic/atomic_minmax.cpp | 2 +- sycl/test-e2e/syclcompat/dim.cpp | 2 +- .../syclcompat/math/math_length_test.cpp | 2 +- .../math/math_vectorized_isgreater_test.cpp | 2 +- .../math/math_vectorized_max_test.cpp | 2 +- .../math/math_vectorized_min_test.cpp | 2 +- sycl/test-e2e/syclcompat/memory/memcpy_3d.cpp | 2 +- .../test-e2e/syclcompat/memory/memcpy_3d2.cpp | 2 +- .../syclcompat/memory/memory_async.cpp | 2 +- .../memory/memory_management_diff_queues.cpp | 2 +- .../memory/memory_management_test3.cpp | 2 +- .../syclcompat/util/util_cast_value_test.cpp | 2 +- .../syclcompat/util/util_find_first_set.cpp | 2 +- .../syclcompat/util/util_logical_group.cpp | 2 +- .../util/util_matrix_mem_copy_test.cpp | 2 +- .../util/util_nd_range_barrier_test.cpp | 2 +- .../syclcompat/util/util_perm_byte_test.cpp | 2 +- .../util/util_permute_sub_group_by_xor.cpp | 2 +- .../util/util_reverse_bits_test.cpp | 2 +- .../util/util_select_from_sub_group.cpp | 2 +- .../util/util_shift_sub_group_left.cpp | 2 +- .../util/util_shift_sub_group_right.cpp | 2 +- 286 files changed, 377 insertions(+), 291 deletions(-) create mode 100644 
sycl/include/sycl/detail/core.hpp diff --git a/sycl/include/sycl/detail/core.hpp b/sycl/include/sycl/detail/core.hpp new file mode 100644 index 0000000000000..c542ee7e8254b --- /dev/null +++ b/sycl/include/sycl/detail/core.hpp @@ -0,0 +1,23 @@ +//==------------ core.hpp - Thin subset of <sycl/sycl.hpp> -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +// This is an ongoing experimental activity in its early stage. No code outside +// this project must rely on the behavior of this header file - keep using +// <sycl/sycl.hpp>. +// +// Short-term plan/action items (in no particular order): +// * Update more tests to use this instead of full <sycl/sycl.hpp>. +// * Refactor includes so that transitive dependencies don't bring as much as +// they currently do. +// * Determine what else should be included here. 
+ +#include +#include +#include diff --git a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp index 9b6fd04a7e5a9..c41ee9c486a0c 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/root_group.hpp @@ -8,7 +8,6 @@ #pragma once -#include #include #include #include diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 73a37673d70b8..0903bce7dd23e 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -8,7 +8,8 @@ #pragma once -#include +#include + #include #include #include @@ -17,7 +18,6 @@ #if SYCL_BACKEND_OPENCL #include #endif -#include #include #include #include @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include diff --git a/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus.cpp b/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus.cpp index 763c1b49386c9..1a69f7005090f 100644 --- a/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus.cpp +++ b/sycl/test-e2e/Assert/assert_in_simultaneously_multiple_tus.cpp @@ -31,7 +31,7 @@ #include "Inputs/kernels_in_file2.hpp" #include #include -#include +#include #include #ifdef DEFINE_NDEBUG_INFILE1 diff --git a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp index e90f72f6cc48b..7e6a333e305d7 100644 --- a/sycl/test-e2e/AsyncHandler/default_async_handler.cpp +++ b/sycl/test-e2e/AsyncHandler/default_async_handler.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out &> %t.txt ; FileCheck %s --input-file %t.txt -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp b/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp index 85abf3303ec7c..907faf0b5292a 100644 --- a/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp +++ b/sycl/test-e2e/BFloat16/bfloat16_conversions.cpp @@ -17,7 +17,7 @@ 
//===---------------------------------------------------------------------===// #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/BFloat16/bfloat_hw.cpp b/sycl/test-e2e/BFloat16/bfloat_hw.cpp index 952768ee74c11..0154d21156fd1 100644 --- a/sycl/test-e2e/BFloat16/bfloat_hw.cpp +++ b/sycl/test-e2e/BFloat16/bfloat_hw.cpp @@ -3,7 +3,7 @@ // "Hello world" bfloat16 test which checks conversion algorithms on host. -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/AMX_aspect.cpp b/sycl/test-e2e/Basic/AMX_aspect.cpp index 09bcb89111061..ff3225c3670f5 100644 --- a/sycl/test-e2e/Basic/AMX_aspect.cpp +++ b/sycl/test-e2e/Basic/AMX_aspect.cpp @@ -15,7 +15,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include using namespace sycl; using arch = sycl::ext::oneapi::experimental::architecture; diff --git a/sycl/test-e2e/Basic/access_to_subset.cpp b/sycl/test-e2e/Basic/access_to_subset.cpp index f2002df8116fe..3fc81f477c0de 100644 --- a/sycl/test-e2e/Basic/access_to_subset.cpp +++ b/sycl/test-e2e/Basic/access_to_subset.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// #include -#include +#include using namespace sycl; using acc_w = accessor; diff --git a/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp b/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp index afee48c3cf081..b8e91547b83ca 100644 --- a/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp +++ b/sycl/test-e2e/Basic/accessor/empty_acc_host_task.cpp @@ -1,6 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Basic/accessor/empty_zero_dim_accessor.cpp b/sycl/test-e2e/Basic/accessor/empty_zero_dim_accessor.cpp index 62f61136f024f..abd5a1fb643f9 100644 --- a/sycl/test-e2e/Basic/accessor/empty_zero_dim_accessor.cpp +++ b/sycl/test-e2e/Basic/accessor/empty_zero_dim_accessor.cpp @@ 
-3,7 +3,7 @@ // Tests the size and iterator members of an empty zero-dimensional accessor. -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp b/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp index 8b6e847c01597..943cced16aa3e 100644 --- a/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp +++ b/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp @@ -4,7 +4,7 @@ // RUN: env SYCL_PI_TRACE=2 UR_L0_DEBUG=1 %{run} %t2.out %if level_zero %{ 2>&1 | FileCheck %s %} // RUN: %{run} %t2.out -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/aspects.cpp b/sycl/test-e2e/Basic/aspects.cpp index d14e4764f49e9..75daf263e638e 100644 --- a/sycl/test-e2e/Basic/aspects.cpp +++ b/sycl/test-e2e/Basic/aspects.cpp @@ -15,7 +15,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/bit_cast/bit_cast.cpp b/sycl/test-e2e/Basic/bit_cast/bit_cast.cpp index 250b8edb6b170..84e29a4f4d014 100644 --- a/sycl/test-e2e/Basic/bit_cast/bit_cast.cpp +++ b/sycl/test-e2e/Basic/bit_cast/bit_cast.cpp @@ -3,7 +3,7 @@ // RUN: %if windows %{ %clangxx -fsycl -fsycl-host-compiler=cl -fsycl-host-compiler-options='/std:c++17 /Zc:__cplusplus' -o %t2.out %s %} // RUN: %if windows %{ %{run} %t2.out %} -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/boolean.cpp b/sycl/test-e2e/Basic/boolean.cpp index 6c376c6dcf8b9..c19d614268a83 100644 --- a/sycl/test-e2e/Basic/boolean.cpp +++ b/sycl/test-e2e/Basic/boolean.cpp @@ -2,7 +2,10 @@ // RUN: %{run} %t.out #include #include -#include + +#include + +#include using namespace sycl; namespace s = sycl; diff --git a/sycl/test-e2e/Basic/buffer/buffer.cpp b/sycl/test-e2e/Basic/buffer/buffer.cpp index 8f3e243e86436..92c4f12cdfd6c 100644 --- a/sycl/test-e2e/Basic/buffer/buffer.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer.cpp @@ -12,7 +12,7 @@ #include #include #include -#include 
+#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/buffer/buffer_allocator.cpp b/sycl/test-e2e/Basic/buffer/buffer_allocator.cpp index 90784e72249bb..f0a0ba3105ace 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_allocator.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_allocator.cpp @@ -14,7 +14,7 @@ // device. #include -#include +#include constexpr size_t NumElems = 67; diff --git a/sycl/test-e2e/Basic/buffer/buffer_container.cpp b/sycl/test-e2e/Basic/buffer/buffer_container.cpp index 63395ad529169..0a263bb199a6c 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_container.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_container.cpp @@ -1,7 +1,7 @@ // RUN: %{build} %cxx_std_optionc++17 -o %t2.out // RUN: %{run} %t2.out -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/buffer/buffer_create.cpp b/sycl/test-e2e/Basic/buffer/buffer_create.cpp index eda7f81d5f764..15955d96b5ebc 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_create.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_create.cpp @@ -4,7 +4,7 @@ // UNSUPPORTED: ze_debug #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/buffer/buffer_dev_to_dev.cpp b/sycl/test-e2e/Basic/buffer/buffer_dev_to_dev.cpp index ef66cb97f3b65..e44f2a628dbc0 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_dev_to_dev.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_dev_to_dev.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/buffer/buffer_full_copy.cpp b/sycl/test-e2e/Basic/buffer/buffer_full_copy.cpp index b72cc4650cbac..20206f19b03eb 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_full_copy.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_full_copy.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include void check_copy_device_to_host(sycl::queue &Queue) { constexpr int size = 6, offset = 2; diff --git a/sycl/test-e2e/Basic/buffer/buffer_migrate.cpp b/sycl/test-e2e/Basic/buffer/buffer_migrate.cpp index 
6096ee41d9918..4af49f8ad9bef 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_migrate.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_migrate.cpp @@ -7,7 +7,7 @@ // #include -#include +#include using namespace sycl; int main() { diff --git a/sycl/test-e2e/Basic/buffer/reinterpret.cpp b/sycl/test-e2e/Basic/buffer/reinterpret.cpp index 6dc5c40ff9e01..e61cd760a78c8 100644 --- a/sycl/test-e2e/Basic/buffer/reinterpret.cpp +++ b/sycl/test-e2e/Basic/buffer/reinterpret.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include // This tests verifies basic cases of using sycl::buffer::reinterpret // functionality - changing buffer type and range. This test checks that diff --git a/sycl/test-e2e/Basic/buffer/subbuffer.cpp b/sycl/test-e2e/Basic/buffer/subbuffer.cpp index 6a57883062f95..f34c86ab2be6f 100644 --- a/sycl/test-e2e/Basic/buffer/subbuffer.cpp +++ b/sycl/test-e2e/Basic/buffer/subbuffer.cpp @@ -16,7 +16,7 @@ // 1) Correct results after usage of different type of accessors to sub buffer // 2) Exceptions if we trying to create sub buffer not according to spec -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp b/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp index c925eebd0a8c9..af296fc3b56f9 100644 --- a/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp +++ b/sycl/test-e2e/Basic/buffer/subbuffer_overlap.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out // RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Basic/built-ins/helpers.hpp b/sycl/test-e2e/Basic/built-ins/helpers.hpp index 203caefbe63a0..03a7c720e9afd 100644 --- a/sycl/test-e2e/Basic/built-ins/helpers.hpp +++ b/sycl/test-e2e/Basic/built-ins/helpers.hpp @@ -1,4 +1,6 @@ -#include +#include + +#include template bool equal(T x, T y, D delta) { // Maybe should be C++20's std::equality_comparable. 
diff --git a/sycl/test-e2e/Basic/built-ins/marray_math.cpp b/sycl/test-e2e/Basic/built-ins/marray_math.cpp index 46636a6ccd93d..f40de39214916 100644 --- a/sycl/test-e2e/Basic/built-ins/marray_math.cpp +++ b/sycl/test-e2e/Basic/built-ins/marray_math.cpp @@ -6,7 +6,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{run} %t_preview.out%} #include -#include +#include + +#include // Reference // https://github.com/KhronosGroup/SYCL-CTS/blob/SYCL-2020/util/accuracy.h diff --git a/sycl/test-e2e/Basic/built-ins/vec_common.cpp b/sycl/test-e2e/Basic/built-ins/vec_common.cpp index 090852ea14e78..ea62bdb7c72d4 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_common.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_common.cpp @@ -8,7 +8,9 @@ #endif #include -#include +#include + +#include #define TEST(FUNC, VEC_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ { \ diff --git a/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp b/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp index eceeadb40f2e2..432057a171f02 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_geometric.cpp @@ -3,7 +3,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} -#include +#include + +#include #define TEST(FUNC, VEC_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) \ { \ diff --git a/sycl/test-e2e/Basic/built-ins/vec_math.cpp b/sycl/test-e2e/Basic/built-ins/vec_math.cpp index ea6ab81431bc2..61ef24547ec94 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_math.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_math.cpp @@ -5,7 +5,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes %{mathflags} -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} -#include +#include + +#include #define TEST(FUNC, VEC_ELEM_TYPE, DIM, EXPECTED, DELTA, ...) 
\ { \ diff --git a/sycl/test-e2e/Basic/built-ins/vec_relational.cpp b/sycl/test-e2e/Basic/built-ins/vec_relational.cpp index 45f12f22952fa..48b347143c824 100644 --- a/sycl/test-e2e/Basic/built-ins/vec_relational.cpp +++ b/sycl/test-e2e/Basic/built-ins/vec_relational.cpp @@ -3,7 +3,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} -#include +#include + +#include #define TEST(FUNC, TYPE, EXPECTED, N, ...) \ { \ diff --git a/sycl/test-e2e/Basic/compare_exchange_strong.cpp b/sycl/test-e2e/Basic/compare_exchange_strong.cpp index 8b749119f97a9..c95b7797f0749 100644 --- a/sycl/test-e2e/Basic/compare_exchange_strong.cpp +++ b/sycl/test-e2e/Basic/compare_exchange_strong.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include using namespace sycl; int main() { diff --git a/sycl/test-e2e/Basic/context_platforms.cpp b/sycl/test-e2e/Basic/context_platforms.cpp index 28e1656101e89..9e81b8480e7a0 100644 --- a/sycl/test-e2e/Basic/context_platforms.cpp +++ b/sycl/test-e2e/Basic/context_platforms.cpp @@ -2,7 +2,7 @@ // RUN: %{run-unfiltered-devices} %t.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/default_device.cpp b/sycl/test-e2e/Basic/default_device.cpp index 7cb44321e3150..f56ab6ae47a5a 100644 --- a/sycl/test-e2e/Basic/default_device.cpp +++ b/sycl/test-e2e/Basic/default_device.cpp @@ -9,7 +9,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::device Dev; diff --git a/sycl/test-e2e/Basic/default_platform.cpp b/sycl/test-e2e/Basic/default_platform.cpp index 5b7b7b9a68e7a..5fd2c324a7df1 100644 --- a/sycl/test-e2e/Basic/default_platform.cpp +++ b/sycl/test-e2e/Basic/default_platform.cpp @@ -9,7 +9,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::platform Plt; diff --git 
a/sycl/test-e2e/Basic/device-selectors.cpp b/sycl/test-e2e/Basic/device-selectors.cpp index 00c97d98f09f7..2c50e71138fd9 100644 --- a/sycl/test-e2e/Basic/device-selectors.cpp +++ b/sycl/test-e2e/Basic/device-selectors.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out -DSYCL_DISABLE_IMAGE_ASPECT_WARNING // RUN: %{run-unfiltered-devices} %t.out -#include +#include using namespace sycl; auto exception_handler_lambda = [](exception_list elist) { diff --git a/sycl/test-e2e/Basic/device.cpp b/sycl/test-e2e/Basic/device.cpp index 76978470ced08..be97706f3bc92 100644 --- a/sycl/test-e2e/Basic/device.cpp +++ b/sycl/test-e2e/Basic/device.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Basic/device_equality.cpp b/sycl/test-e2e/Basic/device_equality.cpp index d4719d99661d6..1655476b16373 100644 --- a/sycl/test-e2e/Basic/device_equality.cpp +++ b/sycl/test-e2e/Basic/device_equality.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Basic/diagnostics/device-check.cpp b/sycl/test-e2e/Basic/diagnostics/device-check.cpp index 25682a723db59..52e10db655dc5 100644 --- a/sycl/test-e2e/Basic/diagnostics/device-check.cpp +++ b/sycl/test-e2e/Basic/diagnostics/device-check.cpp @@ -18,7 +18,7 @@ //==---------------------------------------------------------------==// #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/diagnostics/handler.cpp b/sycl/test-e2e/Basic/diagnostics/handler.cpp index d704335a6e5b9..70b5dbba1472c 100644 --- a/sycl/test-e2e/Basic/diagnostics/handler.cpp +++ b/sycl/test-e2e/Basic/diagnostics/handler.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include int main() { sycl::queue Queue([](sycl::exception_list ExceptionList) { diff --git a/sycl/test-e2e/Basic/enqueue_barrier.cpp b/sycl/test-e2e/Basic/enqueue_barrier.cpp index 6a2a118c2ff55..0eae48b74f28f 100644 --- a/sycl/test-e2e/Basic/enqueue_barrier.cpp 
+++ b/sycl/test-e2e/Basic/enqueue_barrier.cpp @@ -6,7 +6,7 @@ // UNSUPPORTED: windows #include -#include +#include int main() { sycl::context Context; diff --git a/sycl/test-e2e/Basic/event.cpp b/sycl/test-e2e/Basic/event.cpp index df4b44bed626b..000149bdc6807 100644 --- a/sycl/test-e2e/Basic/event.cpp +++ b/sycl/test-e2e/Basic/event.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// #include -#include +#include int main() { { diff --git a/sycl/test-e2e/Basic/event_async_exception.cpp b/sycl/test-e2e/Basic/event_async_exception.cpp index 9a9ef88a36896..20b7d5e71bc2f 100644 --- a/sycl/test-e2e/Basic/event_async_exception.cpp +++ b/sycl/test-e2e/Basic/event_async_exception.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#include +#include // This test checks that if there is a submit failure, the asynchronous // exception is associated with the returned event. diff --git a/sycl/test-e2e/Basic/fill_accessor.cpp b/sycl/test-e2e/Basic/fill_accessor.cpp index 0c17e02aa2ca0..7c3996b2fc4c2 100644 --- a/sycl/test-e2e/Basic/fill_accessor.cpp +++ b/sycl/test-e2e/Basic/fill_accessor.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/fill_accessor_pi.cpp b/sycl/test-e2e/Basic/fill_accessor_pi.cpp index a92cc44c0fa51..820c2fdeb66d7 100644 --- a/sycl/test-e2e/Basic/fill_accessor_pi.cpp +++ b/sycl/test-e2e/Basic/fill_accessor_pi.cpp @@ -4,7 +4,7 @@ // This test merely checks the use of the correct PI call. Its sister test // fill_accessor.cpp thoroughly checks the workings of the .fill() call. 
-#include +#include constexpr int width = 32; constexpr int height = 16; constexpr int depth = 8; diff --git a/sycl/test-e2e/Basic/fpga_tests/pipes_info.cpp b/sycl/test-e2e/Basic/fpga_tests/pipes_info.cpp index 5d42f4b37b715..aa54e5d6f7162 100644 --- a/sycl/test-e2e/Basic/fpga_tests/pipes_info.cpp +++ b/sycl/test-e2e/Basic/fpga_tests/pipes_info.cpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -#include +#include int main() { sycl::queue Queue; diff --git a/sycl/test-e2e/Basic/get_backend.cpp b/sycl/test-e2e/Basic/get_backend.cpp index c3930b607b78a..374bcc0fba8f7 100644 --- a/sycl/test-e2e/Basic/get_backend.cpp +++ b/sycl/test-e2e/Basic/get_backend.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/get_info_aspect.cpp b/sycl/test-e2e/Basic/get_info_aspect.cpp index cea05384cc5a8..7576e0f98d11b 100644 --- a/sycl/test-e2e/Basic/get_info_aspect.cpp +++ b/sycl/test-e2e/Basic/get_info_aspect.cpp @@ -5,7 +5,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::queue q; bool failed = true; diff --git a/sycl/test-e2e/Basic/half_builtins.cpp b/sycl/test-e2e/Basic/half_builtins.cpp index 9637742cb67bb..e05b14ccb5555 100644 --- a/sycl/test-e2e/Basic/half_builtins.cpp +++ b/sycl/test-e2e/Basic/half_builtins.cpp @@ -2,7 +2,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/Basic/half_type.cpp b/sycl/test-e2e/Basic/half_type.cpp index b5769089abf43..110d428f82ee0 100644 --- a/sycl/test-e2e/Basic/half_type.cpp +++ b/sycl/test-e2e/Basic/half_type.cpp @@ -12,7 +12,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Basic/handler/handler_copy_with_offset.cpp b/sycl/test-e2e/Basic/handler/handler_copy_with_offset.cpp index cd3a936475d5e..97810b92d570a 100644 --- 
a/sycl/test-e2e/Basic/handler/handler_copy_with_offset.cpp +++ b/sycl/test-e2e/Basic/handler/handler_copy_with_offset.cpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/handler/handler_generic_integral_lambda.cpp b/sycl/test-e2e/Basic/handler/handler_generic_integral_lambda.cpp index 27883afc064c1..b68022ff45141 100644 --- a/sycl/test-e2e/Basic/handler/handler_generic_integral_lambda.cpp +++ b/sycl/test-e2e/Basic/handler/handler_generic_integral_lambda.cpp @@ -7,7 +7,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/handler/handler_mem_op.cpp b/sycl/test-e2e/Basic/handler/handler_mem_op.cpp index c3c8fd625e760..8fe99aa149951 100644 --- a/sycl/test-e2e/Basic/handler/handler_mem_op.cpp +++ b/sycl/test-e2e/Basic/handler/handler_mem_op.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/host-task-dependency.cpp b/sycl/test-e2e/Basic/host-task-dependency.cpp index 1e5903f76173f..c142b92e22587 100644 --- a/sycl/test-e2e/Basic/host-task-dependency.cpp +++ b/sycl/test-e2e/Basic/host-task-dependency.cpp @@ -7,7 +7,7 @@ #define SYCL2020_DISABLE_DEPRECATION_WARNINGS -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/host_defer_copy.cpp b/sycl/test-e2e/Basic/host_defer_copy.cpp index 468f748212375..f4711d6339920 100644 --- a/sycl/test-e2e/Basic/host_defer_copy.cpp +++ b/sycl/test-e2e/Basic/host_defer_copy.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include constexpr int N = 10 * 1024 * 1024; diff --git a/sycl/test-e2e/Basic/host_write_back.cpp b/sycl/test-e2e/Basic/host_write_back.cpp index 44291bbc039bd..ed21bdb61e098 100644 --- 
a/sycl/test-e2e/Basic/host_write_back.cpp +++ b/sycl/test-e2e/Basic/host_write_back.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/image/image_constructors.cpp b/sycl/test-e2e/Basic/image/image_constructors.cpp index 260a31a0378d9..585e0ff65a562 100644 --- a/sycl/test-e2e/Basic/image/image_constructors.cpp +++ b/sycl/test-e2e/Basic/image/image_constructors.cpp @@ -10,7 +10,7 @@ // Tests the constructors, size and get_range APIs. #include -#include +#include void no_delete(void *) {} diff --git a/sycl/test-e2e/Basic/image/srgba-aspect-check.cpp b/sycl/test-e2e/Basic/image/srgba-aspect-check.cpp index 3715ef536de7c..5c892d716fcc5 100644 --- a/sycl/test-e2e/Basic/image/srgba-aspect-check.cpp +++ b/sycl/test-e2e/Basic/image/srgba-aspect-check.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Basic/info.cpp b/sycl/test-e2e/Basic/info.cpp index 398fd161728c4..b6e9546a62d84 100644 --- a/sycl/test-e2e/Basic/info.cpp +++ b/sycl/test-e2e/Basic/info.cpp @@ -8,7 +8,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/interop/construction_ze.cpp b/sycl/test-e2e/Basic/interop/construction_ze.cpp index 7ef4c89c7695a..57f89f7d41059 100644 --- a/sycl/test-e2e/Basic/interop/construction_ze.cpp +++ b/sycl/test-e2e/Basic/interop/construction_ze.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include constexpr auto BE = sycl::backend::ext_oneapi_level_zero; diff --git a/sycl/test-e2e/Basic/interop/ze_context_device.cpp b/sycl/test-e2e/Basic/interop/ze_context_device.cpp index 104205858a339..9dd34e93ba54d 100644 --- a/sycl/test-e2e/Basic/interop/ze_context_device.cpp +++ b/sycl/test-e2e/Basic/interop/ze_context_device.cpp @@ -5,7 +5,7 @@ // This test 
checks that an interop Level Zero device is properly handled during // interop context construction. #include -#include +#include #include diff --git a/sycl/test-e2e/Basic/library_loading.cpp b/sycl/test-e2e/Basic/library_loading.cpp index 8a3244e6599af..59f0434c012da 100644 --- a/sycl/test-e2e/Basic/library_loading.cpp +++ b/sycl/test-e2e/Basic/library_loading.cpp @@ -4,7 +4,7 @@ // RUN: FileCheck --input-file=%t_trace_no_filter.txt --check-prefix=CHECK-NO-FILTER %s -dump-input=fail // Checks pi traces on library loading -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/linear-sub_group.cpp b/sycl/test-e2e/Basic/linear-sub_group.cpp index 169876874896d..df99664c8f9b9 100644 --- a/sycl/test-e2e/Basic/linear-sub_group.cpp +++ b/sycl/test-e2e/Basic/linear-sub_group.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/local_mem_type.cpp b/sycl/test-e2e/Basic/local_mem_type.cpp index 78aa2d9073fb0..7ae5b64ab8afa 100644 --- a/sycl/test-e2e/Basic/local_mem_type.cpp +++ b/sycl/test-e2e/Basic/local_mem_type.cpp @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include +#include using namespace sycl; int main() { diff --git a/sycl/test-e2e/Basic/multisource.cpp b/sycl/test-e2e/Basic/multisource.cpp index 056e9e0631b25..778fc7007e0a8 100644 --- a/sycl/test-e2e/Basic/multisource.cpp +++ b/sycl/test-e2e/Basic/multisource.cpp @@ -19,7 +19,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t.init.o %t.calc.o %t.main.o -o %t.fat // RUN: %{run} %t.fat -#include +#include #include diff --git a/sycl/test-e2e/Basic/multisource_spv_obj.cpp b/sycl/test-e2e/Basic/multisource_spv_obj.cpp index b05db159473a2..46bbfceac0fb9 100644 --- a/sycl/test-e2e/Basic/multisource_spv_obj.cpp +++ b/sycl/test-e2e/Basic/multisource_spv_obj.cpp @@ -28,7 +28,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %t.init.o %t.calc.o %t.main.o -o 
%t.fat // RUN: %{run} %t.fat -#include +#include #include diff --git a/sycl/test-e2e/Basic/offset-accessor-get_pointer.cpp b/sycl/test-e2e/Basic/offset-accessor-get_pointer.cpp index c588874d3b6b6..097405ef5763b 100644 --- a/sycl/test-e2e/Basic/offset-accessor-get_pointer.cpp +++ b/sycl/test-e2e/Basic/offset-accessor-get_pointer.cpp @@ -11,7 +11,7 @@ // accessors in the past. Not relevant for images, which do not support offset // accessors. -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Basic/parallel_for_disable_range_roundup.cpp b/sycl/test-e2e/Basic/parallel_for_disable_range_roundup.cpp index 3a7fd80f249f1..2b747d5a7cbc2 100644 --- a/sycl/test-e2e/Basic/parallel_for_disable_range_roundup.cpp +++ b/sycl/test-e2e/Basic/parallel_for_disable_range_roundup.cpp @@ -10,7 +10,7 @@ // RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %{run} %t.out | FileCheck %s --check-prefix CHECK-ENABLED #include -#include +#include using namespace sycl; range<1> Range1 = {0}; diff --git a/sycl/test-e2e/Basic/parallel_for_offset_integral_t.cpp b/sycl/test-e2e/Basic/parallel_for_offset_integral_t.cpp index 35380ae45a428..74eae3ec9e3af 100644 --- a/sycl/test-e2e/Basic/parallel_for_offset_integral_t.cpp +++ b/sycl/test-e2e/Basic/parallel_for_offset_integral_t.cpp @@ -3,7 +3,7 @@ // RUN: %{build} -DLAMBDA_KERNEL=0 -DSYCL2020_DISABLE_DEPRECATION_WARNINGS -o %t2.out // RUN: %{run} %t2.out -#include +#include template class func { AccT acc; diff --git a/sycl/test-e2e/Basic/partition_supported.cpp b/sycl/test-e2e/Basic/partition_supported.cpp index f75a983754112..1dfcec0eae158 100644 --- a/sycl/test-e2e/Basic/partition_supported.cpp +++ b/sycl/test-e2e/Basic/partition_supported.cpp @@ -14,7 +14,7 @@ not support the info::partition_affinity_domain provided, an exception with the */ #include -#include +#include /** returns true if the device supports a particular affinity domain */ static bool diff --git 
a/sycl/test-e2e/Basic/queue/queue_select_device_not_in_context.cpp b/sycl/test-e2e/Basic/queue/queue_select_device_not_in_context.cpp index 30f68dac1b558..418b8891956fe 100644 --- a/sycl/test-e2e/Basic/queue/queue_select_device_not_in_context.cpp +++ b/sycl/test-e2e/Basic/queue/queue_select_device_not_in_context.cpp @@ -4,7 +4,7 @@ // This test checks that the queue constructor throws a sycl::exception if the // device selected by the provided selector is not in the specified context. -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/queue/queue_shortcut_functions.cpp b/sycl/test-e2e/Basic/queue/queue_shortcut_functions.cpp index 14beee2134dd3..3bbf9cc74daf2 100644 --- a/sycl/test-e2e/Basic/queue/queue_shortcut_functions.cpp +++ b/sycl/test-e2e/Basic/queue/queue_shortcut_functions.cpp @@ -9,7 +9,7 @@ // //==------------------------------------------------------------------------==// -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/queue/release.cpp b/sycl/test-e2e/Basic/queue/release.cpp index 87f3dc0ae8dbb..b56be05dc5308 100644 --- a/sycl/test-e2e/Basic/queue/release.cpp +++ b/sycl/test-e2e/Basic/queue/release.cpp @@ -3,7 +3,7 @@ // // XFAIL: hip_nvidia -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp b/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp index e9cfe42a458c0..76a5739e9ee35 100644 --- a/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp +++ b/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp @@ -3,7 +3,7 @@ // UNSUPPORTED: hip -#include +#include #define CHECK_INVALID_REQD_WORK_GROUP_SIZE(Dim, ...) 
\ { \ diff --git a/sycl/test-e2e/Basic/spirv_device_obj_smoke.cpp b/sycl/test-e2e/Basic/spirv_device_obj_smoke.cpp index b4bd89e6ee1fc..4c6f371814b69 100644 --- a/sycl/test-e2e/Basic/spirv_device_obj_smoke.cpp +++ b/sycl/test-e2e/Basic/spirv_device_obj_smoke.cpp @@ -5,7 +5,7 @@ // This test verifies SPIR-V based fat objects. -#include +#include int main() { sycl::buffer Buffer(4); diff --git a/sycl/test-e2e/Basic/submit_barrier.cpp b/sycl/test-e2e/Basic/submit_barrier.cpp index 763ed9f642547..0b15314d05246 100644 --- a/sycl/test-e2e/Basic/submit_barrier.cpp +++ b/sycl/test-e2e/Basic/submit_barrier.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out #include -#include +#include int main() { diff --git a/sycl/test-e2e/Basic/swizzle_op.cpp b/sycl/test-e2e/Basic/swizzle_op.cpp index d87fdf26c486e..a81b9cb16e54d 100644 --- a/sycl/test-e2e/Basic/swizzle_op.cpp +++ b/sycl/test-e2e/Basic/swizzle_op.cpp @@ -10,7 +10,7 @@ #define SYCL_SIMPLE_SWIZZLES #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Basic/sycl-namespace.cpp b/sycl/test-e2e/Basic/sycl-namespace.cpp index f1f34a8ab57af..e4c8a64e611cf 100644 --- a/sycl/test-e2e/Basic/sycl-namespace.cpp +++ b/sycl/test-e2e/Basic/sycl-namespace.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { ::sycl::queue q; diff --git a/sycl/test-e2e/Basic/unused_pointer.cpp b/sycl/test-e2e/Basic/unused_pointer.cpp index 08701954a7ea4..89e24720ff738 100644 --- a/sycl/test-e2e/Basic/unused_pointer.cpp +++ b/sycl/test-e2e/Basic/unused_pointer.cpp @@ -10,7 +10,7 @@ //==----------------------------------------------------------------------==// #include -#include +#include using namespace std; diff --git a/sycl/test-e2e/Basic/vector/bool.cpp b/sycl/test-e2e/Basic/vector/bool.cpp index 00257baebe969..424bf0e5d83a0 100644 --- a/sycl/test-e2e/Basic/vector/bool.cpp +++ b/sycl/test-e2e/Basic/vector/bool.cpp @@ -12,7 +12,7 @@ // 
//===----------------------------------------------------------------------===// -#include +#include constexpr int size = 2; diff --git a/sycl/test-e2e/Basic/vector/byte.cpp b/sycl/test-e2e/Basic/vector/byte.cpp index 36b16737140cd..a2d70d1a0ba31 100644 --- a/sycl/test-e2e/Basic/vector/byte.cpp +++ b/sycl/test-e2e/Basic/vector/byte.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include // std::byte #include // std::ignore diff --git a/sycl/test-e2e/Basic/vector/int-convert.cpp b/sycl/test-e2e/Basic/vector/int-convert.cpp index e09390d9c14e3..c9ff8acfaab0e 100644 --- a/sycl/test-e2e/Basic/vector/int-convert.cpp +++ b/sycl/test-e2e/Basic/vector/int-convert.cpp @@ -10,7 +10,7 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -DSYCL2020_DISABLE_DEPRECATION_WARNINGS -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} -#include +#include #include #include diff --git a/sycl/test-e2e/Basic/vector/operators.cpp b/sycl/test-e2e/Basic/vector/operators.cpp index 2648fd939e166..ddd1bdb2a2b66 100644 --- a/sycl/test-e2e/Basic/vector/operators.cpp +++ b/sycl/test-e2e/Basic/vector/operators.cpp @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// #define SYCL_SIMPLE_SWIZZLES -#include +#include namespace s = sycl; template diff --git a/sycl/test-e2e/Complex/sycl_complex_include_order.cpp b/sycl/test-e2e/Complex/sycl_complex_include_order.cpp index 245d222c65209..92c4d341e7f3f 100644 --- a/sycl/test-e2e/Complex/sycl_complex_include_order.cpp +++ b/sycl/test-e2e/Complex/sycl_complex_include_order.cpp @@ -18,7 +18,7 @@ #include #endif -#include +#include #ifndef INCLUDE_BEFORE #include diff --git a/sycl/test-e2e/Config/config.cpp b/sycl/test-e2e/Config/config.cpp index d369d72f0cfe6..701fd8c2961f0 100644 --- a/sycl/test-e2e/Config/config.cpp +++ b/sycl/test-e2e/Config/config.cpp @@ -16,7 +16,7 
@@ // RUN: %t.out // RUN: ls | not grep dot -#include +#include int main() { sycl::buffer Buf(sycl::range<1>{1}); diff --git a/sycl/test-e2e/Config/default-context.cpp b/sycl/test-e2e/Config/default-context.cpp index 53148feab12f5..11e3c2e42302c 100644 --- a/sycl/test-e2e/Config/default-context.cpp +++ b/sycl/test-e2e/Config/default-context.cpp @@ -3,7 +3,7 @@ // RUN: env SYCL_ENABLE_DEFAULT_CONTEXTS=1 %t.out // RUN: env SYCL_ENABLE_DEFAULT_CONTEXTS=0 %t.out 1 -#include +#include // when not using the environment variable, we use the "default context" on both // Lin and Win. This test asserts it defaults correctly, and that the diff --git a/sycl/test-e2e/Config/device_selector.cpp b/sycl/test-e2e/Config/device_selector.cpp index 74ad5db48dafe..6ae5213b5810f 100644 --- a/sycl/test-e2e/Config/device_selector.cpp +++ b/sycl/test-e2e/Config/device_selector.cpp @@ -4,7 +4,7 @@ // Checks that no device is selected when no device of desired type is // available. -#include +#include #include diff --git a/sycl/test-e2e/Config/env_vars.cpp b/sycl/test-e2e/Config/env_vars.cpp index 2cf3f2efc2fcb..97ed4ef884d0f 100644 --- a/sycl/test-e2e/Config/env_vars.cpp +++ b/sycl/test-e2e/Config/env_vars.cpp @@ -15,7 +15,7 @@ #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Config/kernel_from_file.cpp b/sycl/test-e2e/Config/kernel_from_file.cpp index f890036511772..aaaaa27e11bb1 100644 --- a/sycl/test-e2e/Config/kernel_from_file.cpp +++ b/sycl/test-e2e/Config/kernel_from_file.cpp @@ -18,7 +18,7 @@ // CHECK: Passed #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Config/select_device.cpp b/sycl/test-e2e/Config/select_device.cpp index 2d62c1bf5e1e0..8cab3a707ad44 100644 --- a/sycl/test-e2e/Config/select_device.cpp +++ b/sycl/test-e2e/Config/select_device.cpp @@ -57,7 +57,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/DeprecatedFeatures/deprecated.cpp 
b/sycl/test-e2e/DeprecatedFeatures/deprecated.cpp index 6b82cd1a8adeb..aa185afc3da2e 100644 --- a/sycl/test-e2e/DeprecatedFeatures/deprecated.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/deprecated.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/DeprecatedFeatures/deprecated_sycl_device_filter.cpp b/sycl/test-e2e/DeprecatedFeatures/deprecated_sycl_device_filter.cpp index 9710d58050447..12fa070e86507 100644 --- a/sycl/test-e2e/DeprecatedFeatures/deprecated_sycl_device_filter.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/deprecated_sycl_device_filter.cpp @@ -18,7 +18,7 @@ // This test is to check if a warning message is displayed when using the // enviroment variable SYCL_DEVICE_FILTER // TODO: Remove test when SYCL_DEVICE_FILTER is removed -#include +#include int main() { using namespace sycl; diff --git a/sycl/test-e2e/DeprecatedFeatures/platform.cpp b/sycl/test-e2e/DeprecatedFeatures/platform.cpp index 9f1829c91229a..528309688caa3 100644 --- a/sycl/test-e2e/DeprecatedFeatures/platform.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/platform.cpp @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp b/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp index a31df4ff8b8b3..79e8e04d20134 100644 --- a/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/queue_old_interop.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/DeprecatedFeatures/sycl_device_filter.cpp b/sycl/test-e2e/DeprecatedFeatures/sycl_device_filter.cpp index 046ee7de6985c..d717030d368d9 100644 --- 
a/sycl/test-e2e/DeprecatedFeatures/sycl_device_filter.cpp +++ b/sycl/test-e2e/DeprecatedFeatures/sycl_device_filter.cpp @@ -4,7 +4,7 @@ // RUN: %if any-device-is-acc %{ env SYCL_DEVICE_FILTER=acc %{run-unfiltered-devices} %t.out %} // TODO: Remove this test once SYCL_DEVICE_FILTER is removed. -#include +#include int main() { namespace dev_info = sycl::info::device; diff --git a/sycl/test-e2e/DeviceArchitecture/device_architecture_on_host.cpp b/sycl/test-e2e/DeviceArchitecture/device_architecture_on_host.cpp index 4f13d16b4a694..61ca876b64d01 100644 --- a/sycl/test-e2e/DeviceArchitecture/device_architecture_on_host.cpp +++ b/sycl/test-e2e/DeviceArchitecture/device_architecture_on_host.cpp @@ -3,7 +3,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp b/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp index e5fa01bd012bd..b58e1ce59899a 100644 --- a/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/split-per-kernel.cpp @@ -4,7 +4,7 @@ // // XFAIL: hip_nvidia -#include +#include class Kern1; class Kern2; diff --git a/sycl/test-e2e/DeviceLib/built-ins/fast-math-flag.cpp b/sycl/test-e2e/DeviceLib/built-ins/fast-math-flag.cpp index 26934041d0a87..59e02c7a37add 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/fast-math-flag.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/fast-math-flag.cpp @@ -2,7 +2,9 @@ // RUN: %{run} %t.out #include -#include +#include + +#include using namespace sycl; diff --git a/sycl/test-e2e/DeviceLib/built-ins/ftz-flag.cpp b/sycl/test-e2e/DeviceLib/built-ins/ftz-flag.cpp index 37917e103c7a6..8c68c15d34352 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/ftz-flag.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/ftz-flag.cpp @@ -4,9 +4,12 @@ // RUN: %{run} %t.out #include -#include #include +#include + +#include + using namespace sycl; constexpr float eps = 1e-6; diff --git 
a/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp b/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp index c4eb526f23c04..31f3aeef3c565 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/marray_integer.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #define TEST(FUNC, MARRAY_RET_TYPE, DIM, EXPECTED, ...) \ { \ diff --git a/sycl/test-e2e/DeviceLib/built-ins/nan.cpp b/sycl/test-e2e/DeviceLib/built-ins/nan.cpp index 0e60689a6b74f..4d723acbc415d 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/nan.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/nan.cpp @@ -5,10 +5,12 @@ // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} #include -#include - #include +#include + +#include + namespace s = sycl; using namespace std; diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_common.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_common.cpp index 0bb04b5dc336a..5e12037006e91 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_common.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_common.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_geometric.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_geometric.cpp index 81c60ed484806..634706c578ccf 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_geometric.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_geometric.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp index 314b6fd5515cc..bcd640d37ef54 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_integer.cpp @@ -4,7 +4,9 @@ // RUN: %if preview-breaking-changes-supported %{ %{build} 
-fpreview-breaking-changes -o %t2.out %} // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_math.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_math.cpp index aafb32379e479..ffd405ef5b21b 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_math.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_math.cpp @@ -3,7 +3,9 @@ // RUN: %{build} %{mathflags} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_math_2.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_math_2.cpp index 3bce5c5dc2ee2..919e6c8d13dae 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_math_2.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_math_2.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/scalar_relational.cpp b/sycl/test-e2e/DeviceLib/built-ins/scalar_relational.cpp index 7a62735397e57..0eab2036a98b8 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/scalar_relational.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/scalar_relational.cpp @@ -2,7 +2,9 @@ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out %{mathflags} // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/vector_common.cpp b/sycl/test-e2e/DeviceLib/built-ins/vector_common.cpp index 11feb90d71d19..d817eacf61079 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/vector_common.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/vector_common.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/vector_geometric.cpp b/sycl/test-e2e/DeviceLib/built-ins/vector_geometric.cpp index 86c83fa6d956b..6fa9948e77c5c 100644 --- 
a/sycl/test-e2e/DeviceLib/built-ins/vector_geometric.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/vector_geometric.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/vector_integer.cpp b/sycl/test-e2e/DeviceLib/built-ins/vector_integer.cpp index 739f7b020db55..f7d1aa1b55c33 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/vector_integer.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/vector_integer.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/vector_math.cpp b/sycl/test-e2e/DeviceLib/built-ins/vector_math.cpp index bbd80cc2599ce..5b6075f06f11d 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/vector_math.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/vector_math.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/built-ins/vector_relational.cpp b/sycl/test-e2e/DeviceLib/built-ins/vector_relational.cpp index f895c85c1c90a..d5938a08e31a4 100644 --- a/sycl/test-e2e/DeviceLib/built-ins/vector_relational.cpp +++ b/sycl/test-e2e/DeviceLib/built-ins/vector_relational.cpp @@ -4,7 +4,9 @@ // RUN: %{build} -o %t.out %{mathflags} // RUN: %{run} %t.out -#include +#include + +#include #include #include diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 77aeb312571d2..6391154feb820 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; diff --git a/sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp b/sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp index 
7683c2eb97add..43a90d8d518bc 100644 --- a/sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp +++ b/sycl/test-e2e/DeviceLib/imf_bfloat16_integeral_convesions.cpp @@ -11,7 +11,7 @@ #include "imf_utils.hpp" #include -#include +#include int main() { sycl::queue device_queue(sycl::default_selector_v); diff --git a/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp b/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp index ecd4561e98c9d..493cb2a0b1f7e 100644 --- a/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp +++ b/sycl/test-e2e/DeviceLib/imf_simd_emulate_test.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; diff --git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp index 355d3f1ab2d55..94b91255a5f1b 100644 --- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; diff --git a/sycl/test-e2e/DeviceLib/math_override_test.cpp b/sycl/test-e2e/DeviceLib/math_override_test.cpp index c0feaf37f3094..491f21d76a486 100644 --- a/sycl/test-e2e/DeviceLib/math_override_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_override_test.cpp @@ -2,7 +2,7 @@ // RUN: %clangxx -fsycl %s -o %t.out -fno-builtin #include #include -#include +#include #include "math_utils.hpp" namespace s = sycl; diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp index d2e44f51cb48b..ccc368a21e668 100644 --- a/sycl/test-e2e/DeviceLib/math_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_test.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; diff --git a/sycl/test-e2e/DotProduct/dot_product_int_test.cpp b/sycl/test-e2e/DotProduct/dot_product_int_test.cpp 
index fdd32c0127468..41749efd13055 100644 --- a/sycl/test-e2e/DotProduct/dot_product_int_test.cpp +++ b/sycl/test-e2e/DotProduct/dot_product_int_test.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include // Change if tests are added/removed static int testCount = 4; diff --git a/sycl/test-e2e/DotProduct/dot_product_vec_test.cpp b/sycl/test-e2e/DotProduct/dot_product_vec_test.cpp index c22fbcd8274c1..a83774d712e40 100644 --- a/sycl/test-e2e/DotProduct/dot_product_vec_test.cpp +++ b/sycl/test-e2e/DotProduct/dot_product_vec_test.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include // Change if tests are added/removed static int testCount = 4; diff --git a/sycl/test-e2e/ESIMD/regression/double_conversion.cpp b/sycl/test-e2e/ESIMD/regression/double_conversion.cpp index 66bd9ee478658..654629e73e39c 100644 --- a/sycl/test-e2e/ESIMD/regression/double_conversion.cpp +++ b/sycl/test-e2e/ESIMD/regression/double_conversion.cpp @@ -12,7 +12,7 @@ // RUN: %{run} %t.out #include -#include +#include #include #include diff --git a/sycl/test-e2e/ESIMD/regression/fmod_compatibility_test.cpp b/sycl/test-e2e/ESIMD/regression/fmod_compatibility_test.cpp index 5f7a64a6b79b7..d449c11858f59 100644 --- a/sycl/test-e2e/ESIMD/regression/fmod_compatibility_test.cpp +++ b/sycl/test-e2e/ESIMD/regression/fmod_compatibility_test.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include constexpr auto sycl_write = sycl::access::mode::write; diff --git a/sycl/test-e2e/ESIMD/regression/half_conversion_test.cpp b/sycl/test-e2e/ESIMD/regression/half_conversion_test.cpp index c6f3eb4d9ec52..5a2a26a5ad485 100644 --- a/sycl/test-e2e/ESIMD/regression/half_conversion_test.cpp +++ b/sycl/test-e2e/ESIMD/regression/half_conversion_test.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/ESIMD/regression/sycl_esimd_mixed_unnamed.cpp b/sycl/test-e2e/ESIMD/regression/sycl_esimd_mixed_unnamed.cpp index 
f34caf146bc2f..bff75087186a6 100644 --- a/sycl/test-e2e/ESIMD/regression/sycl_esimd_mixed_unnamed.cpp +++ b/sycl/test-e2e/ESIMD/regression/sycl_esimd_mixed_unnamed.cpp @@ -13,7 +13,7 @@ #include #include -#include +#include using namespace ::sycl; diff --git a/sycl/test-e2e/ESIMD/regression/unused_load.cpp b/sycl/test-e2e/ESIMD/regression/unused_load.cpp index 2442ea3634588..1400cf5d31744 100644 --- a/sycl/test-e2e/ESIMD/regression/unused_load.cpp +++ b/sycl/test-e2e/ESIMD/regression/unused_load.cpp @@ -12,7 +12,7 @@ // copy_from invocation. #include -#include +#include #include diff --git a/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp b/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp index 80963c5168a46..da3d34df48f37 100644 --- a/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp +++ b/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp @@ -9,7 +9,7 @@ // REQUIRES: level_zero,gpu #include -#include +#include using namespace sycl; using namespace std; diff --git a/sycl/test-e2e/Functor/functor_inheritance.cpp b/sycl/test-e2e/Functor/functor_inheritance.cpp index 926fcc2859955..bcb8f2d9394cc 100644 --- a/sycl/test-e2e/Functor/functor_inheritance.cpp +++ b/sycl/test-e2e/Functor/functor_inheritance.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out #include -#include +#include constexpr auto sycl_read_write = sycl::access::mode::read_write; constexpr auto sycl_device = sycl::access::target::device; diff --git a/sycl/test-e2e/Functor/kernel_functor.cpp b/sycl/test-e2e/Functor/kernel_functor.cpp index 09840ff6705db..de6cc865c5ea9 100644 --- a/sycl/test-e2e/Functor/kernel_functor.cpp +++ b/sycl/test-e2e/Functor/kernel_functor.cpp @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include diff --git a/sycl/test-e2e/HierPar/hier_par_basic.cpp b/sycl/test-e2e/HierPar/hier_par_basic.cpp index 523a26d6e8ee6..a51a582b2e00f 100644 --- a/sycl/test-e2e/HierPar/hier_par_basic.cpp 
+++ b/sycl/test-e2e/HierPar/hier_par_basic.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include using namespace sycl; @@ -59,7 +59,7 @@ struct PFWIFunctor { if (id >= wg_chunk) return; size_t wi_offset = wg_offset + id * wi_chunk; - size_t ub = sycl::min(wi_offset + wi_chunk, range_length); + size_t ub = std::min(wi_offset + wi_chunk, range_length); for (size_t ind = wi_offset; ind < ub; ind++) dev_ptr[ind] += v; diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp index 8399cf410a094..1f60d9a95f2b8 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency2.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out 10 #include -#include +#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp index a1b1c778a8003..bac3f777c7be1 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency3.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp b/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp index 1a8c8faf1a674..bb315329195bf 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-dependency4.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out -#include +#include sycl::event submit(sycl::queue &Q, sycl::buffer &B) { return Q.submit([&](sycl::handler &CGH) { diff --git a/sycl/test-e2e/HostInteropTask/host-task-failure.cpp b/sycl/test-e2e/HostInteropTask/host-task-failure.cpp index 9afdd9ba43907..eef7cf70b21bb 100644 --- a/sycl/test-e2e/HostInteropTask/host-task-failure.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task-failure.cpp @@ -4,7 +4,7 @@ // Windows doesn't yet have full 
shutdown(). // UNSUPPORTED: ze_debug && windows -#include +#include using namespace sycl; using namespace sycl::access; diff --git a/sycl/test-e2e/HostInteropTask/host-task.cpp b/sycl/test-e2e/HostInteropTask/host-task.cpp index b1e588b147e2a..6bae8850cc6c2 100644 --- a/sycl/test-e2e/HostInteropTask/host-task.cpp +++ b/sycl/test-e2e/HostInteropTask/host-task.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_bad_opcode.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_bad_opcode.cpp index a16b903c09a84..ecec5787c1e1a 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_bad_opcode.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_bad_opcode.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_bad_operand_syntax.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_bad_operand_syntax.cpp index ea7434dd31498..2cd70a2051dc0 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_bad_operand_syntax.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_bad_operand_syntax.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_duplicate_label.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_duplicate_label.cpp index 58978e203d8a8..773a596fbe549 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_duplicate_label.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_duplicate_label.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_illegal_exec_size.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_illegal_exec_size.cpp index 98d9b1ff1520d..715df0c00bc14 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_illegal_exec_size.cpp +++ 
b/sycl/test-e2e/InlineAsm/Negative/asm_illegal_exec_size.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_missing_label.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_missing_label.cpp index 47f0fd98311ae..6dfbe70e92030 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_missing_label.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_missing_label.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_missing_region.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_missing_region.cpp index 395eb4af68c03..3ace63d902bb4 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_missing_region.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_missing_region.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_simple.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_simple.cpp index e36a15cf1cbf4..7372765faf3fa 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_simple.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_simple.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_undefined_decl.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_undefined_decl.cpp index 53b64cf2ba2fd..f1ce6e2f729c2 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_undefined_decl.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_undefined_decl.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_undefined_pred.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_undefined_pred.cpp index 
0ce42082fe2ef..286e7ee9b860a 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_undefined_pred.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_undefined_pred.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/Negative/asm_wrong_declare.cpp b/sycl/test-e2e/InlineAsm/Negative/asm_wrong_declare.cpp index 86151cdc7cab7..20e62c61813b2 100644 --- a/sycl/test-e2e/InlineAsm/Negative/asm_wrong_declare.cpp +++ b/sycl/test-e2e/InlineAsm/Negative/asm_wrong_declare.cpp @@ -5,7 +5,7 @@ // RUN: %{run} %t.out #include "../include/asmhelper.h" -#include +#include struct KernelFunctor { KernelFunctor() {} diff --git a/sycl/test-e2e/InlineAsm/asm_16_empty.cpp b/sycl/test-e2e/InlineAsm/asm_16_empty.cpp index 78321716777ac..a1d2ec9220763 100644 --- a/sycl/test-e2e/InlineAsm/asm_16_empty.cpp +++ b/sycl/test-e2e/InlineAsm/asm_16_empty.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_16_matrix_mult.cpp b/sycl/test-e2e/InlineAsm/asm_16_matrix_mult.cpp index 00c3eb3830f38..f92912919c786 100644 --- a/sycl/test-e2e/InlineAsm/asm_16_matrix_mult.cpp +++ b/sycl/test-e2e/InlineAsm/asm_16_matrix_mult.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_16_no_input_int.cpp b/sycl/test-e2e/InlineAsm/asm_16_no_input_int.cpp index 00c3eb3830f38..f92912919c786 100644 --- a/sycl/test-e2e/InlineAsm/asm_16_no_input_int.cpp +++ b/sycl/test-e2e/InlineAsm/asm_16_no_input_int.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_16_no_opts.cpp b/sycl/test-e2e/InlineAsm/asm_16_no_opts.cpp index 812e30a7c9f56..07286b34b8d4d 100644 --- 
a/sycl/test-e2e/InlineAsm/asm_16_no_opts.cpp +++ b/sycl/test-e2e/InlineAsm/asm_16_no_opts.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_arbitrary_ops_order.cpp b/sycl/test-e2e/InlineAsm/asm_arbitrary_ops_order.cpp index 739feed41d6c9..9097a40131bbb 100644 --- a/sycl/test-e2e/InlineAsm/asm_arbitrary_ops_order.cpp +++ b/sycl/test-e2e/InlineAsm/asm_arbitrary_ops_order.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_decl_in_scope.cpp b/sycl/test-e2e/InlineAsm/asm_decl_in_scope.cpp index 60d200d1c99df..a6a754289e533 100644 --- a/sycl/test-e2e/InlineAsm/asm_decl_in_scope.cpp +++ b/sycl/test-e2e/InlineAsm/asm_decl_in_scope.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_float_add.cpp b/sycl/test-e2e/InlineAsm/asm_float_add.cpp index b6374c96d2010..f1d4d681b8edc 100644 --- a/sycl/test-e2e/InlineAsm/asm_float_add.cpp +++ b/sycl/test-e2e/InlineAsm/asm_float_add.cpp @@ -6,7 +6,7 @@ #include "include/asmhelper.h" #include #include -#include +#include #include using dataType = sycl::opencl::cl_float; diff --git a/sycl/test-e2e/InlineAsm/asm_float_imm_arg.cpp b/sycl/test-e2e/InlineAsm/asm_float_imm_arg.cpp index 948d983554c4e..b10aec8e47278 100644 --- a/sycl/test-e2e/InlineAsm/asm_float_imm_arg.cpp +++ b/sycl/test-e2e/InlineAsm/asm_float_imm_arg.cpp @@ -6,7 +6,7 @@ #include "include/asmhelper.h" #include #include -#include +#include #include constexpr float IMM_ARGUMENT = 0.5; diff --git a/sycl/test-e2e/InlineAsm/asm_float_neg.cpp b/sycl/test-e2e/InlineAsm/asm_float_neg.cpp index 43ad56e41222d..307a853fa407f 100644 --- a/sycl/test-e2e/InlineAsm/asm_float_neg.cpp +++ b/sycl/test-e2e/InlineAsm/asm_float_neg.cpp @@ -5,7 
+5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_float; diff --git a/sycl/test-e2e/InlineAsm/asm_if.cpp b/sycl/test-e2e/InlineAsm/asm_if.cpp index 679980a62aaf7..54a679a0509ea 100644 --- a/sycl/test-e2e/InlineAsm/asm_if.cpp +++ b/sycl/test-e2e/InlineAsm/asm_if.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include "include/asmhelper.h" -#include +#include using DataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_imm_arg.cpp b/sycl/test-e2e/InlineAsm/asm_imm_arg.cpp index d2a53f14691da..2506938a1bef8 100644 --- a/sycl/test-e2e/InlineAsm/asm_imm_arg.cpp +++ b/sycl/test-e2e/InlineAsm/asm_imm_arg.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include constexpr int CONST_ARGUMENT = 0xabc; diff --git a/sycl/test-e2e/InlineAsm/asm_loop.cpp b/sycl/test-e2e/InlineAsm/asm_loop.cpp index 56518dffbf4e3..eccc02ae1cab7 100644 --- a/sycl/test-e2e/InlineAsm/asm_loop.cpp +++ b/sycl/test-e2e/InlineAsm/asm_loop.cpp @@ -6,7 +6,7 @@ #include "include/asmhelper.h" #include #include -#include +#include #include using DataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_mul.cpp b/sycl/test-e2e/InlineAsm/asm_mul.cpp index b038a79f8abb1..df759b75d2a05 100644 --- a/sycl/test-e2e/InlineAsm/asm_mul.cpp +++ b/sycl/test-e2e/InlineAsm/asm_mul.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_no_operands.cpp b/sycl/test-e2e/InlineAsm/asm_no_operands.cpp index 5b770bb5baa0d..bf9df8ca40ae2 100644 --- a/sycl/test-e2e/InlineAsm/asm_no_operands.cpp +++ b/sycl/test-e2e/InlineAsm/asm_no_operands.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include "include/asmhelper.h" -#include +#include class no_operands_kernel; int main() { diff --git a/sycl/test-e2e/InlineAsm/asm_no_output.cpp b/sycl/test-e2e/InlineAsm/asm_no_output.cpp index 
0622f4f8edb38..3a130f1e4b819 100644 --- a/sycl/test-e2e/InlineAsm/asm_no_output.cpp +++ b/sycl/test-e2e/InlineAsm/asm_no_output.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_plus_mod.cpp b/sycl/test-e2e/InlineAsm/asm_plus_mod.cpp index db2659d8e7077..077446cf72859 100644 --- a/sycl/test-e2e/InlineAsm/asm_plus_mod.cpp +++ b/sycl/test-e2e/InlineAsm/asm_plus_mod.cpp @@ -5,7 +5,7 @@ #include "include/asmhelper.h" #include -#include +#include #include using dataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InlineAsm/asm_switch.cpp b/sycl/test-e2e/InlineAsm/asm_switch.cpp index ccd81fd34af45..4f96a55b554d2 100644 --- a/sycl/test-e2e/InlineAsm/asm_switch.cpp +++ b/sycl/test-e2e/InlineAsm/asm_switch.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include "include/asmhelper.h" -#include +#include using DataType = sycl::opencl::cl_int; diff --git a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/ESIMD_to_SPMD.cpp b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/ESIMD_to_SPMD.cpp index 71343f736b370..e8a1c45ecf903 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/ESIMD_to_SPMD.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/ESIMD_to_SPMD.cpp @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/recurs.cpp b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/recurs.cpp index c84eaa0f4ce0a..840b21d9140c1 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/recurs.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/recurs.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_marray_argument.cpp b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_marray_argument.cpp index 1911e9b4d0e47..dc42670d4781d 100644 --- 
a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_marray_argument.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_marray_argument.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_vec_argument.cpp b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_vec_argument.cpp index 415260686e36a..93cbabf425188 100644 --- a/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_vec_argument.cpp +++ b/sycl/test-e2e/InvokeSimd/Spec/clang_run_error/sycl_vec_argument.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/KernelAndProgram/build-log.cpp b/sycl/test-e2e/KernelAndProgram/build-log.cpp index cd57692a8160a..89ec9960d197f 100644 --- a/sycl/test-e2e/KernelAndProgram/build-log.cpp +++ b/sycl/test-e2e/KernelAndProgram/build-log.cpp @@ -14,7 +14,7 @@ //===--------------------------------------------------------------===// #include -#include +#include SYCL_EXTERNAL void symbol_that_does_not_exist(); diff --git a/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp b/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp index 90561a48c9edb..8b84140a28d77 100644 --- a/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp +++ b/sycl/test-e2e/KernelAndProgram/cache-build-result.cpp @@ -5,7 +5,7 @@ // RUN: %{build} -DSYCL_DISABLE_FALLBACK_ASSERT=1 -DGPU -o %t_gpu.out // RUN: env SYCL_CACHE_PERSISTENT=1 %{run} %if gpu %{ %t_gpu.out %} %else %{ %t.out %} -#include +#include SYCL_EXTERNAL void undefined(); diff --git a/sycl/test-e2e/KernelAndProgram/kernel-bundle-get-kernel.cpp b/sycl/test-e2e/KernelAndProgram/kernel-bundle-get-kernel.cpp index 96b6be8f3df2a..23a6acfcdf863 100644 --- a/sycl/test-e2e/KernelAndProgram/kernel-bundle-get-kernel.cpp +++ b/sycl/test-e2e/KernelAndProgram/kernel-bundle-get-kernel.cpp @@ -5,7 +5,7 @@ // kernel_bundle::get_kernel() is the same as a Kernel // object retrieved via other methods. 
-#include +#include class KernelA; diff --git a/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp b/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp index dad3872079b5d..9a2a8b5c5d046 100644 --- a/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp +++ b/sycl/test-e2e/KernelAndProgram/level-zero-link-flags.cpp @@ -12,7 +12,7 @@ // //===--------------------------------------------------------------===// -#include +#include class MyKernel; diff --git a/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp b/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp index f904f62b0d825..8be17e24d229d 100644 --- a/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp +++ b/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp @@ -25,7 +25,7 @@ // CHECK: ---> piProgramLink // CHECK: ZE ---> zeModuleCreate -#include +#include class MyKernel; diff --git a/sycl/test-e2e/KernelAndProgram/multiple-kernel-linking.cpp b/sycl/test-e2e/KernelAndProgram/multiple-kernel-linking.cpp index f800f79144ad8..fd8b77de0846e 100644 --- a/sycl/test-e2e/KernelAndProgram/multiple-kernel-linking.cpp +++ b/sycl/test-e2e/KernelAndProgram/multiple-kernel-linking.cpp @@ -14,7 +14,7 @@ // RUN: %{run} %t_off.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp b/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp index ffb48e30bbbbe..70c5ce2bc64a5 100644 --- a/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp +++ b/sycl/test-e2e/KernelAndProgram/target_compile_fast.cpp @@ -11,7 +11,7 @@ // CHECK-WITHOUT-NOT: -igc_opts // CHECK-WITHOUT: ) ---> pi_result : PI_SUCCESS -#include +#include int main() { sycl::buffer Buffer(4); diff --git a/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp b/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp index 856d1510edfc5..7b1cd268bd0d6 100644 --- a/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp +++ 
b/sycl/test-e2e/KernelAndProgram/test_cache_jit_aot.cpp @@ -71,7 +71,7 @@ // RESULT2: Result (1): 2 // RESULT2: Result (2): 2 -#include +#include int main() { for (int i = 0; i < 3; ++i) { diff --git a/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp b/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp index 2a5c72161e46c..047f5bfbb970b 100644 --- a/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp +++ b/sycl/test-e2e/KernelAndProgram/undefined-symbol.cpp @@ -12,7 +12,7 @@ // //===--------------------------------------------------------------===// -#include +#include SYCL_EXTERNAL void symbol_that_does_not_exist(); diff --git a/sycl/test-e2e/KernelParams/array-kernel-param-nested-run.cpp b/sycl/test-e2e/KernelParams/array-kernel-param-nested-run.cpp index d8c0124a7f9cb..4285353e018b4 100644 --- a/sycl/test-e2e/KernelParams/array-kernel-param-nested-run.cpp +++ b/sycl/test-e2e/KernelParams/array-kernel-param-nested-run.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/KernelParams/array-kernel-param-run.cpp b/sycl/test-e2e/KernelParams/array-kernel-param-run.cpp index 0234519fd48c8..6559532bd6c1d 100644 --- a/sycl/test-e2e/KernelParams/array-kernel-param-run.cpp +++ b/sycl/test-e2e/KernelParams/array-kernel-param-run.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/KernelParams/non-standard-layout.cpp b/sycl/test-e2e/KernelParams/non-standard-layout.cpp index ac28aa1ea7635..39d6640b97c4f 100644 --- a/sycl/test-e2e/KernelParams/non-standard-layout.cpp +++ b/sycl/test-e2e/KernelParams/non-standard-layout.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/KernelParams/struct_kernel_param.cpp b/sycl/test-e2e/KernelParams/struct_kernel_param.cpp index d6585602774c5..f204ee0ccaa8c 100644 --- a/sycl/test-e2e/KernelParams/struct_kernel_param.cpp +++ 
b/sycl/test-e2e/KernelParams/struct_kernel_param.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/KernelParams/union_kernel_param.cpp b/sycl/test-e2e/KernelParams/union_kernel_param.cpp index d6ee0b503ae6a..863be92971e27 100644 --- a/sycl/test-e2e/KernelParams/union_kernel_param.cpp +++ b/sycl/test-e2e/KernelParams/union_kernel_param.cpp @@ -4,7 +4,7 @@ // RUN: %{run} %t.out #include -#include +#include union TestUnion { public: diff --git a/sycl/test-e2e/NonUniformGroups/is_fixed_topology.cpp b/sycl/test-e2e/NonUniformGroups/is_fixed_topology.cpp index f602b4c8ec071..e8183ce41d3d2 100644 --- a/sycl/test-e2e/NonUniformGroups/is_fixed_topology.cpp +++ b/sycl/test-e2e/NonUniformGroups/is_fixed_topology.cpp @@ -1,6 +1,6 @@ // RUN: %{build} -fsyntax-only -o %t.out -#include +#include namespace syclex = sycl::ext::oneapi::experimental; #ifdef SYCL_EXT_ONEAPI_ROOT_GROUP diff --git a/sycl/test-e2e/OneapiDeviceSelector/level_zero_top.cpp b/sycl/test-e2e/OneapiDeviceSelector/level_zero_top.cpp index e7e42db4e4b3b..161726d40f6e6 100644 --- a/sycl/test-e2e/OneapiDeviceSelector/level_zero_top.cpp +++ b/sycl/test-e2e/OneapiDeviceSelector/level_zero_top.cpp @@ -9,7 +9,7 @@ // devices, not sub-devices. 
#include -#include +#include using namespace sycl; using namespace std; diff --git a/sycl/test-e2e/OneapiDeviceSelector/sub-devices.cpp b/sycl/test-e2e/OneapiDeviceSelector/sub-devices.cpp index f102fbb9fdb13..758584c8b5008 100644 --- a/sycl/test-e2e/OneapiDeviceSelector/sub-devices.cpp +++ b/sycl/test-e2e/OneapiDeviceSelector/sub-devices.cpp @@ -18,7 +18,7 @@ // RUN: env ONEAPI_DEVICE_SELECTOR="*:gpu" %{run-unfiltered-devices} %t.out 1 // RUN: %{run-unfiltered-devices} %t.out 1 -#include +#include using namespace sycl; int main(int Argc, const char *Argv[]) { diff --git a/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp b/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp index 50521e77b3577..ad2fe4ce2371a 100644 --- a/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp +++ b/sycl/test-e2e/OnlineCompiler/online_compiler_L0.cpp @@ -8,7 +8,7 @@ // re-used by other backends is kept in online_compiler_common.hpp file. #include -#include +#include #include diff --git a/sycl/test-e2e/OptionalKernelFeatures/esimd.cpp b/sycl/test-e2e/OptionalKernelFeatures/esimd.cpp index 2bd800460028f..4cc583b4c691a 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/esimd.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/esimd.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out #include -#include +#include int main() { sycl::queue Queue; diff --git a/sycl/test-e2e/OptionalKernelFeatures/fp64_relaxed.cpp b/sycl/test-e2e/OptionalKernelFeatures/fp64_relaxed.cpp index f6708382b40e1..969095bf5f552 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/fp64_relaxed.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/fp64_relaxed.cpp @@ -5,7 +5,7 @@ // Tests that aspect::fp64 requirements are affected by optimizations. 
-#include +#include int main() { sycl::queue Q; diff --git a/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_with_aspects.cpp b/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_with_aspects.cpp index a0cd2e5a4447d..535c3647793c7 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_with_aspects.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/is_compatible/is_compatible_with_aspects.cpp @@ -4,7 +4,7 @@ // RUN: %{build} -O0 -o %t.out // RUN: %{run} %t.out -#include +#include [[sycl::device_has(sycl::aspect::cpu)]] void foo(){}; [[sycl::device_has(sycl::aspect::gpu)]] void bar(){}; diff --git a/sycl/test-e2e/OptionalKernelFeatures/no-speculative-compilation.cpp b/sycl/test-e2e/OptionalKernelFeatures/no-speculative-compilation.cpp index 2284d1a6758fc..3cc05cd101bda 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/no-speculative-compilation.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/no-speculative-compilation.cpp @@ -5,7 +5,7 @@ // i.e. there are no exceptions thrown about aspects fp16 or fp64 being // unsuppored on device. 
-#include +#include void foo(sycl::half &value) { value += sycl::half(1.0f); } diff --git a/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp b/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp index 15ebc62bfa792..3f38a4df3023d 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/sycl-external-with-optional-features.cpp @@ -5,7 +5,7 @@ #ifdef SOURCE1 #include -#include +#include using accT = sycl::accessor; constexpr int value = 42; @@ -34,7 +34,7 @@ int main() { #endif // SOURCE1 #ifdef SOURCE2 -#include +#include constexpr int value = 42; diff --git a/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp b/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp index 49a891105ea0c..aec4cc5297c8f 100644 --- a/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp +++ b/sycl/test-e2e/Plugin/enqueue-arg-order-buffer.cpp @@ -3,8 +3,8 @@ // RUN: env SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s #include -#include -#include + +#include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp b/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp index 2ae7d43c46665..8b66c121522e9 100644 --- a/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp +++ b/sycl/test-e2e/Plugin/enqueue-arg-order-image.cpp @@ -13,7 +13,7 @@ // RUN: env SYCL_HOST_UNIFIED_MEMORY=1 SYCL_PI_TRACE=2 %{run} %t.out | FileCheck %s #include -#include + #include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/interop-level-zero-buffer-multi-dim.cpp b/sycl/test-e2e/Plugin/interop-level-zero-buffer-multi-dim.cpp index aee595729ce5c..e6824a5fc0ecb 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-buffer-multi-dim.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-buffer-multi-dim.cpp @@ -5,7 +5,7 @@ // Test 2D and 3D interoperability buffers for the Level Zero backend. 
#include "interop-level-zero-buffer-helpers.hpp" -#include +#include // clang-format off #include #include diff --git a/sycl/test-e2e/Plugin/interop-level-zero-buffer-ownership.cpp b/sycl/test-e2e/Plugin/interop-level-zero-buffer-ownership.cpp index 877c54870f3ad..7fa9c9895a38e 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-buffer-ownership.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-buffer-ownership.cpp @@ -29,7 +29,7 @@ // CHECK-NOT: zeMemFree #include "interop-level-zero-buffer-helpers.hpp" -#include +#include // clang-format off #include #include diff --git a/sycl/test-e2e/Plugin/interop-level-zero-buffer.cpp b/sycl/test-e2e/Plugin/interop-level-zero-buffer.cpp index 4829f8f651633..9f097992de68d 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-buffer.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-buffer.cpp @@ -8,7 +8,7 @@ // Test interoperability buffer for the Level Zer backend #include -#include +#include // clang-format off #include #include diff --git a/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp b/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp index 2cb4a9ee481e5..f769052dd3d00 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-get-native-mem.cpp @@ -13,7 +13,7 @@ // SYCL #include "interop-level-zero-buffer-helpers.hpp" #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp b/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp index dcedc9b1bfcfc..befdb27f439e2 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp @@ -5,7 +5,7 @@ // Test for Level Zero interop API where SYCL RT doesn't take ownership #include -#include +#include // clang-format off #include #include diff --git a/sycl/test-e2e/Plugin/interop-level-zero.cpp b/sycl/test-e2e/Plugin/interop-level-zero.cpp index 
dc796c02aebf6..fe9ce76da6a0d 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero.cpp @@ -6,7 +6,7 @@ // Test for Level Zero interop API #include -#include +#include // clang-format off #include #include diff --git a/sycl/test-e2e/Plugin/level-zero-usm-capabilities.cpp b/sycl/test-e2e/Plugin/level-zero-usm-capabilities.cpp index 59bb49348f0a7..dea18c8e3330d 100644 --- a/sycl/test-e2e/Plugin/level-zero-usm-capabilities.cpp +++ b/sycl/test-e2e/Plugin/level-zero-usm-capabilities.cpp @@ -3,7 +3,7 @@ // RUN: %{run} %t.out 2>&1 | FileCheck %s #include -#include +#include // Check for queries of USM capabilities. // All supported L0 devices have these capabilities currently: diff --git a/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp b/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp index 76890edaea93d..d4792c9177a28 100644 --- a/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp +++ b/sycl/test-e2e/Plugin/level_zero_batch_event_status.cpp @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include int main(void) { diff --git a/sycl/test-e2e/Plugin/level_zero_device_memory_clock_rate_and_bus_width.cpp b/sycl/test-e2e/Plugin/level_zero_device_memory_clock_rate_and_bus_width.cpp index bb31d169bcb24..86518fde9343d 100644 --- a/sycl/test-e2e/Plugin/level_zero_device_memory_clock_rate_and_bus_width.cpp +++ b/sycl/test-e2e/Plugin/level_zero_device_memory_clock_rate_and_bus_width.cpp @@ -10,7 +10,7 @@ // CHECK: Memory bus width #include -#include +#include using namespace sycl; int main() { diff --git a/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp b/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp index c7603c038645f..b1aa94d280682 100644 --- a/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp +++ b/sycl/test-e2e/Plugin/level_zero_device_scope_events.cpp @@ -30,7 +30,7 @@ // clang-format on #include -#include +#include int main(int argc, char **argv) { sycl::queue 
queue(sycl::gpu_selector_v); diff --git a/sycl/test-e2e/Plugin/level_zero_imm_cmdlist_per_thread.cpp b/sycl/test-e2e/Plugin/level_zero_imm_cmdlist_per_thread.cpp index 51e692c9329b2..153af5c5a1f9d 100644 --- a/sycl/test-e2e/Plugin/level_zero_imm_cmdlist_per_thread.cpp +++ b/sycl/test-e2e/Plugin/level_zero_imm_cmdlist_per_thread.cpp @@ -13,7 +13,7 @@ // CHECK-ONE-CMDLIST: zeCommandListCreateImmediate = 2 // CHECK-PER-THREAD-CMDLIST: zeCommandListCreateImmediate = 4 -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Plugin/level_zero_queue_create.cpp b/sycl/test-e2e/Plugin/level_zero_queue_create.cpp index 598ec12e8f9e0..a0330c4d5e56d 100644 --- a/sycl/test-e2e/Plugin/level_zero_queue_create.cpp +++ b/sycl/test-e2e/Plugin/level_zero_queue_create.cpp @@ -8,7 +8,7 @@ // with the embedded UR_L0_LEAKS_DEBUG=1 testing capability. // -#include +#include int main(int argc, char **argv) { sycl::queue Q; diff --git a/sycl/test-e2e/Plugin/level_zero_track_indirect_access_memory.cpp b/sycl/test-e2e/Plugin/level_zero_track_indirect_access_memory.cpp index 1171b252f2ff3..40cfee5b66c0e 100644 --- a/sycl/test-e2e/Plugin/level_zero_track_indirect_access_memory.cpp +++ b/sycl/test-e2e/Plugin/level_zero_track_indirect_access_memory.cpp @@ -30,7 +30,7 @@ #define LENGTH 10 -#include +#include using namespace sycl; void update_d2_data(queue &q) { diff --git a/sycl/test-e2e/Plugin/sycl-partition-info.cpp b/sycl/test-e2e/Plugin/sycl-partition-info.cpp index c52fb2c1ffc44..e6f911f541d3e 100644 --- a/sycl/test-e2e/Plugin/sycl-partition-info.cpp +++ b/sycl/test-e2e/Plugin/sycl-partition-info.cpp @@ -5,7 +5,7 @@ // supports ensure we are only returning SYCL standard partition properties. 
#include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/PreviewBreakingChanges/preview_lib_marker.cpp b/sycl/test-e2e/PreviewBreakingChanges/preview_lib_marker.cpp index e724855d296c3..7a14c4067b898 100644 --- a/sycl/test-e2e/PreviewBreakingChanges/preview_lib_marker.cpp +++ b/sycl/test-e2e/PreviewBreakingChanges/preview_lib_marker.cpp @@ -6,7 +6,7 @@ // Test to help identify that E2E testing correctly detects and uses the preview // library. -#include +#include namespace sycl { inline namespace _V1 { diff --git a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp index e454a85151c26..d7b5d55161107 100644 --- a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp +++ b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-level-zero.cpp @@ -19,7 +19,7 @@ // -O2 | -ze-opt-level=2 // -O3 | -ze-opt-level=2 -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp index d0c49dc4fc698..c5c9fc6074dab 100644 --- a/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp +++ b/sycl/test-e2e/PropagateOptionsToBackend/sycl-opt-level-opencl.cpp @@ -22,7 +22,7 @@ // -O2 | /* no option */ // -O3 | /* no option */ -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/README.md b/sycl/test-e2e/README.md index 2564d7172a5a6..0bc6b8f1e7896 100644 --- a/sycl/test-e2e/README.md +++ b/sycl/test-e2e/README.md @@ -8,6 +8,7 @@ * [Creating or modifying tests](#creating-or-modifying-tests) * [LIT feature checks](#lit-feature-checks) * [llvm-lit parameters](#llvm-lit-parameters) + * [sycl/detail/core.hpp header file](#sycldetailcorehpp) # Overview This directory contains SYCL-related tests distributed in subdirectories based @@ -282,3 +283,15 @@ llvm-lit --param dpcpp_compiler=path/to/clang++
--param dump_ir=True \ SYCL/External/RSBench ``` +## sycl/detail/core.hpp + +While the SYCL specification dictates that the only user-visible interface is +the `<sycl/sycl.hpp>` header file, we found out that as the implementation and +multiple extensions grew, the compile time was getting worse and worse, +negatively affecting our CI turnaround time. We are just starting some efforts +to create a much smaller set of basic features needed for every SYCL end-to-end +test/program so that this issue could be somewhat mitigated. This activity is in +its early stage and NO production code should rely on it. It WILL be changed as +we go with our experiments. For any code outside of this project only the +`<sycl/sycl.hpp>` header must be used until we feel confident to propose an extension +that can provide an alternative. diff --git a/sycl/test-e2e/Reduction/reduction_dynamic_span.cpp b/sycl/test-e2e/Reduction/reduction_dynamic_span.cpp index 55d7b7e8387e6..b25ff9c8de49b 100644 --- a/sycl/test-e2e/Reduction/reduction_dynamic_span.cpp +++ b/sycl/test-e2e/Reduction/reduction_dynamic_span.cpp @@ -1,6 +1,6 @@ // RUN: not %{build} -fsyntax-only -o %t.out -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Regression/DAE-separate-compile.cpp b/sycl/test-e2e/Regression/DAE-separate-compile.cpp index f05095f36cc7e..a6c8ae885cb39 100644 --- a/sycl/test-e2e/Regression/DAE-separate-compile.cpp +++ b/sycl/test-e2e/Regression/DAE-separate-compile.cpp @@ -13,7 +13,7 @@ #include -#include +#include int main() { constexpr int THE_ANSWER = 42; diff --git a/sycl/test-e2e/Regression/atomic_load.cpp b/sycl/test-e2e/Regression/atomic_load.cpp index 15aaefd4a6ea8..8771db61db935 100644 --- a/sycl/test-e2e/Regression/atomic_load.cpp +++ b/sycl/test-e2e/Regression/atomic_load.cpp @@ -1,6 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include using namespace sycl; template class foo; diff --git a/sycl/test-e2e/Regression/cache_test.cpp b/sycl/test-e2e/Regression/cache_test.cpp index
1d2255ee8885f..3d25323c88026 100644 --- a/sycl/test-e2e/Regression/cache_test.cpp +++ b/sycl/test-e2e/Regression/cache_test.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include using namespace sycl::ext::oneapi; diff --git a/sycl/test-e2e/Regression/device_num.cpp b/sycl/test-e2e/Regression/device_num.cpp index 4bb1bda94d114..efa7a82ef25f7 100644 --- a/sycl/test-e2e/Regression/device_num.cpp +++ b/sycl/test-e2e/Regression/device_num.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include using namespace sycl; using namespace std; diff --git a/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp b/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp index 2fcd96e8ad903..79ef8c29caa1e 100644 --- a/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp +++ b/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include #include diff --git a/sycl/test-e2e/Regression/empty_accessor_use.cpp b/sycl/test-e2e/Regression/empty_accessor_use.cpp index 977d687a3d7a5..869a5526c389e 100644 --- a/sycl/test-e2e/Regression/empty_accessor_use.cpp +++ b/sycl/test-e2e/Regression/empty_accessor_use.cpp @@ -4,7 +4,7 @@ // Tests that 3D accessors with 0 elements are allowed to be captured in a // kernel. 
-#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Regression/fp16-with-unnamed-lambda.cpp b/sycl/test-e2e/Regression/fp16-with-unnamed-lambda.cpp index 8a24469b298b5..580d99cfc15f1 100644 --- a/sycl/test-e2e/Regression/fp16-with-unnamed-lambda.cpp +++ b/sycl/test-e2e/Regression/fp16-with-unnamed-lambda.cpp @@ -1,7 +1,7 @@ // REQUIRES: aspect-fp16 // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include #include diff --git a/sycl/test-e2e/Regression/fsycl-host-compiler.cpp b/sycl/test-e2e/Regression/fsycl-host-compiler.cpp index 92375484109bc..bb08f7955da1f 100644 --- a/sycl/test-e2e/Regression/fsycl-host-compiler.cpp +++ b/sycl/test-e2e/Regression/fsycl-host-compiler.cpp @@ -11,7 +11,7 @@ // // Uses -fsycl-host-compiler= on a simple test, requires 'g++' -#include +#include #ifndef DEFINE_CHECK #error predefined macro not set diff --git a/sycl/test-e2e/Regression/get_subgroup_sizes.cpp b/sycl/test-e2e/Regression/get_subgroup_sizes.cpp index 4e5635a89eb9a..eb910425ea8d4 100644 --- a/sycl/test-e2e/Regression/get_subgroup_sizes.cpp +++ b/sycl/test-e2e/Regression/get_subgroup_sizes.cpp @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Regression/global_queue.cpp b/sycl/test-e2e/Regression/global_queue.cpp index b443b88de3c8f..169b5ce6872c8 100644 --- a/sycl/test-e2e/Regression/global_queue.cpp +++ b/sycl/test-e2e/Regression/global_queue.cpp @@ -9,7 +9,7 @@ // use reverse order to call destructors, and low level runtime's objects are // destroyed before global queue in user code. 
-#include +#include sycl::queue Queue; diff --git a/sycl/test-e2e/Regression/host_unified_memory.cpp b/sycl/test-e2e/Regression/host_unified_memory.cpp index 77a6fecb6c38f..a679111611d29 100644 --- a/sycl/test-e2e/Regression/host_unified_memory.cpp +++ b/sycl/test-e2e/Regression/host_unified_memory.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include using namespace sycl; diff --git a/sycl/test-e2e/Regression/implicit_atomic_conversion.cpp b/sycl/test-e2e/Regression/implicit_atomic_conversion.cpp index 0d444e07ba487..ee89b993eedce 100644 --- a/sycl/test-e2e/Regression/implicit_atomic_conversion.cpp +++ b/sycl/test-e2e/Regression/implicit_atomic_conversion.cpp @@ -1,7 +1,7 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Regression/isordered.cpp b/sycl/test-e2e/Regression/isordered.cpp index 8d8fa28919a4b..7d3d0d32b688c 100644 --- a/sycl/test-e2e/Regression/isordered.cpp +++ b/sycl/test-e2e/Regression/isordered.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUNx: %{run} %t.out -#include +#include + +#include int main() { sycl::range<1> ndRng(3); diff --git a/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp b/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp index 21d20fd719228..799a69854d4e9 100644 --- a/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp +++ b/sycl/test-e2e/Regression/kernel_bundle_ignore_sycl_external.cpp @@ -7,7 +7,7 @@ // Windows doesn't yet have full shutdown(). 
// UNSUPPORTED: ze_debug && windows -#include +#include class KernelName; diff --git a/sycl/test-e2e/Regression/kernel_name_class.cpp b/sycl/test-e2e/Regression/kernel_name_class.cpp index a6a9b0d6da084..e777a22309e73 100644 --- a/sycl/test-e2e/Regression/kernel_name_class.cpp +++ b/sycl/test-e2e/Regression/kernel_name_class.cpp @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include #define GOLD 10 diff --git a/sycl/test-e2e/Regression/kernel_unnamed.cpp b/sycl/test-e2e/Regression/kernel_unnamed.cpp index 994e2ff266dc2..bff1a8b82cc70 100644 --- a/sycl/test-e2e/Regression/kernel_unnamed.cpp +++ b/sycl/test-e2e/Regression/kernel_unnamed.cpp @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// #include -#include +#include #define GOLD 10 static int NumTestCases = 0; diff --git a/sycl/test-e2e/Regression/mad_sat.cpp b/sycl/test-e2e/Regression/mad_sat.cpp index 117a7104ba06c..570b2307a3ae4 100644 --- a/sycl/test-e2e/Regression/mad_sat.cpp +++ b/sycl/test-e2e/Regression/mad_sat.cpp @@ -1,7 +1,9 @@ // RUN: %{build} -o %t.out // RUNx: %{run} %t.out -#include +#include + +#include int main() { sycl::queue testQueue; diff --git a/sycl/test-e2e/Regression/nontrivial_device_copyable_value.cpp b/sycl/test-e2e/Regression/nontrivial_device_copyable_value.cpp index f4408139625e4..6c8c41292c2b8 100644 --- a/sycl/test-e2e/Regression/nontrivial_device_copyable_value.cpp +++ b/sycl/test-e2e/Regression/nontrivial_device_copyable_value.cpp @@ -4,7 +4,7 @@ // Note: Tests that non-trivially copyable types marked as device-copyable are // copied and used correctly on the device. 
-#include +#include #include diff --git a/sycl/test-e2e/Regression/optimization_level_debug_info_intopt.cpp b/sycl/test-e2e/Regression/optimization_level_debug_info_intopt.cpp index 07cb1f655d7b0..106460629b242 100644 --- a/sycl/test-e2e/Regression/optimization_level_debug_info_intopt.cpp +++ b/sycl/test-e2e/Regression/optimization_level_debug_info_intopt.cpp @@ -6,7 +6,7 @@ // NOTE: Tests that debugging information can be generated for all integral // optimization levels. -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp b/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp index 5856e6f084c29..f7c9aad155b43 100644 --- a/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp +++ b/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp @@ -7,7 +7,7 @@ // NOTE: Tests that debugging information can be generated for all special-name // optimization levels. -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Regression/pi_release.cpp b/sycl/test-e2e/Regression/pi_release.cpp index 3b4d79d13caed..84fbee6417b8e 100644 --- a/sycl/test-e2e/Regression/pi_release.cpp +++ b/sycl/test-e2e/Regression/pi_release.cpp @@ -2,7 +2,7 @@ // RUN: %{build} -o %t.out // RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s -#include +#include int main() { sycl::queue q; diff --git a/sycl/test-e2e/Regression/private_array_init_test.cpp b/sycl/test-e2e/Regression/private_array_init_test.cpp index b822c8aea684d..286204bf8e12f 100644 --- a/sycl/test-e2e/Regression/private_array_init_test.cpp +++ b/sycl/test-e2e/Regression/private_array_init_test.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#include +#include namespace s = sycl; diff --git a/sycl/test-e2e/Regression/range-rounding-this-id.cpp b/sycl/test-e2e/Regression/range-rounding-this-id.cpp index a5970adf46e41..03111edc87f1f 100644 --- 
a/sycl/test-e2e/Regression/range-rounding-this-id.cpp +++ b/sycl/test-e2e/Regression/range-rounding-this-id.cpp @@ -4,7 +4,7 @@ // RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_PARAMS=16:32:0 \ // RUN: SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 \ // RUN: %{run} %t.out | FileCheck %s -#include +#include constexpr int N = 3; diff --git a/sycl/test-e2e/Regression/same_unnamed_kernels.cpp b/sycl/test-e2e/Regression/same_unnamed_kernels.cpp index dff1cb6d7d1ae..13460332777ce 100644 --- a/sycl/test-e2e/Regression/same_unnamed_kernels.cpp +++ b/sycl/test-e2e/Regression/same_unnamed_kernels.cpp @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#include +#include template void run(sycl::queue &q, B &buf, const F &func) { diff --git a/sycl/test-e2e/Regression/static-buffer-dtor.cpp b/sycl/test-e2e/Regression/static-buffer-dtor.cpp index b2e49467af7d0..8ff9328d6535d 100644 --- a/sycl/test-e2e/Regression/static-buffer-dtor.cpp +++ b/sycl/test-e2e/Regression/static-buffer-dtor.cpp @@ -18,7 +18,7 @@ // Windows doesn't yet have full shutdown(). // UNSUPPORTED: ze_debug && windows -#include +#include int main() { uint8_t *h_A = (uint8_t *)malloc(256); diff --git a/sycl/test-e2e/Regression/subalign_no_alloc.cpp b/sycl/test-e2e/Regression/subalign_no_alloc.cpp index b9661a120b734..a2348c1662f08 100644 --- a/sycl/test-e2e/Regression/subalign_no_alloc.cpp +++ b/sycl/test-e2e/Regression/subalign_no_alloc.cpp @@ -4,7 +4,7 @@ // Tests that a type with a different alignment from its size does not cause // the runtime to reallocate memory. 
-#include +#include #include diff --git a/sycl/test-e2e/Regression/vec_rel_swizzle_ops.cpp b/sycl/test-e2e/Regression/vec_rel_swizzle_ops.cpp index 14669f2dda1ef..c7f2bd282f527 100644 --- a/sycl/test-e2e/Regression/vec_rel_swizzle_ops.cpp +++ b/sycl/test-e2e/Regression/vec_rel_swizzle_ops.cpp @@ -5,7 +5,7 @@ // RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} #include -#include +#include template bool testAndOperator(const std::string &typeName) { diff --git a/sycl/test-e2e/Scheduler/BasicSchedulerTests.cpp b/sycl/test-e2e/Scheduler/BasicSchedulerTests.cpp index 987625577dc95..a644c9c9a95ae 100644 --- a/sycl/test-e2e/Scheduler/BasicSchedulerTests.cpp +++ b/sycl/test-e2e/Scheduler/BasicSchedulerTests.cpp @@ -8,7 +8,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include diff --git a/sycl/test-e2e/Scheduler/CommandCleanupThreadSafety.cpp b/sycl/test-e2e/Scheduler/CommandCleanupThreadSafety.cpp index a67f3371ba16d..36c16320196a6 100644 --- a/sycl/test-e2e/Scheduler/CommandCleanupThreadSafety.cpp +++ b/sycl/test-e2e/Scheduler/CommandCleanupThreadSafety.cpp @@ -2,7 +2,7 @@ // RUN: %{build} -o %t.out -lpthread // RUN: %{run} %t.out -#include +#include #include #include diff --git a/sycl/test-e2e/Scheduler/DataMovement.cpp b/sycl/test-e2e/Scheduler/DataMovement.cpp index 557a3130d1e62..afe7790ab7601 100644 --- a/sycl/test-e2e/Scheduler/DataMovement.cpp +++ b/sycl/test-e2e/Scheduler/DataMovement.cpp @@ -12,7 +12,7 @@ // The test checks that no additional host allocation is performed by the SYCL // RT if host ptr is used -#include +#include #include diff --git a/sycl/test-e2e/Scheduler/HostAccDestruction.cpp b/sycl/test-e2e/Scheduler/HostAccDestruction.cpp index 5406108554918..9c87e903c6b1f 100644 --- a/sycl/test-e2e/Scheduler/HostAccDestruction.cpp +++ b/sycl/test-e2e/Scheduler/HostAccDestruction.cpp @@ -13,7 +13,7 @@ 
//===----------------------------------------------------------------------===// #include -#include +#include int main() { size_t size = 3; diff --git a/sycl/test-e2e/Scheduler/MemObjRemapping.cpp b/sycl/test-e2e/Scheduler/MemObjRemapping.cpp index 3c3c40c71c86d..9d56822ff4d69 100644 --- a/sycl/test-e2e/Scheduler/MemObjRemapping.cpp +++ b/sycl/test-e2e/Scheduler/MemObjRemapping.cpp @@ -4,7 +4,7 @@ // XFAIL: hip_nvidia #include #include -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/Scheduler/MultipleDevices.cpp b/sycl/test-e2e/Scheduler/MultipleDevices.cpp index bc5bb1b0172f5..fb7982c678520 100644 --- a/sycl/test-e2e/Scheduler/MultipleDevices.cpp +++ b/sycl/test-e2e/Scheduler/MultipleDevices.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include diff --git a/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp b/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp index 77e69a340b8f1..eb8fd2c9f5eba 100644 --- a/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp +++ b/sycl/test-e2e/Scheduler/ReleaseResourcesTest.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include +#include #include "../helpers.hpp" diff --git a/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp b/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp index 65e52c6ca44c2..22e742e0d6c25 100644 --- a/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp +++ b/sycl/test-e2e/Scheduler/SubBufferRemapping.cpp @@ -20,7 +20,7 @@ // CHECK-NEXT: : // CHECK-NEXT: : 3 -#include +#include int main(int argc, const char **argv) { diff --git a/sycl/test-e2e/SeparateCompile/same-kernel.cpp b/sycl/test-e2e/SeparateCompile/same-kernel.cpp index baad7c5bb55f3..15a2bfc9bc5c5 100644 --- a/sycl/test-e2e/SeparateCompile/same-kernel.cpp +++ b/sycl/test-e2e/SeparateCompile/same-kernel.cpp @@ -15,7 +15,7 @@ // RUN: %clangxx %t-same-kernel-a.o %t-same-kernel-b.o -o %t-same-kernel.exe -fsycl 
-fsycl-targets=%{sycl_triple} // RUN: %{run} %t-same-kernel.exe -#include +#include using namespace sycl; diff --git a/sycl/test-e2e/SeparateCompile/sycl-external.cpp b/sycl/test-e2e/SeparateCompile/sycl-external.cpp index 008c200f3a63a..25a95a2f32288 100644 --- a/sycl/test-e2e/SeparateCompile/sycl-external.cpp +++ b/sycl/test-e2e/SeparateCompile/sycl-external.cpp @@ -13,7 +13,7 @@ // RUN: %{run} %t.exe #include -#include +#include #ifdef SOURCE1 int bar(int b); diff --git a/sycl/test-e2e/SeparateCompile/test.cpp b/sycl/test-e2e/SeparateCompile/test.cpp index 6d6e7f6bfaf52..5c7c34c204cec 100644 --- a/sycl/test-e2e/SeparateCompile/test.cpp +++ b/sycl/test-e2e/SeparateCompile/test.cpp @@ -65,7 +65,7 @@ #ifdef B_CPP // ----------------------------------------------------------------------------- #include -#include +#include int run_test_b(int v) { int arr[] = {v}; @@ -84,7 +84,7 @@ int run_test_b(int v) { // ----------------------------------------------------------------------------- #include -#include +#include using namespace std; diff --git a/sycl/test-e2e/SharedLib/use_when_link.cpp b/sycl/test-e2e/SharedLib/use_when_link.cpp index 3cedcafe0cdc6..6c74149e60c89 100644 --- a/sycl/test-e2e/SharedLib/use_when_link.cpp +++ b/sycl/test-e2e/SharedLib/use_when_link.cpp @@ -8,7 +8,7 @@ // RUN: %{build} -L%T -o %t.out -l%basename_t -Wl,-rpath=%T // RUN: %{run} %t.out -#include +#include #include diff --git a/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp b/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp index 39c7e5cdeb76b..04f67fe9fad3c 100644 --- a/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp +++ b/sycl/test-e2e/SharedLib/use_when_link_verify_cache.cpp @@ -8,7 +8,7 @@ // RUN: %{build} -L%T -o %t.out -l%basename_t -Wl,-rpath=%T // RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-LAST,CHECK --implicit-check-not=piProgramBuild -#include +#include #include diff --git a/sycl/test-e2e/SharedLib/use_with_dlopen.cpp 
b/sycl/test-e2e/SharedLib/use_with_dlopen.cpp index e98b8545b28fc..d5fd6f8fd5150 100644 --- a/sycl/test-e2e/SharedLib/use_with_dlopen.cpp +++ b/sycl/test-e2e/SharedLib/use_with_dlopen.cpp @@ -17,7 +17,7 @@ // RUNx: %{compile} -DRUN_LAST // RUNx: %{run} %t.out -#include +#include #include #include diff --git a/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp b/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp index 1247ac0bed6eb..de7dc3dd3897b 100644 --- a/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp +++ b/sycl/test-e2e/SharedLib/use_with_dlopen_verify_cache.cpp @@ -19,7 +19,7 @@ // RUNx: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-LAST,CHECK --implicit-check-not=piProgramBuild // clang-format on -#include +#include #include #include diff --git a/sycl/test-e2e/Tracing/buffer_printers.cpp b/sycl/test-e2e/Tracing/buffer_printers.cpp index 6854d506cb582..3f3a3c9aea858 100644 --- a/sycl/test-e2e/Tracing/buffer_printers.cpp +++ b/sycl/test-e2e/Tracing/buffer_printers.cpp @@ -3,7 +3,7 @@ // // XFAIL: hip_nvidia -#include +#include #include // Test image-specific printers of the Plugin Interace diff --git a/sycl/test-e2e/Tracing/pi_tracing_test.cpp b/sycl/test-e2e/Tracing/pi_tracing_test.cpp index 635980517e6c6..78bb0c31c1d9d 100644 --- a/sycl/test-e2e/Tracing/pi_tracing_test.cpp +++ b/sycl/test-e2e/Tracing/pi_tracing_test.cpp @@ -42,7 +42,7 @@ // CHECK-SAME: [ {{0[xX]?[0-9a-fA-F]*}} ... 
] // CHECK-NEXT: ) ---> pi_result : PI_SUCCESS -#include +#include int main() { sycl::queue Queue; sycl::buffer Buf(10); diff --git a/sycl/test-e2e/XPTI/buffer/host_array.cpp b/sycl/test-e2e/XPTI/buffer/host_array.cpp index c7fc506961b47..2574f93ae53f6 100644 --- a/sycl/test-e2e/XPTI/buffer/host_array.cpp +++ b/sycl/test-e2e/XPTI/buffer/host_array.cpp @@ -9,7 +9,7 @@ #else #include -#include +#include int main() { bool MismatchFound = false; diff --git a/sycl/test-e2e/XPTI/buffer/in_cycle.cpp b/sycl/test-e2e/XPTI/buffer/in_cycle.cpp index 0021a97b7cfb9..75fd3b3b96e85 100644 --- a/sycl/test-e2e/XPTI/buffer/in_cycle.cpp +++ b/sycl/test-e2e/XPTI/buffer/in_cycle.cpp @@ -10,7 +10,7 @@ #else #include -#include +#include bool func(sycl::queue &Queue, int depth = 0) { bool MismatchFound = false; // Create a buffer of 4 ints to be used inside the kernel code. diff --git a/sycl/test-e2e/XPTI/buffer/multiple_buffers.cpp b/sycl/test-e2e/XPTI/buffer/multiple_buffers.cpp index 9e8e21e6ea0f4..874f81c11b481 100644 --- a/sycl/test-e2e/XPTI/buffer/multiple_buffers.cpp +++ b/sycl/test-e2e/XPTI/buffer/multiple_buffers.cpp @@ -9,7 +9,7 @@ #else #include -#include +#include int main() { bool MismatchFound = false; diff --git a/sycl/test-e2e/XPTI/buffer/recursion.cpp b/sycl/test-e2e/XPTI/buffer/recursion.cpp index 0086dfb586810..a8a4f4ab65a2c 100644 --- a/sycl/test-e2e/XPTI/buffer/recursion.cpp +++ b/sycl/test-e2e/XPTI/buffer/recursion.cpp @@ -10,7 +10,7 @@ #else #include -#include +#include bool func(sycl::queue &Queue, int depth = 0) { bool MismatchFound = false; // Create a buffer of 4 ints to be used inside the kernel code. 
diff --git a/sycl/test-e2e/XPTI/buffer/sub_buffer.cpp b/sycl/test-e2e/XPTI/buffer/sub_buffer.cpp index f4a7e2001b18b..7bc46f33baa8e 100644 --- a/sycl/test-e2e/XPTI/buffer/sub_buffer.cpp +++ b/sycl/test-e2e/XPTI/buffer/sub_buffer.cpp @@ -9,7 +9,7 @@ #else #include -#include +#include int main() { bool MismatchFound = false; diff --git a/sycl/test-e2e/XPTI/buffer/use_host_ptr.cpp b/sycl/test-e2e/XPTI/buffer/use_host_ptr.cpp index e2f4b997afc79..23e0f2e44ff80 100644 --- a/sycl/test-e2e/XPTI/buffer/use_host_ptr.cpp +++ b/sycl/test-e2e/XPTI/buffer/use_host_ptr.cpp @@ -10,7 +10,7 @@ #else #include -#include +#include int main() { bool MismatchFound = false; sycl::queue Queue{}; diff --git a/sycl/test-e2e/XPTI/image/accessors.cpp b/sycl/test-e2e/XPTI/image/accessors.cpp index d428c4095b779..70a11fdf9f767 100644 --- a/sycl/test-e2e/XPTI/image/accessors.cpp +++ b/sycl/test-e2e/XPTI/image/accessors.cpp @@ -9,7 +9,7 @@ #else -#include +#include using namespace sycl::access; diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_arith.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_arith.cpp index bbd406a35e69e..30a8853aed05e 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_arith.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_arith.cpp @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_bitwise.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_bitwise.cpp index f212701c10572..544eb09ba8d82 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_bitwise.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_bitwise.cpp @@ -40,7 +40,7 @@ #include -#include +#include #include #include diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_comp_exchange.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_comp_exchange.cpp index b315816034a51..ce66db044ee8a 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_comp_exchange.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_comp_exchange.cpp @@ -39,7 +39,7 @@ #include -#include +#include 
#include #include diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp index 4ccc67fbff53e..2acaa85022f0b 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_memory_acq_rel.cpp @@ -38,7 +38,7 @@ #include #include -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/atomic/atomic_minmax.cpp b/sycl/test-e2e/syclcompat/atomic/atomic_minmax.cpp index 4dd5cd3a634a1..a7fe71ce4c995 100644 --- a/sycl/test-e2e/syclcompat/atomic/atomic_minmax.cpp +++ b/sycl/test-e2e/syclcompat/atomic/atomic_minmax.cpp @@ -37,7 +37,7 @@ #include -#include +#include #include #include diff --git a/sycl/test-e2e/syclcompat/dim.cpp b/sycl/test-e2e/syclcompat/dim.cpp index 69d4d487d49a4..b4f1a1595bde5 100644 --- a/sycl/test-e2e/syclcompat/dim.cpp +++ b/sycl/test-e2e/syclcompat/dim.cpp @@ -24,7 +24,7 @@ // RUN: %{run} %t.out #include -#include +#include #include int main() { diff --git a/sycl/test-e2e/syclcompat/math/math_length_test.cpp b/sycl/test-e2e/syclcompat/math/math_length_test.cpp index 63fe324fd3a16..929395d01e309 100644 --- a/sycl/test-e2e/syclcompat/math/math_length_test.cpp +++ b/sycl/test-e2e/syclcompat/math/math_length_test.cpp @@ -35,7 +35,7 @@ #include -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/math/math_vectorized_isgreater_test.cpp b/sycl/test-e2e/syclcompat/math/math_vectorized_isgreater_test.cpp index e5b5ec5202a28..15f6372a97539 100644 --- a/sycl/test-e2e/syclcompat/math/math_vectorized_isgreater_test.cpp +++ b/sycl/test-e2e/syclcompat/math/math_vectorized_isgreater_test.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include void test_kernel_vect_is_greater_1(unsigned int vect_count, diff --git a/sycl/test-e2e/syclcompat/math/math_vectorized_max_test.cpp b/sycl/test-e2e/syclcompat/math/math_vectorized_max_test.cpp 
index b5c30c4d6441a..326bf8b8ccfc1 100644 --- a/sycl/test-e2e/syclcompat/math/math_vectorized_max_test.cpp +++ b/sycl/test-e2e/syclcompat/math/math_vectorized_max_test.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include void test_kernel_vect_max(unsigned int vect_count, unsigned int *input_1, diff --git a/sycl/test-e2e/syclcompat/math/math_vectorized_min_test.cpp b/sycl/test-e2e/syclcompat/math/math_vectorized_min_test.cpp index 6a83ea8c3e92a..d303337633173 100644 --- a/sycl/test-e2e/syclcompat/math/math_vectorized_min_test.cpp +++ b/sycl/test-e2e/syclcompat/math/math_vectorized_min_test.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include void test_kernel_vect_min(unsigned int vect_count, unsigned int *input_1, diff --git a/sycl/test-e2e/syclcompat/memory/memcpy_3d.cpp b/sycl/test-e2e/syclcompat/memory/memcpy_3d.cpp index 2a78ca8fdf1fe..5c6fb48a40e3b 100644 --- a/sycl/test-e2e/syclcompat/memory/memcpy_3d.cpp +++ b/sycl/test-e2e/syclcompat/memory/memcpy_3d.cpp @@ -35,7 +35,7 @@ #include #include -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/memory/memcpy_3d2.cpp b/sycl/test-e2e/syclcompat/memory/memcpy_3d2.cpp index 24b3039194344..0d15e042bd307 100644 --- a/sycl/test-e2e/syclcompat/memory/memcpy_3d2.cpp +++ b/sycl/test-e2e/syclcompat/memory/memcpy_3d2.cpp @@ -34,7 +34,7 @@ #include #include -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/memory/memory_async.cpp b/sycl/test-e2e/syclcompat/memory/memory_async.cpp index 7396fcb872953..3eb4123014497 100644 --- a/sycl/test-e2e/syclcompat/memory/memory_async.cpp +++ b/sycl/test-e2e/syclcompat/memory/memory_async.cpp @@ -37,7 +37,7 @@ #include -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/memory/memory_management_diff_queues.cpp 
b/sycl/test-e2e/syclcompat/memory/memory_management_diff_queues.cpp index 008cdb14ec36c..3b3b54914e999 100644 --- a/sycl/test-e2e/syclcompat/memory/memory_management_diff_queues.cpp +++ b/sycl/test-e2e/syclcompat/memory/memory_management_diff_queues.cpp @@ -23,7 +23,7 @@ // RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/memory/memory_management_test3.cpp b/sycl/test-e2e/syclcompat/memory/memory_management_test3.cpp index 7d808306d9f01..0d35a0721bbc7 100644 --- a/sycl/test-e2e/syclcompat/memory/memory_management_test3.cpp +++ b/sycl/test-e2e/syclcompat/memory/memory_management_test3.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -std=c++20 -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include diff --git a/sycl/test-e2e/syclcompat/util/util_cast_value_test.cpp b/sycl/test-e2e/syclcompat/util/util_cast_value_test.cpp index eb6ab964ebf3f..c3d5b23ddd0ba 100644 --- a/sycl/test-e2e/syclcompat/util/util_cast_value_test.cpp +++ b/sycl/test-e2e/syclcompat/util/util_cast_value_test.cpp @@ -35,7 +35,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include #include diff --git a/sycl/test-e2e/syclcompat/util/util_find_first_set.cpp b/sycl/test-e2e/syclcompat/util/util_find_first_set.cpp index f3416fa8f5ce4..8a38c0dfdce56 100644 --- a/sycl/test-e2e/syclcompat/util/util_find_first_set.cpp +++ b/sycl/test-e2e/syclcompat/util/util_find_first_set.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include void find_first_set_test(int *test_result) { diff --git a/sycl/test-e2e/syclcompat/util/util_logical_group.cpp b/sycl/test-e2e/syclcompat/util/util_logical_group.cpp index 1de54cf5eb6ec..9e31ca24ba499 100644 --- a/sycl/test-e2e/syclcompat/util/util_logical_group.cpp +++ 
b/sycl/test-e2e/syclcompat/util/util_logical_group.cpp @@ -36,7 +36,7 @@ #include -#include +#include #include // work-item: diff --git a/sycl/test-e2e/syclcompat/util/util_matrix_mem_copy_test.cpp b/sycl/test-e2e/syclcompat/util/util_matrix_mem_copy_test.cpp index 7e606cfb6b7ec..7bec92ccb9ae3 100644 --- a/sycl/test-e2e/syclcompat/util/util_matrix_mem_copy_test.cpp +++ b/sycl/test-e2e/syclcompat/util/util_matrix_mem_copy_test.cpp @@ -34,7 +34,7 @@ // RUN: %{run} %t.out #include -#include +#include #include #define M 3 diff --git a/sycl/test-e2e/syclcompat/util/util_nd_range_barrier_test.cpp b/sycl/test-e2e/syclcompat/util/util_nd_range_barrier_test.cpp index b7a19f1df7da5..9a4ebe441352c 100644 --- a/sycl/test-e2e/syclcompat/util/util_nd_range_barrier_test.cpp +++ b/sycl/test-e2e/syclcompat/util/util_nd_range_barrier_test.cpp @@ -37,7 +37,7 @@ #include #include -#include +#include #include void kernel_1( diff --git a/sycl/test-e2e/syclcompat/util/util_perm_byte_test.cpp b/sycl/test-e2e/syclcompat/util/util_perm_byte_test.cpp index b4578e3c4ef5b..7e3f0e3b523fe 100644 --- a/sycl/test-e2e/syclcompat/util/util_perm_byte_test.cpp +++ b/sycl/test-e2e/syclcompat/util/util_perm_byte_test.cpp @@ -35,7 +35,7 @@ #include #include -#include +#include #include void byte_perm_ref(unsigned int *d_data) { diff --git a/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp b/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp index f544b9d1f277f..75c0382053e69 100644 --- a/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp +++ b/sycl/test-e2e/syclcompat/util/util_permute_sub_group_by_xor.cpp @@ -34,7 +34,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include #define WARP_SIZE 32 diff --git a/sycl/test-e2e/syclcompat/util/util_reverse_bits_test.cpp b/sycl/test-e2e/syclcompat/util/util_reverse_bits_test.cpp index a9a339874a9ca..32d0594f821dc 100644 --- 
a/sycl/test-e2e/syclcompat/util/util_reverse_bits_test.cpp +++ b/sycl/test-e2e/syclcompat/util/util_reverse_bits_test.cpp @@ -33,7 +33,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include void test_reverse_bits() { diff --git a/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp b/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp index fc40ede7357e0..a5ee1c3277552 100644 --- a/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp +++ b/sycl/test-e2e/syclcompat/util/util_select_from_sub_group.cpp @@ -34,7 +34,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include #define WARP_SIZE 32 diff --git a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp index 3a3d66265b4f2..061b21b61bd53 100644 --- a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp +++ b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_left.cpp @@ -34,7 +34,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include #define DATA_NUM 128 diff --git a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp index 1eb65d4302224..e80c16ed9fa00 100644 --- a/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp +++ b/sycl/test-e2e/syclcompat/util/util_shift_sub_group_right.cpp @@ -34,7 +34,7 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -#include +#include #include #define DATA_NUM 128 From 82c5a83b89851355ee0503b3999815f460d39f1c Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 21 Mar 2024 14:10:40 -0700 Subject: [PATCH 17/22] [SYCL] Bump SYCL_MAJOR_VERSION to "8" (#13097) It will be an ABI breaking release but we expect to allow such changes starting April 1st. 
--- clang/lib/Driver/CMakeLists.txt | 2 +- sycl/CMakeLists.txt | 6 +++--- sycl/test-e2e/lit.cfg.py | 4 ++-- sycl/test/abi/sycl_symbols_windows.dump | 2 +- sycl/test/lit.cfg.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index d5d2bd4aab75e..bfeb4a763da84 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -19,7 +19,7 @@ endif() # This must be in sync with llvm/sycl/CMakeLists.txt. SET_SOURCE_FILES_PROPERTIES( ToolChains/MSVC.cpp ToolChains/Clang.cpp - PROPERTIES COMPILE_DEFINITIONS SYCL_MAJOR_VERSION="7" ) + PROPERTIES COMPILE_DEFINITIONS SYCL_MAJOR_VERSION="8" ) add_clang_library(clangDriver Action.cpp diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 00ce045f43c3c..623a587c1dc6c 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -32,11 +32,11 @@ include(SYCLUtils) # # See doc/developer/ABIPolicyGuide.md for the meaning when in the middle of # development cycle. 
-set(SYCL_MAJOR_VERSION 7) -set(SYCL_MINOR_VERSION 2) +set(SYCL_MAJOR_VERSION 8) +set(SYCL_MINOR_VERSION 0) set(SYCL_PATCH_VERSION 0) -set(SYCL_DEV_ABI_VERSION 8) +set(SYCL_DEV_ABI_VERSION 0) if (SYCL_ADD_DEV_VERSION_POSTFIX) set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}") endif() diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py index 62bd5a05ce080..bddcfded0f614 100644 --- a/sycl/test-e2e/lit.cfg.py +++ b/sycl/test-e2e/lit.cfg.py @@ -340,7 +340,7 @@ ( "%sycl_options", " " - + os.path.normpath(os.path.join(config.sycl_libs_dir + "/../lib/sycl7.lib")) + + os.path.normpath(os.path.join(config.sycl_libs_dir + "/../lib/sycl8.lib")) + " /I" + config.sycl_include + " /I" @@ -356,7 +356,7 @@ config.substitutions.append( ( "%sycl_options", - (" -lsycl7" if platform.system() == "Windows" else " -lsycl") + (" -lsycl8" if platform.system() == "Windows" else " -lsycl") + " -I" + config.sycl_include + " -I" diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 2f180817725d4..0ed3998b6eb55 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3,7 +3,7 @@ # DO NOT EDIT IT MANUALLY. Refer to sycl/doc/developer/ABIPolicyGuide.md for more info. 
################################################################################ -# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir %python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %llvm_build_bin_dir/sycl7.dll +# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir %python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %llvm_build_bin_dir/sycl8.dll # REQUIRES: windows # UNSUPPORTED: libcxx diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index 104a4bd6c9d22..1fc5ef99e6f5a 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -133,7 +133,7 @@ config.substitutions.append(("%fsycl-host-only", sycl_host_only_options)) config.substitutions.append( - ("%sycl_lib", " -lsycl7" if platform.system() == "Windows" else "-lsycl") + ("%sycl_lib", " -lsycl8" if platform.system() == "Windows" else "-lsycl") ) llvm_config.add_tool_substitutions(["llvm-spirv"], [config.sycl_tools_dir]) From 345c3d0cb5c023d0c7c217e3f7897bc4556934b3 Mon Sep 17 00:00:00 2001 From: aelovikov-intel Date: Thu, 21 Mar 2024 15:04:56 -0700 Subject: [PATCH 18/22] [CI] Enable pr-code-format on branches in intel/llvm (#13101) --- .github/workflows/pr-code-format.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml index d72edd1cbfd15..a03a29602c7f9 100644 --- a/.github/workflows/pr-code-format.yml +++ b/.github/workflows/pr-code-format.yml @@ -3,6 +3,9 @@ on: pull_request_target: branches: - main + - sycl + - sycl-devops-pr/** + - sycl-rel-** permissions: pull-requests: write From 43e1aa28c0c98f3304274bd7690ce3db89b64600 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Thu, 21 Mar 2024 15:17:24 -0700 Subject: [PATCH 19/22] [SYCL] ensuring ZEX_NUMBER_OF_CCS is only used on COMPOSITE hierarchy mode (#13067) ZEX_NUMBER_OF_CCS not yet working in FLAT mode, the new default on PVC. Shoring up the tests in the interim. 
--- .../Plugin/level_zero_ext_intel_cslice.cpp | 19 +++++++++++++------ .../level_zero_ext_intel_queue_index.cpp | 12 ++++++++++-- .../Plugin/level_zero_sub_sub_device.cpp | 11 +++++++++-- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/sycl/test-e2e/Plugin/level_zero_ext_intel_cslice.cpp b/sycl/test-e2e/Plugin/level_zero_ext_intel_cslice.cpp index 25a97c4e26238..9c19691fd872a 100644 --- a/sycl/test-e2e/Plugin/level_zero_ext_intel_cslice.cpp +++ b/sycl/test-e2e/Plugin/level_zero_ext_intel_cslice.cpp @@ -3,18 +3,25 @@ // RUN: %{build} -o %t.out -// RUN: env ZEX_NUMBER_OF_CCS=0:4 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC +// TODO - at this time ZEX_NUMBER_OF_CCS is not working with FLAT hierarchy, +// which is the new default on PVC. Once it is supported, we'll test on both. +// In the interim, these are the environment vars that must be set to get cslice +// or the extra level of partition_by_affinity_domain with the "EXPOSE_" env +// var. +// DEFINE: %{setup_env} = env ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE ZE_AFFINITY_MASK=0 ZEX_NUMBER_OF_CCS=0:4 -// RUN: env SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 \ -// RUN: ZEX_NUMBER_OF_CCS=0:4 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC +// RUN: %{setup_env} env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC + +// RUN: %{setup_env} env SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 \ +// RUN: UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC // Same, but without using immediate commandlists: -// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 ZEX_NUMBER_OF_CCS=0:4 \ +// RUN: %{setup_env} env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 \ // RUN: UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC -// RUN: env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 \ -// RUN: 
ZEX_NUMBER_OF_CCS=0:4 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC +// RUN: %{setup_env} env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 \ +// RUN: UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC #include diff --git a/sycl/test-e2e/Plugin/level_zero_ext_intel_queue_index.cpp b/sycl/test-e2e/Plugin/level_zero_ext_intel_queue_index.cpp index e04f280a8e986..0e3752733291a 100644 --- a/sycl/test-e2e/Plugin/level_zero_ext_intel_queue_index.cpp +++ b/sycl/test-e2e/Plugin/level_zero_ext_intel_queue_index.cpp @@ -1,10 +1,18 @@ // REQUIRES: aspect-ext_intel_device_id // REQUIRES: level_zero // RUN: %{build} -o %t.out -// RUN: env ZEX_NUMBER_OF_CCS=0:4 env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC + +// TODO - at this time ZEX_NUMBER_OF_CCS is not working with FLAT hierarchy, +// which is the new default on PVC. Once it is supported, we'll test on both. +// In the interim, these are the environment vars that must be set to get cslice +// or the extra level of partition_by_affinity_domain with the "EXPOSE_" env +// var. 
+// DEFINE: %{setup_env} = env ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE ZE_AFFINITY_MASK=0 ZEX_NUMBER_OF_CCS=0:4 + +// RUN: %{setup_env} env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC // // Same with Immediate CommandLists -// RUN: env SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 env ZEX_NUMBER_OF_CCS=0:4 env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC +// RUN: %{setup_env} env SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING=1 env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK-PVC #include diff --git a/sycl/test-e2e/Plugin/level_zero_sub_sub_device.cpp b/sycl/test-e2e/Plugin/level_zero_sub_sub_device.cpp index 60287e71cee85..d9333e176acd7 100644 --- a/sycl/test-e2e/Plugin/level_zero_sub_sub_device.cpp +++ b/sycl/test-e2e/Plugin/level_zero_sub_sub_device.cpp @@ -1,8 +1,15 @@ // REQUIRES: gpu-intel-pvc, level_zero // RUN: %{build} %level_zero_options -o %t.out -// RUN: env UR_L0_DEBUG=1 env ZEX_NUMBER_OF_CCS=0:4 %{run} %t.out 2>&1 | FileCheck %s -// RUN: env ZEX_NUMBER_OF_CCS=0:4 %{run} %t.out + +// TODO - at this time ZEX_NUMBER_OF_CCS is not working with FLAT hierarchy, +// which is the new default on PVC. Once it is supported, we'll test on both. +// In the interim, these are the environment vars that must be used in +// conjunction with ZEX_NUMBER_OF_CCS +// DEFINE: %{setup_env} = env ZE_FLAT_DEVICE_HIERARCHY=COMPOSITE ZE_AFFINITY_MASK=0 ZEX_NUMBER_OF_CCS=0:4 + +// RUN: %{setup_env} env UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s +// RUN: %{setup_env} %{run} %t.out // Check that queues created on sub-sub-devices are going to specific compute // engines: From 4ca6aac905b341bfddd59dd38c577bd13b0f3de6 Mon Sep 17 00:00:00 2001 From: Mike Rice Date: Thu, 21 Mar 2024 15:41:48 -0700 Subject: [PATCH 20/22] [clang][NFC] Regroup Sema SYCL declarations (#13099) In ac74d9ec0f73898713dd6e0d33f148c13a51875d declarations in Sema.h were regrouped by file. 
This change moves the SYCL code into the correct section. --- clang/include/clang/Sema/Sema.h | 450 ++++++++++++++++---------------- 1 file changed, 226 insertions(+), 224 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 81fc0ce4501bd..c600bcaa0089b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2167,32 +2167,6 @@ class Sema final { // // - SYCLIntelIVDepAttr * - BuildSYCLIntelIVDepAttr(const AttributeCommonInfo &CI, Expr *Expr1, - Expr *Expr2); - LoopUnrollHintAttr *BuildLoopUnrollHintAttr(const AttributeCommonInfo &A, - Expr *E); - OpenCLUnrollHintAttr * - BuildOpenCLLoopUnrollHintAttr(const AttributeCommonInfo &A, Expr *E); - - SYCLIntelLoopCountAttr * - BuildSYCLIntelLoopCountAttr(const AttributeCommonInfo &CI, Expr *E); - SYCLIntelInitiationIntervalAttr * - BuildSYCLIntelInitiationIntervalAttr(const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelMaxConcurrencyAttr * - BuildSYCLIntelMaxConcurrencyAttr(const AttributeCommonInfo &CI, Expr *E); - SYCLIntelMaxInterleavingAttr * - BuildSYCLIntelMaxInterleavingAttr(const AttributeCommonInfo &CI, Expr *E); - SYCLIntelSpeculatedIterationsAttr * - BuildSYCLIntelSpeculatedIterationsAttr(const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelLoopCoalesceAttr * - BuildSYCLIntelLoopCoalesceAttr(const AttributeCommonInfo &CI, Expr *E); - SYCLIntelMaxReinvocationDelayAttr * - BuildSYCLIntelMaxReinvocationDelayAttr(const AttributeCommonInfo &CI, - Expr *E); - /// \name Casts /// Implementations are in SemaCast.cpp ///@{ @@ -2422,22 +2396,6 @@ class Sema final { }; bool IsLayoutCompatible(QualType T1, QualType T2) const; - template - static bool isTypeDecoratedWithDeclAttribute(QualType Ty) { - const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl(); - if (!RecTy) - return false; - - if (RecTy->hasAttr()) - return true; - - if (auto *CTSD = dyn_cast(RecTy)) { - ClassTemplateDecl *Template = CTSD->getSpecializedTemplate(); - if (CXXRecordDecl 
*RD = Template->getTemplatedDecl()) - return RD->hasAttr(); - } - return false; - } bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto); @@ -2687,6 +2645,13 @@ class Sema final { /// Adds an expression to the set of gathered misaligned members. void AddPotentialMisalignedMembers(Expr *E, RecordDecl *RD, ValueDecl *MD, CharUnits Alignment); + + bool CheckIntelFPGARegBuiltinFunctionCall(unsigned BuiltinID, CallExpr *Call); + bool CheckIntelFPGAMemBuiltinFunctionCall(CallExpr *Call); + bool CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *Call); + bool CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned BuiltinID, + CallExpr *Call); ///@} // @@ -3943,6 +3908,8 @@ class Sema final { // Whether the callee should be ignored in CUDA/HIP/OpenMP host/device check. bool shouldIgnoreInHostDeviceCheck(FunctionDecl *Callee); + DeviceDiagnosticReason getEmissionReason(const FunctionDecl *Decl); + private: /// Function or variable declarations to be checked for whether the deferred /// diagnostics should be emitted. @@ -4209,20 +4176,6 @@ class Sema final { void addAMDGPUMaxNumWorkGroupsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *XExpr, Expr *YExpr, Expr *ZExpr); - /// addSYCLIntelPipeIOAttr - Adds a pipe I/O attribute to a particular - /// declaration. - void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID); - SYCLIntelPipeIOAttr *MergeSYCLIntelPipeIOAttr(Decl *D, - const SYCLIntelPipeIOAttr &A); - - /// AddSYCLIntelMaxConcurrencyAttr - Adds a max_concurrency attribute to a - /// particular declaration. 
- void AddSYCLIntelMaxConcurrencyAttr(Decl *D, - const AttributeCommonInfo &CI, - Expr *E); - - bool checkAllowedSYCLInitializer(VarDecl *VD); - //===--------------------------------------------------------------------===// DLLImportAttr *mergeDLLImportAttr(Decl *D, const AttributeCommonInfo &CI); DLLExportAttr *mergeDLLExportAttr(Decl *D, const AttributeCommonInfo &CI); MSInheritanceAttr *mergeMSInheritanceAttr(Decl *D, @@ -4230,20 +4183,10 @@ class Sema final { bool BestCase, MSInheritanceModel Model); - bool CheckCountedByAttr(Scope *Scope, const FieldDecl *FD); - EnforceTCBAttr *mergeEnforceTCBAttr(Decl *D, const EnforceTCBAttr &AL); EnforceTCBLeafAttr *mergeEnforceTCBLeafAttr(Decl *D, const EnforceTCBLeafAttr &AL); -public: - - DeviceDiagnosticReason getEmissionReason(const FunctionDecl *Decl); - - //@} - - // More parsing and symbol table subroutines. - // Helper for delayed processing of attributes. void ProcessDeclAttributeDelayed(Decl *D, const ParsedAttributesView &AttrList); @@ -4300,8 +4243,173 @@ class Sema final { void redelayDiagnostics(sema::DelayedDiagnosticPool &pool); + void AddSYCLIntelBankBitsAttr(Decl *D, const AttributeCommonInfo &CI, + Expr **Exprs, unsigned Size); + bool AnyWorkGroupSizesDiffer(const Expr *LHSXDim, const Expr *LHSYDim, + const Expr *LHSZDim, const Expr *RHSXDim, + const Expr *RHSYDim, const Expr *RHSZDim); + bool AllWorkGroupSizesSame(const Expr *LHSXDim, const Expr *LHSYDim, + const Expr *LHSZDim, const Expr *RHSXDim, + const Expr *RHSYDim, const Expr *RHSZDim); + void AddSYCLWorkGroupSizeHintAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *XDim, Expr *YDim, Expr *ZDim); + SYCLWorkGroupSizeHintAttr * + MergeSYCLWorkGroupSizeHintAttr(Decl *D, const SYCLWorkGroupSizeHintAttr &A); + void AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + IntelReqdSubGroupSizeAttr * + MergeIntelReqdSubGroupSizeAttr(Decl *D, const IntelReqdSubGroupSizeAttr &A); + IntelNamedSubGroupSizeAttr * + 
MergeIntelNamedSubGroupSizeAttr(Decl *D, const IntelNamedSubGroupSizeAttr &A); + void AddSYCLIntelNumSimdWorkItemsAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelNumSimdWorkItemsAttr * + MergeSYCLIntelNumSimdWorkItemsAttr(Decl *D, + const SYCLIntelNumSimdWorkItemsAttr &A); + void AddSYCLIntelESimdVectorizeAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelESimdVectorizeAttr * + MergeSYCLIntelESimdVectorizeAttr(Decl *D, + const SYCLIntelESimdVectorizeAttr &A); + void AddSYCLIntelSchedulerTargetFmaxMhzAttr(Decl *D, + const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelSchedulerTargetFmaxMhzAttr *MergeSYCLIntelSchedulerTargetFmaxMhzAttr( + Decl *D, const SYCLIntelSchedulerTargetFmaxMhzAttr &A); + void AddSYCLIntelNoGlobalWorkOffsetAttr(Decl *D, + const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelNoGlobalWorkOffsetAttr *MergeSYCLIntelNoGlobalWorkOffsetAttr( + Decl *D, const SYCLIntelNoGlobalWorkOffsetAttr &A); + void AddSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelLoopFuseAttr * + MergeSYCLIntelLoopFuseAttr(Decl *D, const SYCLIntelLoopFuseAttr &A); + void AddSYCLIntelPrivateCopiesAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + void AddSYCLIntelMaxReplicatesAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelMaxReplicatesAttr * + MergeSYCLIntelMaxReplicatesAttr(Decl *D, const SYCLIntelMaxReplicatesAttr &A); + void AddSYCLIntelForcePow2DepthAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelForcePow2DepthAttr * + MergeSYCLIntelForcePow2DepthAttr(Decl *D, + const SYCLIntelForcePow2DepthAttr &A); + void AddSYCLIntelInitiationIntervalAttr(Decl *D, + const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelInitiationIntervalAttr *MergeSYCLIntelInitiationIntervalAttr( + Decl *D, const SYCLIntelInitiationIntervalAttr &A); + + SYCLIntelMaxConcurrencyAttr * + MergeSYCLIntelMaxConcurrencyAttr(Decl *D, + const SYCLIntelMaxConcurrencyAttr &A); + void 
AddSYCLIntelMaxGlobalWorkDimAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelMaxGlobalWorkDimAttr * + MergeSYCLIntelMaxGlobalWorkDimAttr(Decl *D, + const SYCLIntelMaxGlobalWorkDimAttr &A); + void AddSYCLIntelMinWorkGroupsPerComputeUnitAttr( + Decl *D, const AttributeCommonInfo &CI, Expr *E); + SYCLIntelMinWorkGroupsPerComputeUnitAttr * + MergeSYCLIntelMinWorkGroupsPerComputeUnitAttr( + Decl *D, const SYCLIntelMinWorkGroupsPerComputeUnitAttr &A); + void AddSYCLIntelMaxWorkGroupsPerMultiprocessorAttr( + Decl *D, const AttributeCommonInfo &CI, Expr *E); + SYCLIntelMaxWorkGroupsPerMultiprocessorAttr * + MergeSYCLIntelMaxWorkGroupsPerMultiprocessorAttr( + Decl *D, const SYCLIntelMaxWorkGroupsPerMultiprocessorAttr &A); + void AddSYCLIntelBankWidthAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelBankWidthAttr * + MergeSYCLIntelBankWidthAttr(Decl *D, const SYCLIntelBankWidthAttr &A); + void AddSYCLIntelNumBanksAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelNumBanksAttr * + MergeSYCLIntelNumBanksAttr(Decl *D, const SYCLIntelNumBanksAttr &A); + SYCLDeviceHasAttr *MergeSYCLDeviceHasAttr(Decl *D, + const SYCLDeviceHasAttr &A); + void AddSYCLDeviceHasAttr(Decl *D, const AttributeCommonInfo &CI, + Expr **Exprs, unsigned Size); + SYCLUsesAspectsAttr *MergeSYCLUsesAspectsAttr(Decl *D, + const SYCLUsesAspectsAttr &A); + void AddSYCLUsesAspectsAttr(Decl *D, const AttributeCommonInfo &CI, + Expr **Exprs, unsigned Size); + bool CheckMaxAllowedWorkGroupSize(const Expr *RWGSXDim, const Expr *RWGSYDim, + const Expr *RWGSZDim, const Expr *MWGSXDim, + const Expr *MWGSYDim, const Expr *MWGSZDim); + void AddSYCLIntelMaxWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *XDim, Expr *YDim, Expr *ZDim); + SYCLIntelMaxWorkGroupSizeAttr * + MergeSYCLIntelMaxWorkGroupSizeAttr(Decl *D, + const SYCLIntelMaxWorkGroupSizeAttr &A); + void CheckSYCLAddIRAttributesFunctionAttrConflicts(Decl *D); + SYCLAddIRAttributesFunctionAttr 
*MergeSYCLAddIRAttributesFunctionAttr( + Decl *D, const SYCLAddIRAttributesFunctionAttr &A); + void AddSYCLAddIRAttributesFunctionAttr(Decl *D, + const AttributeCommonInfo &CI, + MutableArrayRef Args); + SYCLAddIRAttributesKernelParameterAttr * + MergeSYCLAddIRAttributesKernelParameterAttr( + Decl *D, const SYCLAddIRAttributesKernelParameterAttr &A); + void AddSYCLAddIRAttributesKernelParameterAttr(Decl *D, + const AttributeCommonInfo &CI, + MutableArrayRef Args); + SYCLAddIRAttributesGlobalVariableAttr * + MergeSYCLAddIRAttributesGlobalVariableAttr( + Decl *D, const SYCLAddIRAttributesGlobalVariableAttr &A); + void AddSYCLAddIRAttributesGlobalVariableAttr(Decl *D, + const AttributeCommonInfo &CI, + MutableArrayRef Args); + SYCLAddIRAnnotationsMemberAttr * + MergeSYCLAddIRAnnotationsMemberAttr(Decl *D, + const SYCLAddIRAnnotationsMemberAttr &A); + void AddSYCLAddIRAnnotationsMemberAttr(Decl *D, const AttributeCommonInfo &CI, + MutableArrayRef Args); + void AddSYCLReqdWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *XDim, Expr *YDim, Expr *ZDim); + SYCLReqdWorkGroupSizeAttr * + MergeSYCLReqdWorkGroupSizeAttr(Decl *D, const SYCLReqdWorkGroupSizeAttr &A); + + SYCLTypeAttr *MergeSYCLTypeAttr(Decl *D, const AttributeCommonInfo &CI, + SYCLTypeAttr::SYCLType TypeName); + + /// Emit a diagnostic about the given attribute having a deprecated name, and + /// also emit a fixit hint to generate the new attribute name. + void DiagnoseDeprecatedAttribute(const ParsedAttr &A, StringRef NewScope, + StringRef NewName); + + /// Diagnoses an attribute in the 'intelfpga' namespace and suggests using + /// the attribute in the 'intel' namespace instead. + void CheckDeprecatedSYCLAttributeSpelling(const ParsedAttr &A, + StringRef NewName = ""); + + /// addSYCLIntelPipeIOAttr - Adds a pipe I/O attribute to a particular + /// declaration. 
+ void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID); + SYCLIntelPipeIOAttr *MergeSYCLIntelPipeIOAttr(Decl *D, + const SYCLIntelPipeIOAttr &A); + + /// AddSYCLIntelMaxConcurrencyAttr - Adds a max_concurrency attribute to a + /// particular declaration. + void AddSYCLIntelMaxConcurrencyAttr(Decl *D, const AttributeCommonInfo &CI, + Expr *E); + + bool CheckCountedByAttr(Scope *Scope, const FieldDecl *FD); + ///@} + + // + // + // ------------------------------------------------------------------------- + // // + + /// \name C++ Declarations + /// Implementations are in SemaDeclCXX.cpp + ///@{ + public: void CheckDelegatingCtorCycles(); @@ -5939,13 +6047,6 @@ class Sema final { SourceLocation RParen, ParsedType ParsedTy); - ExprResult BuildSYCLUniqueStableIdExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, Expr *E); - ExprResult ActOnSYCLUniqueStableIdExpr(SourceLocation OpLoc, - SourceLocation LParen, - SourceLocation RParen, Expr *E); - bool CheckLoopHintExpr(Expr *E, SourceLocation Loc); ExprResult ActOnNumericConstant(const Token &Tok, Scope *UDLScope = nullptr); @@ -6997,6 +7098,14 @@ class Sema final { void CheckSubscriptAccessOfNoDeref(const ArraySubscriptExpr *E); void CheckAddressOfNoDeref(const Expr *E); +public: + ExprResult BuildSYCLUniqueStableIdExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, Expr *E); + ExprResult ActOnSYCLUniqueStableIdExpr(SourceLocation OpLoc, + SourceLocation LParen, + SourceLocation RParen, Expr *E); + ///@} // @@ -9330,6 +9439,30 @@ class Sema final { const IdentifierInfo *AttrName, SourceRange Range); + SYCLIntelIVDepAttr *BuildSYCLIntelIVDepAttr(const AttributeCommonInfo &CI, + Expr *Expr1, Expr *Expr2); + LoopUnrollHintAttr *BuildLoopUnrollHintAttr(const AttributeCommonInfo &A, + Expr *E); + OpenCLUnrollHintAttr * + BuildOpenCLLoopUnrollHintAttr(const AttributeCommonInfo &A, Expr *E); + + SYCLIntelLoopCountAttr * + 
BuildSYCLIntelLoopCountAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelInitiationIntervalAttr * + BuildSYCLIntelInitiationIntervalAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelMaxConcurrencyAttr * + BuildSYCLIntelMaxConcurrencyAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelMaxInterleavingAttr * + BuildSYCLIntelMaxInterleavingAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelSpeculatedIterationsAttr * + BuildSYCLIntelSpeculatedIterationsAttr(const AttributeCommonInfo &CI, + Expr *E); + SYCLIntelLoopCoalesceAttr * + BuildSYCLIntelLoopCoalesceAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelMaxReinvocationDelayAttr * + BuildSYCLIntelMaxReinvocationDelayAttr(const AttributeCommonInfo &CI, + Expr *E); + ///@} // @@ -12701,138 +12834,6 @@ class Sema final { AddMethodToGlobalPool(Method, impl, /*instance*/ false); } - void AddSYCLIntelBankBitsAttr(Decl *D, const AttributeCommonInfo &CI, - Expr **Exprs, unsigned Size); - bool AnyWorkGroupSizesDiffer(const Expr *LHSXDim, const Expr *LHSYDim, - const Expr *LHSZDim, const Expr *RHSXDim, - const Expr *RHSYDim, const Expr *RHSZDim); - bool AllWorkGroupSizesSame(const Expr *LHSXDim, const Expr *LHSYDim, - const Expr *LHSZDim, const Expr *RHSXDim, - const Expr *RHSYDim, const Expr *RHSZDim); - void AddSYCLWorkGroupSizeHintAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *XDim, Expr *YDim, Expr *ZDim); - SYCLWorkGroupSizeHintAttr * - MergeSYCLWorkGroupSizeHintAttr(Decl *D, const SYCLWorkGroupSizeHintAttr &A); - void AddIntelReqdSubGroupSize(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - IntelReqdSubGroupSizeAttr * - MergeIntelReqdSubGroupSizeAttr(Decl *D, const IntelReqdSubGroupSizeAttr &A); - IntelNamedSubGroupSizeAttr * - MergeIntelNamedSubGroupSizeAttr(Decl *D, const IntelNamedSubGroupSizeAttr &A); - void AddSYCLIntelNumSimdWorkItemsAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelNumSimdWorkItemsAttr * - MergeSYCLIntelNumSimdWorkItemsAttr(Decl *D, - 
const SYCLIntelNumSimdWorkItemsAttr &A); - void AddSYCLIntelESimdVectorizeAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelESimdVectorizeAttr * - MergeSYCLIntelESimdVectorizeAttr(Decl *D, - const SYCLIntelESimdVectorizeAttr &A); - void AddSYCLIntelSchedulerTargetFmaxMhzAttr(Decl *D, - const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelSchedulerTargetFmaxMhzAttr *MergeSYCLIntelSchedulerTargetFmaxMhzAttr( - Decl *D, const SYCLIntelSchedulerTargetFmaxMhzAttr &A); - void AddSYCLIntelNoGlobalWorkOffsetAttr(Decl *D, - const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelNoGlobalWorkOffsetAttr *MergeSYCLIntelNoGlobalWorkOffsetAttr( - Decl *D, const SYCLIntelNoGlobalWorkOffsetAttr &A); - void AddSYCLIntelLoopFuseAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelLoopFuseAttr * - MergeSYCLIntelLoopFuseAttr(Decl *D, const SYCLIntelLoopFuseAttr &A); - void AddSYCLIntelPrivateCopiesAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - void AddSYCLIntelMaxReplicatesAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelMaxReplicatesAttr * - MergeSYCLIntelMaxReplicatesAttr(Decl *D, const SYCLIntelMaxReplicatesAttr &A); - void AddSYCLIntelForcePow2DepthAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelForcePow2DepthAttr * - MergeSYCLIntelForcePow2DepthAttr(Decl *D, - const SYCLIntelForcePow2DepthAttr &A); - void AddSYCLIntelInitiationIntervalAttr(Decl *D, - const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelInitiationIntervalAttr *MergeSYCLIntelInitiationIntervalAttr( - Decl *D, const SYCLIntelInitiationIntervalAttr &A); - - SYCLIntelMaxConcurrencyAttr *MergeSYCLIntelMaxConcurrencyAttr( - Decl *D, const SYCLIntelMaxConcurrencyAttr &A); - void AddSYCLIntelMaxGlobalWorkDimAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelMaxGlobalWorkDimAttr * - MergeSYCLIntelMaxGlobalWorkDimAttr(Decl *D, - const SYCLIntelMaxGlobalWorkDimAttr &A); - void AddSYCLIntelMinWorkGroupsPerComputeUnitAttr( - 
Decl *D, const AttributeCommonInfo &CI, Expr *E); - SYCLIntelMinWorkGroupsPerComputeUnitAttr * - MergeSYCLIntelMinWorkGroupsPerComputeUnitAttr( - Decl *D, const SYCLIntelMinWorkGroupsPerComputeUnitAttr &A); - void AddSYCLIntelMaxWorkGroupsPerMultiprocessorAttr( - Decl *D, const AttributeCommonInfo &CI, Expr *E); - SYCLIntelMaxWorkGroupsPerMultiprocessorAttr * - MergeSYCLIntelMaxWorkGroupsPerMultiprocessorAttr( - Decl *D, const SYCLIntelMaxWorkGroupsPerMultiprocessorAttr &A); - void AddSYCLIntelBankWidthAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelBankWidthAttr * - MergeSYCLIntelBankWidthAttr(Decl *D, const SYCLIntelBankWidthAttr &A); - void AddSYCLIntelNumBanksAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *E); - SYCLIntelNumBanksAttr * - MergeSYCLIntelNumBanksAttr(Decl *D, const SYCLIntelNumBanksAttr &A); - SYCLDeviceHasAttr *MergeSYCLDeviceHasAttr(Decl *D, - const SYCLDeviceHasAttr &A); - void AddSYCLDeviceHasAttr(Decl *D, const AttributeCommonInfo &CI, - Expr **Exprs, unsigned Size); - SYCLUsesAspectsAttr *MergeSYCLUsesAspectsAttr(Decl *D, - const SYCLUsesAspectsAttr &A); - void AddSYCLUsesAspectsAttr(Decl *D, const AttributeCommonInfo &CI, - Expr **Exprs, unsigned Size); - bool CheckMaxAllowedWorkGroupSize(const Expr *RWGSXDim, const Expr *RWGSYDim, - const Expr *RWGSZDim, const Expr *MWGSXDim, - const Expr *MWGSYDim, const Expr *MWGSZDim); - void AddSYCLIntelMaxWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *XDim, Expr *YDim, Expr *ZDim); - SYCLIntelMaxWorkGroupSizeAttr * - MergeSYCLIntelMaxWorkGroupSizeAttr(Decl *D, - const SYCLIntelMaxWorkGroupSizeAttr &A); - void CheckSYCLAddIRAttributesFunctionAttrConflicts(Decl *D); - SYCLAddIRAttributesFunctionAttr *MergeSYCLAddIRAttributesFunctionAttr( - Decl *D, const SYCLAddIRAttributesFunctionAttr &A); - void AddSYCLAddIRAttributesFunctionAttr(Decl *D, - const AttributeCommonInfo &CI, - MutableArrayRef Args); - SYCLAddIRAttributesKernelParameterAttr * - 
MergeSYCLAddIRAttributesKernelParameterAttr( - Decl *D, const SYCLAddIRAttributesKernelParameterAttr &A); - void AddSYCLAddIRAttributesKernelParameterAttr(Decl *D, - const AttributeCommonInfo &CI, - MutableArrayRef Args); - SYCLAddIRAttributesGlobalVariableAttr * - MergeSYCLAddIRAttributesGlobalVariableAttr( - Decl *D, const SYCLAddIRAttributesGlobalVariableAttr &A); - void AddSYCLAddIRAttributesGlobalVariableAttr(Decl *D, - const AttributeCommonInfo &CI, - MutableArrayRef Args); - SYCLAddIRAnnotationsMemberAttr * - MergeSYCLAddIRAnnotationsMemberAttr(Decl *D, - const SYCLAddIRAnnotationsMemberAttr &A); - void AddSYCLAddIRAnnotationsMemberAttr(Decl *D, const AttributeCommonInfo &CI, - MutableArrayRef Args); - void AddSYCLReqdWorkGroupSizeAttr(Decl *D, const AttributeCommonInfo &CI, - Expr *XDim, Expr *YDim, Expr *ZDim); - SYCLReqdWorkGroupSizeAttr * - MergeSYCLReqdWorkGroupSizeAttr(Decl *D, const SYCLReqdWorkGroupSizeAttr &A); - - SYCLTypeAttr *MergeSYCLTypeAttr(Decl *D, const AttributeCommonInfo &CI, - SYCLTypeAttr::SYCLType TypeName); - - private: /// AddMethodToGlobalPool - Add an instance or factory method to the global /// pool. See descriptoin of AddInstanceMethodToGlobalPool. @@ -15220,16 +15221,6 @@ class Sema final { void CheckSYCLKernelCall(FunctionDecl *CallerFunc, ArrayRef Args); - - bool CheckIntelFPGARegBuiltinFunctionCall(unsigned BuiltinID, CallExpr *Call); - bool CheckIntelFPGAMemBuiltinFunctionCall(CallExpr *Call); - - bool CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID, - CallExpr *Call); - bool CheckIntelSYCLAllocaBuiltinFunctionCall(unsigned BuiltinID, - CallExpr *Call); - -private: // We store SYCL Kernels here and handle separately -- which is a hack. // FIXME: It would be best to refactor this. 
llvm::SetVector SyclDeviceDecls; @@ -15321,15 +15312,7 @@ class Sema final { ExprResult BuildSYCLBuiltinBaseTypeExpr(SourceLocation Loc, QualType SourceTy, Expr *Idx); - /// Emit a diagnostic about the given attribute having a deprecated name, and - /// also emit a fixit hint to generate the new attribute name. - void DiagnoseDeprecatedAttribute(const ParsedAttr &A, StringRef NewScope, - StringRef NewName); - - /// Diagnoses an attribute in the 'intelfpga' namespace and suggests using - /// the attribute in the 'intel' namespace instead. - void CheckDeprecatedSYCLAttributeSpelling(const ParsedAttr &A, - StringRef NewName = ""); + bool checkAllowedSYCLInitializer(VarDecl *VD); /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current /// context is "used as device code". @@ -15373,8 +15356,27 @@ class Sema final { (VDecl->getType().getAddressSpace() == LangAS::sycl_private); } + template + static bool isTypeDecoratedWithDeclAttribute(QualType Ty) { + const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl(); + if (!RecTy) + return false; + + if (RecTy->hasAttr()) + return true; + + if (auto *CTSD = dyn_cast(RecTy)) { + ClassTemplateDecl *Template = CTSD->getSpecializedTemplate(); + if (CXXRecordDecl *RD = Template->getTemplatedDecl()) + return RD->hasAttr(); + } + return false; + } + /// Check whether \p Ty corresponds to a SYCL type of name \p TypeName. static bool isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName); + + ///@} }; DeductionFailureInfo From 2f758db64f695a7cd1846a63b2202653c153eaf2 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 21 Mar 2024 18:45:10 -0700 Subject: [PATCH 21/22] =?UTF-8?q?Revert=20"[UR][L0]=20Fix=20Native=20Host?= =?UTF-8?q?=20memory=20usage=20on=20device=20with=20copy=20bac=E2=80=A6=20?= =?UTF-8?q?(#13102)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …k sync (#13014)" This reverts commit 98894fe5e520072b17da3cc34598f8ac790249ca. 
--- sycl/plugins/unified_runtime/CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 32ebe3784bf2f..b4f01966d5418 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -82,13 +82,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit c98fdbcf1f43ce132fbae75336bda984e4ce2e78 - # Merge: 5f4dd113 9b3cf9d3 - # Author: Kenneth Benzie (Benie) - # Date: Thu Mar 21 10:51:45 2024 +0000 - # Merge pull request #1439 from nrspruit/fix_device_native_proxy_buffer - # [L0] Fix Native Host memory usage on device with copy back sync - set(UNIFIED_RUNTIME_TAG c98fdbcf1f43ce132fbae75336bda984e4ce2e78) + # commit 5f4dd113824e90522d813420932c14072dc3049d + # Merge: ed1f8bf b551c77 + # Author: Ewan Crawford + # Date: Fri Mar 15 10:22:39 2024 +0000 + # Merge pull request #1447 from Bensuo/ewan/rocm_5_5_1 + # [HIP][CMDBUF] Require ROCm 5.5.1 for HIP command-buffers + set(UNIFIED_RUNTIME_TAG 5f4dd113824e90522d813420932c14072dc3049d) if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO) set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}") From 6246e20e40e10ca702a7eca43dcf72bde4489fdf Mon Sep 17 00:00:00 2001 From: ldrumm Date: Fri, 22 Mar 2024 12:07:14 +0000 Subject: [PATCH 22/22] [test-e2e] Disable broken test (#13091) Post-commit testing has been failing for multiple weeks. The root cause of this is sycl/test-e2e/Plugin/interop-level-zero-image.cpp which has been failing on the ARC GPUs consistently. This has clouded other test failures - why should anyone care about post-commit testing if it's already red. We should disable this test to lower the noise level; it's too late to revert the patch that introduced it. 
See https://github.com/intel/llvm/issues/13090 --- sycl/test-e2e/Plugin/interop-level-zero-image.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/Plugin/interop-level-zero-image.cpp b/sycl/test-e2e/Plugin/interop-level-zero-image.cpp index 2b0b36112d2a1..cce3f9d4e9980 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-image.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-image.cpp @@ -3,7 +3,8 @@ // RUN: %{run} %t.out // spir-v gen for legacy images at O0 not working -// UNSUPPORTED: O0 +// UNSUPPORTED: gpu-intel-dg2 +// This test is currently broken see https://github.com/intel/llvm/issues/13090 // This test verifies that make_image is working for 1D, 2D and 3D images. // We instantiate an image with L0, set its body, then use a host accessor to