diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 9f8ea3499f696..896a2ea8c183a 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -57,6 +57,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@f079b8493333aace61c81488f8bd40919487bd9f # v3.25.7 + uses: github/codeql-action/upload-sarif@b611370bb5703a7efb587f9d136a52ea24c5c38c # v3.25.11 with: sarif_file: results.sarif diff --git a/.github/workflows/sycl-linux-build.yml b/.github/workflows/sycl-linux-build.yml index 62c2eaa28bf1c..17dcc83aeb437 100644 --- a/.github/workflows/sycl-linux-build.yml +++ b/.github/workflows/sycl-linux-build.yml @@ -169,7 +169,8 @@ jobs: --cmake-opt=-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ --cmake-opt="-DLLVM_INSTALL_UTILS=ON" \ --cmake-opt="-DNATIVECPU_USE_OCK=Off" \ - --cmake-opt="-DSYCL_PI_TESTS=OFF" + --cmake-opt="-DSYCL_PI_TESTS=OFF" \ + --cmake-opt="-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=SPIRV" - name: Compile id: build run: cmake --build $GITHUB_WORKSPACE/build diff --git a/.github/workflows/sycl-nightly.yml b/.github/workflows/sycl-nightly.yml index fc0b90be7990a..32a7814fa1c5c 100644 --- a/.github/workflows/sycl-nightly.yml +++ b/.github/workflows/sycl-nightly.yml @@ -141,7 +141,7 @@ jobs: echo "TAG=$(date +'%Y-%m-%d')-${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" fi - name: Upload binaries - uses: softprops/action-gh-release@69320dbe05506a9a39fc8ae11030b214ec2d1f87 + uses: softprops/action-gh-release@a74c6b72af54cfa997e81df42d94703d6313a2d0 with: files: | sycl_linux.tar.gz diff --git a/.github/workflows/sycl-windows-build.yml b/.github/workflows/sycl-windows-build.yml index 54ab197a792db..4bd537146bf31 100644 --- a/.github/workflows/sycl-windows-build.yml +++ b/.github/workflows/sycl-windows-build.yml @@ -92,7 +92,8 @@ jobs: --cmake-opt="-DCMAKE_INSTALL_PREFIX=%GITHUB_WORKSPACE%\install" ^ 
--cmake-opt="-DCMAKE_CXX_COMPILER_LAUNCHER=sccache" ^ --cmake-opt="-DCMAKE_C_COMPILER_LAUNCHER=sccache" ^ - --cmake-opt="-DLLVM_INSTALL_UTILS=ON" + --cmake-opt="-DLLVM_INSTALL_UTILS=ON" ^ + --cmake-opt="-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=SPIRV" - name: Build id: build shell: bash diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index eeb63fa9335d5..30d2b65204229 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4087,6 +4087,10 @@ def fsycl_libspirv_path_EQ : Joined<["-"], "fsycl-libspirv-path=">, Visibility<[ClangOption, CLOption, DXCOption, CC1Option]>, HelpText<"Path to libspirv library">; def fno_sycl_libspirv : Flag<["-"], "fno-sycl-libspirv">, Visibility<[ClangOption, CLOption, DXCOption]>, HelpText<"Disable check for libspirv">; +def fsycl_use_spirv_backend_for_spirv_gen : Flag<["-"], "fsycl-use-spirv-backend-for-spirv-gen">, + Visibility<[ClangOption, CLOption]>, Flags<[HelpHidden]>, HelpText<"Use the SPIR-V backend for SPIR-V code generation. " + "Has effect only for SPIR-based targets. It is off by default and " + "the SPIR-V LLVM Translator is used for SPIR-V code generation. 
(experimental)">; def fsycl_host_compiler_EQ : Joined<["-"], "fsycl-host-compiler=">, Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CLOption, DXCOption]>, HelpText<"Specify C++ compiler binary to perform host " diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index d55a3201ae2c5..13f6e863b541e 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1381,9 +1381,9 @@ void CodeGenModule::Release() { for (const auto &Type : TypesWithAspects) { StringRef Name = Type.first; const RecordDecl *RD = Type.second; - AspectsMD->addOperand(getAspectsMD(Context, TheModule.getContext(), - Name, - RD->getAttr())); + if (const auto *Attr = RD->getAttr()) + AspectsMD->addOperand( + getAspectsMD(Context, TheModule.getContext(), Name, Attr)); } } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 06947c237eb18..5c8a9c878b38a 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -7785,12 +7785,28 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, for (auto *Arg : Args) { // Extract any '--[no-]offload-arch' arguments intended for this toolchain. std::unique_ptr ExtractedArg = nullptr; - if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) && - ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) { - Arg->claim(); - unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); - ExtractedArg = getOpts().ParseOneArg(Args, Index); - Arg = ExtractedArg.get(); + if (Kind == Action::OFK_SYCL) { + // For SYCL based offloading, we allow for -Xsycl-target-backend + // and -Xsycl-target-backend= for specifying options. 
+ if (Arg->getOption().matches(options::OPT_Xsycl_backend_EQ) && + llvm::Triple(Arg->getValue(0)) == TC->getTriple()) { + Arg->claim(); + unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); + ExtractedArg = getOpts().ParseOneArg(Args, Index); + Arg = ExtractedArg.get(); + } else if (Arg->getOption().matches(options::OPT_Xsycl_backend)) { + unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(0)); + ExtractedArg = getOpts().ParseOneArg(Args, Index); + Arg = ExtractedArg.get(); + } + } else { + if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) && + ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) { + Arg->claim(); + unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); + ExtractedArg = getOpts().ParseOneArg(Args, Index); + Arg = ExtractedArg.get(); + } } // Add or remove the seen architectures in order of appearance. If an @@ -7855,8 +7871,18 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Archs.insert(CudaArchToString(CudaArch::HIPDefault)); else if (Kind == Action::OFK_OpenMP) Archs.insert(StringRef()); - else if (Kind == Action::OFK_SYCL) - Archs.insert(StringRef()); + else if (Kind == Action::OFK_SYCL) { + // For SYCL offloading, we need to check the triple for NVPTX or AMDGPU. + // The default arch is set for NVPTX if not provided. For AMDGPU, emit + // an error as the user is responsible to set the arch. 
+ if (TC->getTriple().isNVPTX()) + Archs.insert(CudaArchToString(CudaArch::SM_50)); + else if (TC->getTriple().isAMDGPU()) + C.getDriver().Diag(clang::diag::err_drv_sycl_missing_amdgpu_arch) + << 1 << TC->getTriple().str(); + else + Archs.insert(StringRef()); + } } else { Args.ClaimAllArgs(options::OPT_offload_arch_EQ); Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index dd868666b916a..2f379b04b9a82 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -10440,6 +10440,47 @@ void OffloadDeps::ConstructJobMultipleOutputs(Compilation &C, constructJob(C, JA, Outputs, Inputs, TCArgs, LinkingOutput); } +// Utility function to gather all arguments for SPIR-V generation using the +// SPIR-V backend. This set of arguments is expected to get updated as we add +// more features/extensions to the SPIR-V backend. +static void getSPIRVBackendOpts(const llvm::opt::ArgList &TCArgs, + ArgStringList &BackendArgs) { + BackendArgs.push_back(TCArgs.MakeArgString("-filetype=obj")); + BackendArgs.push_back( + TCArgs.MakeArgString("-mtriple=spirv64-unknown-unknown")); + // TODO: Optimization level is currently forced to -O0 due to some testing + // issues. Update optimization level after testing issues are resolved. + BackendArgs.push_back(TCArgs.MakeArgString("-O0")); + BackendArgs.push_back( + TCArgs.MakeArgString("--avoid-spirv-capabilities=Shader")); + BackendArgs.push_back( + TCArgs.MakeArgString("--translator-compatibility-mode")); + + // TODO: There is some overlap between the lists of extensions in SPIR-V + // backend and SPIR-V Translator. We will try to combine them when SPIR-V + // backend is ready. 
+ std::string ExtArg("--spirv-ext="); + std::string DefaultExtArg = + "+SPV_EXT_shader_atomic_float_add,+SPV_EXT_shader_atomic_float_min_max" + ",+SPV_KHR_no_integer_wrap_decoration,+SPV_KHR_float_controls" + ",+SPV_KHR_expect_assume,+SPV_KHR_linkonce_odr"; + std::string INTELExtArg = ",+SPV_INTEL_subgroups,+SPV_INTEL_function_pointers" + ",+SPV_INTEL_arbitrary_precision_integers" + ",+SPV_INTEL_variable_length_array"; + ExtArg = ExtArg + DefaultExtArg + INTELExtArg; + + // Additional extensions appended to the same --spirv-ext= list + ExtArg += ",+SPV_INTEL_bfloat16_conversion" + ",+SPV_KHR_uniform_group_instructions" + ",+SPV_INTEL_optnone" + ",+SPV_KHR_subgroup_rotate" + ",+SPV_INTEL_usm_storage_classes" + ",+SPV_EXT_shader_atomic_float16_add" + ",+SPV_KHR_bit_instructions"; + + BackendArgs.push_back(TCArgs.MakeArgString(ExtArg)); +} + // Utility function to gather all llvm-spirv options. // Not dependent on target triple. static void getNonTripleBasedSPIRVTransOpts(Compilation &C, @@ -10545,6 +10586,7 @@ static void getTripleBasedSPIRVTransOpts(Compilation &C, } // Begin SPIRVTranslator +// TODO: Add a unique 'llc' JobAction for SPIR-V backends. 
void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -10560,17 +10602,22 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA, TranslatorArgs.push_back("-o"); TranslatorArgs.push_back(Output.getFilename()); + bool UseSPIRVBackend = + TCArgs.hasArg(options::OPT_fsycl_use_spirv_backend_for_spirv_gen); if (JA.isDeviceOffloading(Action::OFK_SYCL)) { const toolchains::SYCLToolChain &TC = static_cast(getToolChain()); - getNonTripleBasedSPIRVTransOpts(C, TCArgs, TranslatorArgs); - llvm::Triple Triple = TC.getTriple(); - getTripleBasedSPIRVTransOpts(C, TCArgs, Triple, TranslatorArgs); - - // Handle -Xspirv-translator - TC.TranslateTargetOpt( - Triple, TCArgs, TranslatorArgs, options::OPT_Xspirv_translator, - options::OPT_Xspirv_translator_EQ, JA.getOffloadingArch()); + if (UseSPIRVBackend) { + getSPIRVBackendOpts(TCArgs, TranslatorArgs); + } else { + getNonTripleBasedSPIRVTransOpts(C, TCArgs, TranslatorArgs); + llvm::Triple Triple = TC.getTriple(); + getTripleBasedSPIRVTransOpts(C, TCArgs, Triple, TranslatorArgs); + // Handle -Xspirv-translator + TC.TranslateTargetOpt( + Triple, TCArgs, TranslatorArgs, options::OPT_Xspirv_translator, + options::OPT_Xspirv_translator_EQ, JA.getOffloadingArch()); + } } for (auto I : Inputs) { std::string Filename(I.getFilename()); @@ -10585,8 +10632,10 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA, TranslatorArgs.push_back(C.getArgs().MakeArgString(Filename)); } - auto Cmd = std::make_unique(JA, *this, ResponseFileSupport::None(), - TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), + auto ToolName = UseSPIRVBackend ? 
"llc" : getShortName(); + auto Cmd = std::make_unique( + JA, *this, ResponseFileSupport::None(), + TCArgs.MakeArgString(getToolChain().GetProgramPath(ToolName)), TranslatorArgs, std::nullopt); if (!ForeachArgs.empty()) { diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 1db4500bd3b51..39a6d6d5534e8 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1223,7 +1223,7 @@ void SYCL::x86_64::BackendCompiler::ConstructJob( // Unsupported options for device compilation // -fcf-protection, -fsanitize, -fprofile-generate, -fprofile-instr-generate // -ftest-coverage, -fcoverage-mapping, -fcreate-profile, -fprofile-arcs -// -fcs-profile-generate -forder-file-instrumentation +// -fcs-profile-generate -forder-file-instrumentation, --coverage static std::vector getUnsupportedOpts(void) { std::vector UnsupportedOpts = { options::OPT_fsanitize_EQ, @@ -1235,6 +1235,7 @@ static std::vector getUnsupportedOpts(void) { options::OPT_fno_test_coverage, options::OPT_fcoverage_mapping, options::OPT_fno_coverage_mapping, + options::OPT_coverage, options::OPT_fprofile_instr_generate, options::OPT_fprofile_instr_generate_EQ, options::OPT_fprofile_arcs, diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 0c7342ac438e1..bdfc7e4bebce8 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -16610,16 +16610,19 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, DiscardCleanupsInEvaluationContext(); } - if (FD && ((LangOpts.OpenMP && (LangOpts.OpenMPIsTargetDevice || - !LangOpts.OMPTargetTriples.empty())) || - LangOpts.CUDA || LangOpts.SYCLIsDevice)) { + if (!FD) + return dcl; + + if ((LangOpts.OpenMP && + (LangOpts.OpenMPIsTargetDevice || !LangOpts.OMPTargetTriples.empty())) || + LangOpts.CUDA || LangOpts.SYCLIsDevice) { auto ES = getEmissionStatus(FD); if (ES == Sema::FunctionEmissionStatus::Emitted || ES == Sema::FunctionEmissionStatus::Unknown) 
DeclsToCheckForDeferredDiags.insert(FD); } - if (FD && !FD->isDeleted()) + if (!FD->isDeleted()) checkTypeSupport(FD->getType(), FD->getLocation(), FD); // Handle free functions. diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index a7a7347cfcbab..dcee4506e99f1 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -1025,7 +1025,6 @@ static void CheckForDuplicateAttrs(Sema &S, ArrayRef Attrs) { S.Diag((*LastFoundItr)->getLocation(), diag::err_loop_attr_conflict) << *FirstItr; S.Diag((*FirstItr)->getLocation(), diag::note_previous_attribute); - return; } } } diff --git a/clang/test/Driver/sycl-offload-new-driver.c b/clang/test/Driver/sycl-offload-new-driver.c index 17917df9013c7..1d32409db45a3 100644 --- a/clang/test/Driver/sycl-offload-new-driver.c +++ b/clang/test/Driver/sycl-offload-new-driver.c @@ -6,11 +6,11 @@ // OFFLOAD-NEW-DRIVER: 1: append-footer, {0}, c++, (host-sycl) // OFFLOAD-NEW-DRIVER: 2: preprocessor, {1}, c++-cpp-output, (host-sycl) // OFFLOAD-NEW-DRIVER: 3: compiler, {2}, ir, (host-sycl) -// OFFLOAD-NEW-DRIVER: 4: input, "[[INPUT]]", c++, (device-sycl) -// OFFLOAD-NEW-DRIVER: 5: preprocessor, {4}, c++-cpp-output, (device-sycl) -// OFFLOAD-NEW-DRIVER: 6: compiler, {5}, ir, (device-sycl) -// OFFLOAD-NEW-DRIVER: 7: backend, {6}, ir, (device-sycl) -// OFFLOAD-NEW-DRIVER: 8: offload, "device-sycl (nvptx64-nvidia-cuda)" {7}, ir +// OFFLOAD-NEW-DRIVER: 4: input, "[[INPUT]]", c++, (device-sycl, sm_50) +// OFFLOAD-NEW-DRIVER: 5: preprocessor, {4}, c++-cpp-output, (device-sycl, sm_50) +// OFFLOAD-NEW-DRIVER: 6: compiler, {5}, ir, (device-sycl, sm_50) +// OFFLOAD-NEW-DRIVER: 7: backend, {6}, ir, (device-sycl, sm_50) +// OFFLOAD-NEW-DRIVER: 8: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {7}, ir // OFFLOAD-NEW-DRIVER: 9: input, "[[INPUT]]", c++, (device-sycl) // OFFLOAD-NEW-DRIVER: 10: preprocessor, {9}, c++-cpp-output, (device-sycl) // OFFLOAD-NEW-DRIVER: 11: compiler, {10}, ir, (device-sycl) 
@@ -173,3 +173,16 @@ // WRAPPER_OPTIONS_BACKEND_AOT: clang-linker-wrapper{{.*}} "--host-triple=x86_64-unknown-linux-gnu" // WRAPPER_OPTIONS_BACKEND_AOT-SAME: "--gpu-tool-arg={{.*}}-backend-gpu-opt" // WRAPPER_OPTIONS_BACKEND_AOT-SAME: "--cpu-tool-arg={{.*}}-backend-cpu-opt" + +/// Verify arch settings for nvptx and amdgcn targets +// RUN: %clangxx -fsycl -### -fsycl-targets=amdgcn-amd-gpu -fno-sycl-libspirv \ +// RUN: -nocudalib --offload-new-driver \ +// RUN: -Xsycl-target-backend=amdgcn-amd-gpu --offload-arch=gfx600 \ +// RUN: %s 2>&1 \ +// RUN: | FileCheck -check-prefix AMD_ARCH %s +// AMD_ARCH: clang-offload-packager{{.*}} "--image=file={{.*}},triple=amdgcn-amd-gpu,arch=gfx600,kind=sycl,compile-opts=--offload-arch=gfx600" + +// RUN: %clangxx -fsycl -### -fsycl-targets=nvptx64-nvidia-cuda \ +// RUN: -fno-sycl-libspirv -nocudalib --offload-new-driver %s 2>&1 \ +// RUN: | FileCheck -check-prefix NVPTX_DEF_ARCH %s +// NVPTX_DEF_ARCH: clang-offload-packager{{.*}} "--image=file={{.*}},triple=nvptx64-nvidia-cuda,arch=sm_50,kind=sycl" diff --git a/clang/test/Driver/sycl-spirv-backend.cpp b/clang/test/Driver/sycl-spirv-backend.cpp new file mode 100644 index 0000000000000..db159f6deafb5 --- /dev/null +++ b/clang/test/Driver/sycl-spirv-backend.cpp @@ -0,0 +1,6 @@ +/// +/// Tests for using SPIR-V backend for SYCL offloading +/// +// RUN: %clangxx -fsycl -fsycl-use-spirv-backend-for-spirv-gen -### %s 2>&1 | FileCheck %s + +// CHECK: llc{{.*}} "-filetype=obj" "-mtriple=spirv64-unknown-unknown" "-O0" "--avoid-spirv-capabilities=Shader" "--translator-compatibility-mode" "--spirv-ext= diff --git a/clang/test/Driver/sycl-unsupported.cpp b/clang/test/Driver/sycl-unsupported.cpp index ae256a2d9b60a..f1ade4801af54 100644 --- a/clang/test/Driver/sycl-unsupported.cpp +++ b/clang/test/Driver/sycl-unsupported.cpp @@ -35,6 +35,14 @@ // RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT // RUN: %clangxx -fsycl -forder-file-instrumentation -### %s 2>&1 \ // RUN: | FileCheck %s 
-DARCH=spir64 -DOPT=-forder-file-instrumentation +// RUN: %clangxx -fsycl --coverage -### %s 2>&1 \ +// RUN: | FileCheck %s -DARCH=spir64 -DOPT=--coverage \ +// RUN: -DOPT_CC1=-coverage-notes-file \ +// RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT +// RUN: %clang_cl -fsycl --coverage -### %s 2>&1 \ +// RUN: | FileCheck %s -DARCH=spir64 -DOPT=--coverage \ +// RUN: -DOPT_CC1=-coverage-notes-file \ +// RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT // Check to make sure our '-fsanitize=address' exception isn't triggered by a // different option // RUN: %clangxx -fsycl -fprofile-instr-generate=address -### %s 2>&1 \ diff --git a/clang/test/SemaSYCL/intel-fpga-loops.cpp b/clang/test/SemaSYCL/intel-fpga-loops.cpp index 2b1a6d5043c1a..53d3e61f69c57 100644 --- a/clang/test/SemaSYCL/intel-fpga-loops.cpp +++ b/clang/test/SemaSYCL/intel-fpga-loops.cpp @@ -311,6 +311,30 @@ void zoo() { [[intel::speculated_iterations(2)]] for (int i = 0; i != 10; ++i) a[i] = 0; + [[intel::speculated_iterations(1)]] // expected-note 2{{previous attribute is here}} + [[intel::speculated_iterations(1)]] // OK + [[intel::speculated_iterations(2)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}} + [[intel::speculated_iterations(4)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + + [[intel::max_interleaving(0)]] // expected-note 2{{previous attribute is here}} + [[intel::max_interleaving(0)]] // OK + [[intel::max_interleaving(1)]] // expected-error {{conflicting loop attribute 'max_interleaving'}} + [[intel::max_interleaving(1)]] // expected-error {{conflicting loop attribute 'max_interleaving'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + + [[intel::max_concurrency(10)]] // expected-note 2{{previous attribute is here}} + [[intel::max_concurrency(10)]] // OK + [[intel::max_concurrency(20)]] // expected-error {{conflicting loop attribute 'max_concurrency'}} + 
[[intel::max_concurrency(40)]] // expected-error {{conflicting loop attribute 'max_concurrency'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + + [[intel::initiation_interval(10)]] // expected-note 2{{previous attribute is here}} + [[intel::initiation_interval(10)]] // OK + [[intel::initiation_interval(20)]] // expected-error {{conflicting loop attribute 'initiation_interval'}} + [[intel::initiation_interval(40)]] // expected-error {{conflicting loop attribute 'initiation_interval'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + [[intel::ivdep]] // expected-warning@+2 {{ignoring redundant Intel FPGA loop attribute 'ivdep': safelen INF >= safelen INF}} // expected-note@-2 {{previous attribute is here}} @@ -383,6 +407,12 @@ void zoo() { [[intel::max_reinvocation_delay(20)]] for (int i = 0; i != 10; ++i) a[i] = 0; + [[intel::max_reinvocation_delay(10)]] // expected-note 2{{previous attribute is here}} + [[intel::max_reinvocation_delay(10)]] // OK + [[intel::max_reinvocation_delay(20)]] // expected-error {{conflicting loop attribute 'max_reinvocation_delay'}} + [[intel::max_reinvocation_delay(40)]] // expected-error {{conflicting loop attribute 'max_reinvocation_delay'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + [[intel::enable_loop_pipelining]] // expected-error@+1 {{duplicate Intel FPGA loop attribute 'enable_loop_pipelining'}} [[intel::enable_loop_pipelining]] for (int i = 0; i != 10; ++i) @@ -476,7 +506,7 @@ void ivdep_dependent() { }; } -template +template void ii_dependent() { int a[10]; // expected-error@+1 {{'initiation_interval' attribute requires a positive integral compile time constant expression}} @@ -491,6 +521,13 @@ void ii_dependent() { [[intel::initiation_interval(A)]] // expected-note {{previous attribute is here}} [[intel::initiation_interval(B)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::initiation_interval(A)]] // expected-note 2{{previous attribute is here}} + [[intel::initiation_interval(A)]] // OK + 
[[intel::initiation_interval(B)]] // expected-error {{conflicting loop attribute 'initiation_interval'}} + [[intel::initiation_interval(D)]] // expected-error {{conflicting loop attribute 'initiation_interval'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + } template @@ -515,6 +552,13 @@ void max_concurrency_dependent() { // max_concurrency attribute accepts value 0. [[intel::max_concurrency(D)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::max_concurrency(D)]] // expected-note 2{{previous attribute is here}} + [[intel::max_concurrency(D)]] // OK + [[intel::max_concurrency(A)]] // expected-error {{conflicting loop attribute 'max_concurrency'}} + [[intel::max_concurrency(B)]] // expected-error {{conflicting loop attribute 'max_concurrency'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + } template @@ -540,9 +584,16 @@ void max_interleaving_dependent() { [[intel::max_interleaving(D)]] [[intel::max_interleaving(D)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::max_interleaving(D)]] // expected-note 2{{previous attribute is here}} + [[intel::max_interleaving(D)]] // OK + [[intel::max_interleaving(C)]] // expected-error {{conflicting loop attribute 'max_interleaving'}} + [[intel::max_interleaving(C)]] // expected-error {{conflicting loop attribute 'max_interleaving'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + } -template +template void speculated_iterations_dependent() { int a[10]; // expected-error@+1 {{'speculated_iterations' attribute requires a non-negative integral compile time constant expression}} @@ -561,6 +612,13 @@ void speculated_iterations_dependent() { [[intel::speculated_iterations(B)]] [[intel::speculated_iterations(B)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::speculated_iterations(A)]] // expected-note 2{{previous attribute is here}} + [[intel::speculated_iterations(A)]] // OK + [[intel::speculated_iterations(B)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}} + 
[[intel::speculated_iterations(E)]] // expected-error {{conflicting loop attribute 'speculated_iterations'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + } template @@ -624,7 +682,7 @@ void loop_count_control_dependent() { a[i] = 0; } -template +template void max_reinvocation_delay_dependent() { int a[10]; // expected-error@+1 {{'max_reinvocation_delay' attribute requires a positive integral compile time constant expression}} @@ -639,6 +697,13 @@ void max_reinvocation_delay_dependent() { [[intel::max_reinvocation_delay(A)]] [[intel::max_reinvocation_delay(A)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::max_reinvocation_delay(A)]] // expected-note 2{{previous attribute is here}} + [[intel::max_reinvocation_delay(A)]] // OK + [[intel::max_reinvocation_delay(B)]] // expected-error {{conflicting loop attribute 'max_reinvocation_delay'}} + [[intel::max_reinvocation_delay(D)]] // expected-error {{conflicting loop attribute 'max_reinvocation_delay'}} + for (int i = 0; i != 10; ++i) { a[i] = 0; } + } void check_max_concurrency_expression() { @@ -815,14 +880,14 @@ int main() { //expected-note@-1 +{{in instantiation of function template specialization}} ivdep_dependent<2, 4, -1>(); //expected-note@-1 +{{in instantiation of function template specialization}} - ii_dependent<2, 4, -1>(); + ii_dependent<2, 4, -1, 8>(); //expected-note@-1 +{{in instantiation of function template specialization}} max_concurrency_dependent<1, 4, -2, 0>(); // expected-note{{in instantiation of function template specialization 'max_concurrency_dependent<1, 4, -2, 0>' requested here}} max_interleaving_dependent<-1, 4, 0, 1>(); // expected-note{{in instantiation of function template specialization 'max_interleaving_dependent<-1, 4, 0, 1>' requested here}} - speculated_iterations_dependent<1, 8, -3, 0>(); // expected-note{{in instantiation of function template specialization 'speculated_iterations_dependent<1, 8, -3, 0>' requested here}} + speculated_iterations_dependent<1, 8, -3, 0, 16>(); 
// expected-note{{in instantiation of function template specialization 'speculated_iterations_dependent<1, 8, -3, 0, 16>' requested here}} loop_coalesce_dependent<-1, 4, 0>(); // expected-note{{in instantiation of function template specialization 'loop_coalesce_dependent<-1, 4, 0>' requested here}} loop_count_control_dependent<3, 2, -1>(); // expected-note{{in instantiation of function template specialization 'loop_count_control_dependent<3, 2, -1>' requested here}} - max_reinvocation_delay_dependent<1, 3, 0>(); // expected-note{{in instantiation of function template specialization 'max_reinvocation_delay_dependent<1, 3, 0>' requested here}} + max_reinvocation_delay_dependent<1, 3, 0, 6>(); // expected-note{{in instantiation of function template specialization 'max_reinvocation_delay_dependent<1, 3, 0, 6>' requested here}} check_max_concurrency_expression(); check_max_interleaving_expression(); check_speculated_iterations_expression(); diff --git a/libdevice/include/asan_libdevice.hpp b/libdevice/include/asan_libdevice.hpp index 21ddb7478173f..5f04b557e5acd 100644 --- a/libdevice/include/asan_libdevice.hpp +++ b/libdevice/include/asan_libdevice.hpp @@ -66,8 +66,8 @@ struct LocalArgsInfo { constexpr std::size_t ASAN_MAX_NUM_REPORTS = 10; struct LaunchInfo { - // Don't move this field, we use it in AddressSanitizerPass uintptr_t PrivateShadowOffset = 0; + uintptr_t PrivateShadowOffsetEnd = 0; uintptr_t LocalShadowOffset = 0; uintptr_t LocalShadowOffsetEnd = 0; @@ -82,8 +82,8 @@ constexpr unsigned ASAN_SHADOW_SCALE = 4; constexpr unsigned ASAN_SHADOW_GRANULARITY = 1ULL << ASAN_SHADOW_SCALE; // Based on the observation, only the last 24 bits of the address of the private -// variable have changed, we use 31 bits(2G) to be safe. -constexpr std::size_t ASAN_PRIVATE_SIZE = 0x7fffffffULL + 1; +// variable have changed +constexpr std::size_t ASAN_PRIVATE_SIZE = 0xffffffULL + 1; // These magic values are written to shadow for better error // reporting. 
diff --git a/libdevice/sanitizer_utils.cpp b/libdevice/sanitizer_utils.cpp index e63c634e30930..651067be69851 100644 --- a/libdevice/sanitizer_utils.cpp +++ b/libdevice/sanitizer_utils.cpp @@ -65,6 +65,9 @@ static const __SYCL_CONSTANT__ char __global_shadow_out_of_bound[] = static const __SYCL_CONSTANT__ char __local_shadow_out_of_bound[] = "[kernel] Local shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: " "%p)\n"; +static const __SYCL_CONSTANT__ char __private_shadow_out_of_bound[] = + "[kernel] Private shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: " + "%p)\n"; static const __SYCL_CONSTANT__ char __asan_print_unsupport_device_type[] = "[kernel] Unsupport device type: %d\n"; @@ -123,7 +126,7 @@ inline uptr MemToShadow_DG2(uptr addr, uint32_t as) { } if (shadow_ptr > __AsanShadowMemoryGlobalEnd) { - if (__asan_report_out_of_shadow_bounds() && __AsanDebug) { + if (__asan_report_out_of_shadow_bounds()) { __spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr); } } @@ -171,7 +174,7 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) { } if (shadow_ptr > __AsanShadowMemoryGlobalEnd) { - if (__asan_report_out_of_shadow_bounds() && __AsanDebug) { + if (__asan_report_out_of_shadow_bounds()) { __spirv_ocl_printf(__global_shadow_out_of_bound, addr, shadow_ptr, (uptr)__AsanShadowMemoryGlobalStart); } @@ -207,13 +210,46 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) { ((addr & (SLM_SIZE - 1)) >> ASAN_SHADOW_SCALE); if (shadow_ptr > shadow_offset_end) { - if (__asan_report_out_of_shadow_bounds() && __AsanDebug) { + if (__asan_report_out_of_shadow_bounds()) { __spirv_ocl_printf(__local_shadow_out_of_bound, addr, shadow_ptr, wg_lid, (uptr)shadow_offset); } return 0; } return shadow_ptr; + } else if (as == ADDRESS_SPACE_PRIVATE) { // private + // work-group linear id + const auto WG_LID = + __spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y * + __spirv_BuiltInNumWorkgroups.z + + __spirv_BuiltInWorkgroupId.y * 
__spirv_BuiltInNumWorkgroups.z + + __spirv_BuiltInWorkgroupId.z; + + auto launch_info = (__SYCL_GLOBAL__ const LaunchInfo *)__AsanLaunchInfo; + const auto shadow_offset = launch_info->PrivateShadowOffset; + const auto shadow_offset_end = launch_info->PrivateShadowOffsetEnd; + + if (shadow_offset == 0) { + return 0; + } + + if (__AsanDebug) + __spirv_ocl_printf(__mem_launch_info, launch_info, + launch_info->PrivateShadowOffset, 0, + launch_info->NumLocalArgs, launch_info->LocalArgs); + + uptr shadow_ptr = shadow_offset + + ((WG_LID * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE) + + ((addr & (ASAN_PRIVATE_SIZE - 1)) >> ASAN_SHADOW_SCALE); + + if (shadow_ptr > shadow_offset_end) { + if (__asan_report_out_of_shadow_bounds()) { + __spirv_ocl_printf(__private_shadow_out_of_bound, addr, shadow_ptr, + WG_LID, (uptr)shadow_offset); + } + return 0; + } + return shadow_ptr; } return 0; @@ -233,6 +269,8 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { return shadow_ptr; } +// FIXME: OCL "O2" optimizer doesn't work well with following code +#if 0 if (__AsanDebug) { if (shadow_ptr) { if (as == ADDRESS_SPACE_PRIVATE) @@ -244,6 +282,7 @@ inline uptr MemToShadow(uptr addr, uint32_t as) { __spirv_ocl_printf(__asan_print_shadow_value2, addr, as, shadow_ptr); } } +#endif return shadow_ptr; } @@ -606,6 +645,14 @@ ASAN_REPORT_ERROR(store, true, 16) ASAN_REPORT_ERROR_N(load, false) ASAN_REPORT_ERROR_N(store, true) +/// +/// ASAN: convert a memory address to its shadow memory address (forwards to MemToShadow) +/// + +DEVICE_EXTERN_C_NOINLINE uptr __asan_mem_to_shadow(uptr ptr, uint32_t as) { + return MemToShadow(ptr, as); +} + /// /// ASAN initialize shdadow memory of local memory /// diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 38d5f2512a1c4..54357d1377c77 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -82,6 +82,7 @@ def AspectExt_intel_fpga_task_sequence : 
Aspect<"ext_intel_fpga_task_sequence">; def AspectExt_oneapi_limited_graph : Aspect<"ext_oneapi_limited_graph">; def AspectExt_oneapi_private_alloca : Aspect<"ext_oneapi_private_alloca">; def AspectExt_oneapi_queue_profiling_tag : Aspect<"ext_oneapi_queue_profiling_tag">; +def AspectExt_oneapi_virtual_mem : Aspect<"ext_oneapi_virtual_mem">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">; @@ -139,7 +140,7 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component, AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_limited_graph, - AspectExt_oneapi_private_alloca, AspectExt_oneapi_queue_profiling_tag], + AspectExt_oneapi_private_alloca, AspectExt_oneapi_queue_profiling_tag, AspectExt_oneapi_virtual_mem], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6ef309d75fd20..3a8cfd0e2cc52 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -179,6 +179,8 @@ const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private"; const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64"; const char kAMDGPUUnreachableName[] = "llvm.amdgcn.unreachable"; +const char kAsanMemToShadow[] = "__asan_mem_to_shadow"; + // Accesses sizes are powers of two: 1, 2, 4, 8, 16. 
static const size_t kNumberOfAccessSizes = 5; @@ -447,7 +449,7 @@ static cl::opt ClOverrideDestructorKind( static cl::opt ClSpirOffloadPrivates("asan-spir-privates", cl::desc("instrument private pointer"), cl::Hidden, - cl::init(false)); + cl::init(true)); static cl::opt ClSpirOffloadGlobals("asan-spir-globals", cl::desc("instrument global pointer"), @@ -820,14 +822,15 @@ struct AddressSanitizer { Value *SizeArgument, uint32_t Exp, RuntimeCallInserter &RTCI); void instrumentMemIntrinsic(MemIntrinsic *MI, RuntimeCallInserter &RTCI); - Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); + Value *memToShadow(Value *Shadow, IRBuilder<> &IRB, + uint32_t AddressSpace = kSpirOffloadPrivateAS); bool suppressInstrumentationSiteForDebug(int &Instrumented); bool instrumentFunction(Function &F, const TargetLibraryInfo *TLI); bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool maybeInsertDynamicShadowAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); void instrumentSyclStaticLocalMemory(CallInst *CI); - void instrumentSyclDynamicLocalMemory(Function &F); + bool instrumentSyclDynamicLocalMemory(Function &F); GlobalVariable *GetOrCreateGlobalString(Module &M, StringRef Name, StringRef Value, @@ -899,6 +902,8 @@ struct AddressSanitizer { FunctionCallee AMDGPUAddressPrivate; int InstrumentationWithCallsThreshold; uint32_t MaxInlinePoisoningSize; + + FunctionCallee AsanMemToShadow; }; class ModuleAddressSanitizer { @@ -1067,7 +1072,7 @@ struct FunctionStackPoisoner : public InstVisitor { DIB(*F.getParent(), /*AllowUnresolved*/ false), C(ASan.C), IntptrTy(ASan.IntptrTy), IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), - PoisonStack(ClStack && + PoisonStack((ClStack || ClSpirOffloadPrivates) && !Triple(F.getParent()->getTargetTriple()).isAMDGPU()) {} bool runOnFunction() { @@ -1352,7 +1357,7 @@ static void ExtendSpirKernelArgs(Module &M, FunctionAnalysisManager &FAM) { } // Fixup all users - for (auto [F, NewF] : SpirFuncs) { + for 
(auto &[F, NewF] : SpirFuncs) { SmallVector Users(F->users()); for (User *U : Users) { if (auto *CI = dyn_cast(U)) { @@ -1546,13 +1551,13 @@ void AddressSanitizer::AppendDebugInfoToArgs(Instruction *InsertBefore, Args.push_back(ConstantExpr::getPointerCast(FuncNameGV, ConstASPtrTy)); } -Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) { +Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB, + uint32_t AddressSpace) { if (TargetTriple.isSPIR()) { - // ((Shadow & 0xffffffff) >> 3) + __AsanShadowMemoryPrivateStart; - Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, 0xffffffff)); - Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); - Value *ShadowBase = IRB.CreateLoad(IntptrTy, AsanShadowDevicePrivate); - return IRB.CreateAdd(Shadow, ShadowBase); + return IRB.CreateCall( + AsanMemToShadow, + {Shadow, ConstantInt::get(IRB.getInt32Ty(), AddressSpace)}, + "shadow_ptr"); } // Shadow >> scale Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); @@ -1621,7 +1626,7 @@ void AddressSanitizer::instrumentSyclStaticLocalMemory(CallInst *CI) { } // Instument dynamic local memory -void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { +bool AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { InstrumentationIRBuilder IRB(F.getEntryBlock().getFirstNonPHI()); // Save "__asan_launch" into local memory "__AsanLaunchInfo" @@ -1633,13 +1638,12 @@ void AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { SmallVector LocalArgs; for (auto &Arg : F.args()) { Type *PtrTy = dyn_cast(Arg.getType()->getScalarType()); - // Local address space - if (PtrTy && PtrTy->getPointerAddressSpace() == 3) + if (PtrTy && PtrTy->getPointerAddressSpace() == kSpirOffloadLocalAS) LocalArgs.push_back(&Arg); } if (LocalArgs.empty()) - return; + return false; AllocaInst *ArgsArray = IRB.CreateAlloca( IntptrTy, ConstantInt::get(Int32Ty, LocalArgs.size()), "local_args"); @@ -1651,6 +1655,7 @@ void 
AddressSanitizer::instrumentSyclDynamicLocalMemory(Function &F) { IRB.CreateCall(AsanSetShadowDynamicLocalFunc, {IRB.CreatePointerCast(ArgsArray, IntptrTy), ConstantInt::get(Int32Ty, LocalArgs.size())}); + return true; } // Instrument memset/memmove/memcpy @@ -3234,14 +3239,6 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T ArrayType::get(IRB.getInt8Ty(), 0)); if (TargetTriple.isSPIR()) { - AsanShadowDevicePrivate = - M.getOrInsertGlobal("__AsanShadowMemoryPrivateStart", IntptrTy, [&] { - return new GlobalVariable(M, IntptrTy, true, - GlobalVariable::ExternalLinkage, nullptr, - "__AsanShadowMemoryPrivateStart", nullptr, - GlobalVariable::NotThreadLocal, 1); - }); - // __asan_set_shadow_static_local( // uptr ptr, // size_t size, @@ -3265,6 +3262,9 @@ void AddressSanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo *T GlobalVariable::ExternalLinkage, nullptr, "__AsanLaunchInfo", nullptr, GlobalVariable::NotThreadLocal, kSpirOffloadLocalAS); }); + + AsanMemToShadow = M.getOrInsertFunction(kAsanMemToShadow, IntptrTy, + IntptrTy, Type::getInt32Ty(*C)); } AMDGPUAddressShared = @@ -3362,8 +3362,16 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false; if (F.getName().starts_with("__asan_")) return false; - if (F.getName().contains("__sycl_service_kernel__")) - return false; + + if (TargetTriple.isSPIR()) { + if (F.getName().contains("__sycl_service_kernel__")) + return false; + // Skip referenced-indirectly function as we insert access to shared local + // memory (SLM) __AsanLaunchInfo and access to SLM in referenced-indirectly + // function isn't supported yet in intel-graphics-compiler. 
+ if (F.hasFnAttribute("referenced-indirectly")) + return false; + } bool FunctionModified = false; @@ -3393,10 +3401,6 @@ bool AddressSanitizer::instrumentFunction(Function &F, // can be passed to that intrinsic. markEscapedLocalAllocas(F); - if (F.getCallingConv() == CallingConv::SPIR_KERNEL) { - instrumentSyclDynamicLocalMemory(F); - } - // We want to instrument every address only once per basic block (unless there // are calls between uses). SmallPtrSet TempsToInstrument; @@ -3516,6 +3520,11 @@ bool AddressSanitizer::instrumentFunction(Function &F, if (ChangedStack || !NoReturnCalls.empty()) FunctionModified = true; + // We need to instrument dynamic local arguments after stack poisoner + if (F.getCallingConv() == CallingConv::SPIR_KERNEL) { + FunctionModified |= instrumentSyclDynamicLocalMemory(F); + } + LLVM_DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " " << F << "\n"); @@ -4001,32 +4010,39 @@ void FunctionStackPoisoner::processStaticAllocas() { AI->replaceAllUsesWith(NewAllocaPtr); } + auto TargetTriple = Triple(F.getParent()->getTargetTriple()); + // The left-most redzone has enough space for at least 4 pointers. - // Write the Magic value to redzone[0]. Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); - IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), - BasePlus0); - // Write the frame description constant to redzone[1]. - Value *BasePlus1 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, ASan.LongSize / 8)), - IntptrPtrTy); - GlobalVariable *StackDescriptionGlobal = - createPrivateGlobalForString(*F.getParent(), DescriptionString, - /*AllowMerging*/ true, kAsanGenPrefix); - Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); - IRB.CreateStore(Description, BasePlus1); - // Write the PC to redzone[2]. 
- Value *BasePlus2 = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), - IntptrPtrTy); - IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); + // SPIRV doesn't use the following metadata + if (!TargetTriple.isSPIR()) { + // Write the Magic value to redzone[0]. + IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), + BasePlus0); + // Write the frame description constant to redzone[1]. + Value *BasePlus1 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, ASan.LongSize / 8)), + IntptrPtrTy); + GlobalVariable *StackDescriptionGlobal = + createPrivateGlobalForString(*F.getParent(), DescriptionString, + /*AllowMerging*/ true, kAsanGenPrefix); + Value *Description = + IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); + IRB.CreateStore(Description, BasePlus1); + // Write the PC to redzone[2]. + Value *BasePlus2 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, 2 * ASan.LongSize / 8)), + IntptrPtrTy); + IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); + } const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L); // Poison the stack red zones at the entry. - Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); + Value *ShadowBase = + ASan.memToShadow(LocalStackBase, IRB, kSpirOffloadPrivateAS); // As mask we must use most poisoned case: red zones and after scope. // As bytes we can use either the same or just red zones only. 
copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase); diff --git a/llvm/test/CodeGen/SPIRV/transcoding/TransFNeg.ll b/llvm/test/CodeGen/SPIRV/transcoding/TransFNeg.ll index eb52a775d38ee..ebde02b7e51ae 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/TransFNeg.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/TransFNeg.ll @@ -1,5 +1,8 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; https://github.com/intel/llvm/issues/14372 +; UNSUPPORTED: windows + ; CHECK-SPIRV: OpFNegate ; CHECK-SPIRV: OpFNegate ; CHECK-SPIRV: OpFNegate diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll new file mode 100644 index 0000000000000..bf412b8225b79 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/instrument_private_address_space.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-stack=0 -asan-globals=0 -asan-constructor-kind=none -asan-spir-privates=1 -asan-use-after-return=never -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [1 x i64] } +%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } + +@__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p = private unnamed_addr addrspace(1) constant [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4 + +define spir_func i32 @_Z3fooPii(ptr addrspace(4) %p) { +entry: + %arrayidx = getelementptr inbounds i32, ptr addrspace(4) %p, i64 0 + %0 = load i32, ptr addrspace(4) %arrayidx, align 4 + ret i32 %0 +} + +define spir_kernel void @kernel() #0 { +; CHECK-LABEL: define spir_kernel void @kernel +entry: + %p.i = alloca 
[4 x i32], align 4 + ; CHECK: %shadow_ptr = call i64 @__asan_mem_to_shadow(i64 %0, i32 0) + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %p.i) + call void @llvm.memcpy.p0.p1.i64(ptr align 4 %p.i, ptr addrspace(1) align 4 @__const._ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlvE_clEv.p, i64 16, i1 false) + %arraydecay.i = getelementptr inbounds [4 x i32], ptr %p.i, i64 0, i64 0 + %0 = addrspacecast ptr %arraydecay.i to ptr addrspace(4) + %call.i = call spir_func i32 @_Z3fooPii(ptr addrspace(4) %0) + ret void +} + +attributes #0 = { mustprogress norecurse nounwind sanitize_address uwtable } diff --git a/llvm/test/Instrumentation/AddressSanitizer/SPIRV/skip_referenced_indirectly.ll b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/skip_referenced_indirectly.ll new file mode 100644 index 0000000000000..c2203ae7f242b --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/SPIRV/skip_referenced_indirectly.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 | FileCheck %s + +; Check referenced-indirectly function isn't instrumented. 
+ +target triple = "spir64-unknown-unknown" + +%structtype = type { [3 x ptr addrspace(4)] } +%class.Base = type <{ ptr addrspace(4), i32, [4 x i8] }> +@_ZTV8Derived1 = linkonce_odr addrspace(1) constant %structtype { [3 x ptr addrspace(4)] [ptr addrspace(4) null, ptr addrspace(4) null, ptr addrspace(4) addrspacecast (ptr @_ZN8Derived17displayEv to ptr addrspace(4))] }, align 8, !spirv.Decorations !0 + +define linkonce_odr spir_func i32 @_ZN8Derived17displayEv(ptr addrspace(4) align 8 %this) sanitize_address "referenced-indirectly" { +entry: +; CHECK-NOT: call void @__asan_load + + %base_data = getelementptr inbounds %class.Base, ptr addrspace(4) %this, i64 0, i32 1 + %1 = load i32, ptr addrspace(4) %base_data, align 8 + ret i32 %1 +} + +!0 = !{!1, !2, !3} +!1 = !{i32 22} +!2 = !{i32 41, !"_ZTV8Derived1", i32 2} +!3 = !{i32 44, i32 8} diff --git a/llvm/test/tools/llc/llc_emit_spirv_val_error_as_warning.ll b/llvm/test/tools/llc/llc_emit_spirv_val_error_as_warning.ll new file mode 100644 index 0000000000000..a054ef2876d8b --- /dev/null +++ b/llvm/test/tools/llc/llc_emit_spirv_val_error_as_warning.ll @@ -0,0 +1,38 @@ +// REQUIRES: spirv-val,system-linux + +; RUN: llc %s -filetype=obj -mtriple=spirv64-unknown-unknown -O0 --avoid-spirv-capabilities=Shader --translator-compatibility-mode --spirv-ext=+SPV_INTEL_function_pointers -o %t.spv 2>&1 | FileCheck -check-prefix=CHECK-WARNINGS %s + +; Check for spirv-val warnings. + +; CHECK-WARNINGS: llc: warning: SPIR-V validation started. +; CHECK-WARNINGS-DAG: error: line {{[0-9]+}}: ID '16[%_Z2f1i]' has not been defined +; CHECK-WARNINGS-DAG: llc: warning: SPIR-V validation failed. 
+ +target triple = "spirv64-unknown-unknown" + +define dso_local i32 @_Z2f1i(i32 %0) { + %2 = add nsw i32 %0, 1 + ret i32 %2 +} + +define dso_local i32 @_Z2f2i(i32 %0) { + %2 = add nsw i32 %0, 2 + ret i32 %2 +} + +define dso_local i64 @_Z3runiiPi(i32 %0, i32 %1, ptr nocapture %2) local_unnamed_addr { + %4 = icmp slt i32 %0, 10 + br i1 %4, label %5, label %7 + +5: + %6 = add nsw i32 %1, 2 + store i32 %6, ptr %2, align 4 + br label %7 + +7: + %8 = phi <2 x i64> [ , %5 ], [ , %3 ] + %9 = extractelement <2 x i64> %8, i64 0 + %10 = extractelement <2 x i64> %8, i64 1 + %11 = add nsw i64 %9, %10 + ret i64 %11 +} diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index e7bf192192b67..7097443845e31 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -44,7 +44,9 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/InitLLVM.h" +#include "llvm/Support/Path.h" #include "llvm/Support/PluginLoader.h" +#include "llvm/Support/Program.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/TimeProfiler.h" @@ -314,6 +316,21 @@ static std::unique_ptr GetOutputStream(const char *TargetName, return FDOut; } +std::string getMainExecutable(const char *Name) { + void *Ptr = (void *)(intptr_t)&getMainExecutable; + auto COWPath = sys::fs::getMainExecutable(Name, Ptr); + return sys::path::parent_path(COWPath).str(); +} + +Expected findProgram(StringRef Name, ArrayRef Paths) { + ErrorOr Path = sys::findProgramByName(Name, Paths); + if (!Path) + Path = sys::findProgramByName(Name); + if (!Path) + return ""; + return *Path; +} + // main - Entry point for the llc compiler. // int main(int argc, char **argv) { @@ -411,6 +428,27 @@ int main(int argc, char **argv) { if (RemarksFile) RemarksFile->keep(); + if (StringRef(OutputFilename).ends_with(".spv")) { + // An external tool (spirv-val) is used to validate the generated SPIR-V + // code. 
Github page: https://github.com/KhronosGroup/SPIRV-Tools + // Currently, this tool exists out-of-tree and it is the user's + // responsibility to make it available during the compilation process. + // TODO: Replace the tool invocation with an API library call when the tool + // is made available in-tree. + Expected SPIRVValPath = + findProgram("spirv-val", {getMainExecutable("spirv-val")}); + if (!SPIRVValPath || *SPIRVValPath == "") { + WithColor::warning(errs(), argv[0]) << "spirv-val not found.\n"; + return 0; + } + SmallVector CmdArgs; + CmdArgs.push_back(*SPIRVValPath); + CmdArgs.push_back(OutputFilename); + WithColor::warning(errs(), argv[0]) << "SPIR-V validation started.\n"; + if (sys::ExecuteAndWait(*SPIRVValPath, CmdArgs)) + WithColor::warning(errs(), argv[0]) << "SPIR-V validation failed.\n"; + return 0; + } return 0; } diff --git a/sycl/doc/GetStartedGuide.md b/sycl/doc/GetStartedGuide.md index de14612ba53e5..70d180e686c02 100644 --- a/sycl/doc/GetStartedGuide.md +++ b/sycl/doc/GetStartedGuide.md @@ -12,7 +12,6 @@ and a wide range of compute accelerators such as GPU and FPGA. 
* [Build DPC++ toolchain with support for NVIDIA CUDA](#build-dpc-toolchain-with-support-for-nvidia-cuda) * [Build DPC++ toolchain with support for HIP AMD](#build-dpc-toolchain-with-support-for-hip-amd) * [Build DPC++ toolchain with support for HIP NVIDIA](#build-dpc-toolchain-with-support-for-hip-nvidia) - * [Build DPC++ toolchain with support for ESIMD CPU Emulation](#build-dpc-toolchain-with-support-for-esimd-cpu-emulation) * [Build DPC++ toolchain with support for runtime kernel fusion](#build-dpc-toolchain-with-support-for-runtime-kernel-fusion) * [Build DPC++ toolchain with a custom Unified Runtime](#build-dpc-toolchain-with-a-custom-unified-runtime) * [Build Doxygen documentation](#build-doxygen-documentation) @@ -302,32 +301,6 @@ as well as the CUDA Runtime API to be installed, see Currently, this has only been tried on Linux, with ROCm 4.2.0 or 4.3.0, with CUDA 11, and using a GeForce 1060 device. -### Build DPC++ toolchain with support for ESIMD CPU Emulation - -There is experimental support for DPC++ for using ESIMD CPU Emulation. - -This feature supports ESIMD CPU Emulation using CM_EMU library -[CM Emulation project](https://github.com/intel/cm-cpu-emulation). The library -package will be generated from source codes downloaded from its open source -project and installed in your deploy directory during toolchain build. - -To enable support for ESIMD CPU emulation, follow the instructions for the Linux -DPC++ toolchain, but add the `--enable-esimd-emulator`. - -Enabling this flag requires following packages installed. - -* Ubuntu 22.04 - * libva-dev / 2.7.0-2 - * libffi-dev / 3.3-4 - * libtool -* RHEL 8.\* - * libffi - * libffi-devel - * libva - * libva-devel - -Currently, this feature was tested and verified on Ubuntu 22.04 environment. 
- ### Build DPC++ toolchain with support for runtime kernel fusion Support for the experimental SYCL extension for user-driven kernel fusion at diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc new file mode 100644 index 0000000000000..72a6e1ed14f55 --- /dev/null +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc @@ -0,0 +1,398 @@ += sycl_ext_oneapi_virtual_mem + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2023 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 8 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + + +== Status + +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. 
+*Shipping software products should not rely on APIs defined in this +specification.* + + +== Backend support status + +The APIs in this extension may be used only on a device that has +`aspect::ext_oneapi_virtual_mem`. The application must check that the devices +in the corresponding context have this aspect before using any of the APIs +introduced in this extension. If the application fails to do this, the +implementation throws a synchronous exception with the +`errc::feature_not_supported` error code. + +== Overview + +This extension adds the notion of "virtual memory ranges" to SYCL, introducing +a way to map an address range onto multiple allocations of physical memory, +allowing users to avoid expensive reallocations and potentially running out of +device memory while relocating the corresponding memory. + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_VIRTUAL_MEM` to one of the values defined in the table +below. Applications can test for the existence of this macro to determine if +the implementation supports this feature, or applications can test the macro's +value to determine which of the extension's features the implementation +supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. +|=== + + +=== Device aspect + +Support for the features introduced in this extension can be queried using the +new `aspect::ext_oneapi_virtual_mem` defined as: + +```c++ +namespace sycl { + +enum class aspect : /* unspecified */ { + ... 
+ ext_oneapi_virtual_mem +}; + +} // namespace sycl +``` + + +=== Memory granularity + +Working with virtual address ranges and the underlying physical memory requires +the user to align and adjust in accordance with a specified minimum granularity. + +The interfaces make the distinction between device granularity, which is the +granularity required for physical memory allocations, and context granularity, +which is the granularity required for virtual memory range reservations. + +The queries provide both a minimum and a recommended granularity. The minimum +device granularity is the smallest granularity that is supported for physical +memory allocations, and the minimum context granularity is the smallest +granularity that is supported for virtual memory range reservations. However, +the recommended granularity may be larger than these minimums and may provide +better performance. + +The interfaces for querying these granularities are defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +enum class granularity_mode : /*unspecified*/ { + minimum, + recommended +}; + +size_t get_mem_granularity(const device &syclDevice, const context &syclContext, + granularity_mode mode = granularity_mode::recommended); + +size_t get_mem_granularity(const context &syclContext, + granularity_mode mode = granularity_mode::recommended); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`size_t get_mem_granularity(const device &syclDevice, const context &syclContext, granularity_mode mode = granularity_mode::recommended)` | +Returns the granularity of physical memory allocations on `syclDevice` in the +`syclContext`. The `mode` argument specifies whether the query is for the +minimum or recommended granularity. + +If `syclDevice` does not have `aspect::ext_oneapi_virtual_mem` the call throws +an exception with `errc::feature_not_supported`. 
+ +|`size_t get_mem_granularity(const context &syclContext, granularity_mode mode = granularity_mode::recommended)` | +Returns the granularity of virtual memory range reservations in the +`syclContext`. The `mode` argument specifies whether the query is for the +minimum or recommended granularity. + +If any device in `syclContext` does not have `aspect::ext_oneapi_virtual_mem` +the call throws an exception with `errc::feature_not_supported`. + +|===================== + +=== Reserving virtual address ranges + +Virtual address ranges are represented by a `uintptr_t` and a number of bytes +reserved for it. The `uintptr_t` must be aligned in accordance with the minimum +granularity of the corresponding `context`, as queried through +`get_mem_granularity`, and likewise the number of bytes must be a multiple of +this granularity. It is the responsibility of the user to manage the +constituents of any virtual address range they reserve. + +The interfaces for reserving, freeing, and manipulating the access mode of a +virtual address range are defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +uintptr_t reserve_virtual_mem(uintptr_t start, size_t numBytes, const context &syclContext); +uintptr_t reserve_virtual_mem(size_t numBytes, const context &syclContext); + +void free_virtual_mem(uintptr_t ptr, size_t numBytes, const context &syclContext); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`uintptr_t reserve_virtual_mem(uintptr_t start, size_t numBytes, const context &syclContext)` | +Reserves a virtual memory range in `syclContext` with `numBytes` bytes. + +`start` specifies the requested start of the new virtual memory range +reservation. If the implementation is unable to reserve the virtual memory range +at the specified address, the implementation will pick another suitable address. 
+ +`start` must be aligned in accordance with the minimum granularity for +`syclContext`, as returned by a call to `get_mem_granularity`. Likewise, +`numBytes` must be a multiple of the minimum granularity. Attempting to call +this function without meeting these requirements results in undefined behavior. + +If any of the devices in `syclContext` do not have +`aspect::ext_oneapi_virtual_mem` the call throws an exception with +`errc::feature_not_supported`. + +|`uintptr_t reserve_virtual_mem(size_t numBytes, const context &syclContext)` | +Same as `reserve_virtual_mem(0, numBytes, syclContext)`. + +|`void free_virtual_mem(uintptr_t ptr, size_t numBytes, const context &syclContext)` | +Frees a virtual memory range specified by `ptr` and `numBytes`. `ptr` must be +the same as returned by a call to `reserve_virtual_mem` and `numBytes` must be +the same as the size of the range specified in the reservation call. + +The virtual memory range must not currently be mapped to physical memory. A call +to this function with a mapped virtual memory range results in undefined +behavior. + +|===================== + + +=== Physical memory representation + +:crs: https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:reference-semantics + +To represent the underlying physical device memory a virtual address is mapped +to, the `physical_mem` class is added. 
This new class is defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +enum class address_access_mode : /*unspecified*/ { + none, + read, + read_write +}; + +class physical_mem { +public: + physical_mem(const device &syclDevice, const context &syclContext, size_t numBytes); + physical_mem(const queue &syclQueue, size_t numBytes); + + /* -- common interface members -- */ + + void *map(uintptr_t ptr, size_t numBytes, address_access_mode mode, size_t offset = 0) const; + + context get_context() const; + device get_device() const; + + size_t size() const noexcept; +}; + +} // namespace sycl::ext::oneapi::experimental +``` + +`physical_mem` has common reference semantics, as described in +{crs}[section 4.5.2. Common reference semantics]. + +[frame="topbot",options="header,footer"] +|============================ +|Member function |Description + +|`physical_mem(const device &syclDevice, const context &syclContext, size_t numBytes)` | +Constructs a `physical_mem` instance using the `syclDevice` provided. This +device must either be contained by `syclContext` or it must be a descendent +device of some device that is contained by that context, otherwise this function +throws a synchronous exception with the `errc::invalid` error code. + +This will allocate `numBytes` of physical memory on the device. `numBytes` must +be a multiple of the granularity for `syclDevice`, as returned by a call to +`get_mem_granularity`. + +If `syclDevice` does not have `aspect::ext_oneapi_virtual_mem` the call throws +an exception with `errc::feature_not_supported`. + +If the constructor is unable to allocate the required memory on `syclDevice`, +the call throws an exception with `errc::memory_allocation`. + +|`physical_mem(const queue &syclQueue, size_t numBytes)` | +Same as `physical_mem(syclQueue.get_device(), syclQueue.get_context(), numBytes)`. 
+ +|`void *map(uintptr_t ptr, size_t numBytes, address_access_mode mode, size_t offset = 0) const` | +Maps a virtual memory range, specified by `ptr` and `numBytes`, to the physical +memory corresponding to this instance of `physical_mem`, starting at an offset +of `offset` bytes. + +It is required that `offset + numBytes` is less than or equal to `size()` and +that `ptr`, `numBytes` and `offset` are all multiples of the minimum granularity +for the device associated with this instance of `physical_mem`. + +If `mode` is `address_access_mode::read` or `address_access_mode::read_write` +the returned pointer is accessible after the call as read-only or read-write +respectively. Otherwise, it is considered inaccessible and accessing it will +result in undefined behavior. + +The returned pointer is equivalent to `reinterpret_cast<void *>(ptr)`. + +Writing to any address in the virtual memory range with access mode set to +`address_access_mode::read` results in undefined behavior. + +An accessible pointer behaves the same as a pointer to device USM memory and can +be used in place of a device USM pointer in any interface accepting one. + +A virtual memory range cannot be simultaneously mapped to more than one +physical memory region. Likewise, multiple virtual memory ranges cannot be +mapped onto the same physical memory region. Attempting to violate either of +these restrictions will result in undefined behavior. + +|`context get_context() const` | +Returns the SYCL context associated with the instance of `physical_mem`. + +|`device get_device() const` | +Returns the SYCL device associated with the instance of `physical_mem`. + +|`size_t size() const noexcept` | +Returns the size of the corresponding physical memory in bytes. + +|============================ + +Virtual memory address ranges are mapped to a `physical_mem` through the +`map` member function, where the access mode can also be specified. 
+To further get or set the access mode of a mapped virtual address range, the +user does not need to know the associated `physical_mem` and can just call the +following free functions. + +```c++ +namespace sycl::ext::oneapi::experimental { + +void set_access_mode(const void *ptr, size_t numBytes, address_access_mode mode, const context &syclContext); + +address_access_mode get_access_mode(const void *ptr, size_t numBytes, const context &syclContext); + +void unmap(const void *ptr, size_t numBytes, const context &syclContext); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`void set_access_mode(const void *ptr, size_t numBytes, address_access_mode mode, const context &syclContext)` | +Changes the access mode of a mapped virtual memory range specified by `ptr` and +`numBytes`. + +If `mode` is `address_access_mode::read` or `address_access_mode::read_write` +`ptr` pointer is accessible after the call as read-only or read-write +respectively. Otherwise, it is considered inaccessible and accessing it will +result in undefined behavior. + +The virtual memory range specified by `ptr` and `numBytes` must be a sub-range +of virtual memory ranges previously mapped to `physical_mem`. `ptr` +must be aligned to the minimum memory granularity of the device associated with +the `physical_mem` the range is mapped to and `numBytes` must be a multiple of +the minimum memory granularity of the device associated with the `physical_mem` +the range is mapped to. + +Writing to any address in the virtual memory range with access mode set to +`address_access_mode::read` results in undefined behavior. + +An accessible pointer behaves the same as a pointer to device USM memory and can +be used in place of a device USM pointer in any interface accepting one. 
+ +|`address_access_mode get_access_mode(const void *ptr, size_t numBytes, const context &syclContext)` | +Returns the access mode of the mapped virtual memory range specified by `ptr` +and `numBytes`. + +The virtual memory range specified by `ptr` and `numBytes` must be a sub-range +of virtual memory ranges previously mapped to `physical_mem`. `ptr` +must be aligned to the minimum memory granularity of the device associated with +the `physical_mem` the range is mapped to and `numBytes` must be a multiple of +the minimum memory granularity of the device associated with the `physical_mem` +the range is mapped to. + +|`void unmap(const void *ptr, size_t numBytes, const context &syclContext)` | +Unmaps the range specified by `ptr` and `numBytes`. The range must have been +mapped through a call to `physical_mem::map()` prior to calling this. The range +must not be a proper sub-range of a previously mapped range. `syclContext` must +be the same as the context returned by the `get_context()` member function on +the `physical_mem` the address range is currently mapped to. + +After this call, the full range will again be ready to be mapped through a call +to `physical_mem::map()`. + +[_Note:_ Unmapping ranges that span multiple contiguous mapped ranges is not +supported. Doing so will result in undefined behavior. This restriction may be +lifted in the future. _{endnote}_] + +[_Note:_ The destructor for `physical_mem` will not unmap ranges mapped to it. +As such, the user must call `unmap` on ranges mapped to `physical_mem` objects +prior to their destruction. 
_{endnote}_] + +|===================== \ No newline at end of file diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index af128aa312410..1b9371769d826 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -518,19 +518,6 @@ class __SYCL_EXPORT AccessorBaseHost { AccessorBaseHost(const AccessorImplPtr &Impl) : impl{Impl} {} public: - // TODO: the following function to be removed during next ABI break window - AccessorBaseHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, int Dims, - int ElemSize, int OffsetInBytes = 0, - bool IsSubBuffer = false, - const property_list &PropertyList = {}); - // TODO: the following function to be removed during next ABI break window - AccessorBaseHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, int Dims, - int ElemSize, bool IsPlaceH, int OffsetInBytes = 0, - bool IsSubBuffer = false, - const property_list &PropertyList = {}); - AccessorBaseHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, void *SYCLMemObject, int Dims, int ElemSize, size_t OffsetInBytes = 0, diff --git a/sycl/include/sycl/backend.hpp b/sycl/include/sycl/backend.hpp index 0f8ebd75c4d9b..7fab7ed12c8c3 100644 --- a/sycl/include/sycl/backend.hpp +++ b/sycl/include/sycl/backend.hpp @@ -59,10 +59,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -// TODO each backend can have its own custom errc enumeration -// but the details for this are not fully specified yet -enum class backend_errc : unsigned int {}; - // Convert from PI backend to SYCL backend enum backend convertBackend(pi_platform_backend PiBackend); } // namespace detail @@ -74,8 +70,6 @@ template class backend_traits { template using return_type = typename detail::BackendReturn::type; - - using errc = detail::backend_errc; }; template @@ -269,7 +263,8 @@ __SYCL_EXPORT device 
make_device(pi_native_handle NativeHandle, backend Backend); __SYCL_EXPORT context make_context(pi_native_handle NativeHandle, const async_handler &Handler, - backend Backend); + backend Backend, bool KeepOwnership, + const std::vector &DeviceList = {}); __SYCL_EXPORT queue make_queue(pi_native_handle NativeHandle, int32_t nativeHandleDesc, const context &TargetContext, @@ -334,7 +329,7 @@ make_context( &BackendObject, const async_handler &Handler = {}) { return detail::make_context(detail::pi::cast(BackendObject), - Handler, Backend); + Handler, Backend, false /* KeepOwnership */); } template diff --git a/sycl/include/sycl/backend/opencl.hpp b/sycl/include/sycl/backend/opencl.hpp index 9376498580fc7..66d1b04c6d925 100644 --- a/sycl/include/sycl/backend/opencl.hpp +++ b/sycl/include/sycl/backend/opencl.hpp @@ -24,54 +24,10 @@ namespace sycl { inline namespace _V1 { namespace opencl { -// Implementation of various "make" functions resides in SYCL RT because -// creating SYCL objects requires knowing details not accessible here. -// Note that they take opaque pi_native_handle that real OpenCL handles -// are casted to. -// -__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle); -__SYCL_EXPORT device make_device(pi_native_handle NativeHandle); -__SYCL_EXPORT context make_context(pi_native_handle NativeHandle); -__SYCL_EXPORT queue make_queue(const context &Context, - pi_native_handle InteropHandle); - __SYCL_EXPORT bool has_extension(const sycl::platform &SyclPlatform, const std::string &Extension); __SYCL_EXPORT bool has_extension(const sycl::device &SyclDevice, const std::string &Extension); - -// Construction of SYCL platform. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_platform free function") -T make(typename detail::interop::type Interop) { - return make_platform(detail::pi::cast(Interop)); -} - -// Construction of SYCL device. 
-template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_device free function") -T make(typename detail::interop::type Interop) { - return make_device(detail::pi::cast(Interop)); -} - -// Construction of SYCL context. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_context free function") -T make(typename detail::interop::type Interop) { - return make_context(detail::pi::cast(Interop)); -} - -// Construction of SYCL queue. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_queue free function") -T make(const context &Context, - typename detail::interop::type Interop) { - return make_queue(Context, detail::pi::cast(Interop)); -} } // namespace opencl } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/backend_types.hpp b/sycl/include/sycl/backend_types.hpp index ead8f4674d0ef..c0a274febc9e7 100644 --- a/sycl/include/sycl/backend_types.hpp +++ b/sycl/include/sycl/backend_types.hpp @@ -21,8 +21,8 @@ enum class backend : char { ext_oneapi_level_zero = 2, ext_oneapi_cuda = 3, all = 4, - ext_intel_esimd_emulator __SYCL_DEPRECATED( - "esimd emulator is no longer supported") = 5, + // No support anymore: + // ext_intel_esimd_emulator = 5, ext_oneapi_hip = 6, ext_oneapi_native_cpu = 7, }; @@ -50,9 +50,6 @@ inline std::ostream &operator<<(std::ostream &Out, backend be) { case backend::ext_oneapi_cuda: Out << "ext_oneapi_cuda"; break; - case backend::ext_intel_esimd_emulator: - Out << "ext_intel_esimd_emulator"; - break; case backend::ext_oneapi_hip: Out << "ext_oneapi_hip"; break; @@ -76,8 +73,6 @@ inline std::string_view get_backend_name_no_vendor(backend Backend) { return "level_zero"; case backend::ext_oneapi_cuda: return "cuda"; - case backend::ext_intel_esimd_emulator: - return "esimd_emulator"; case backend::ext_oneapi_hip: return "hip"; case backend::ext_oneapi_native_cpu: diff --git a/sycl/include/sycl/buffer.hpp b/sycl/include/sycl/buffer.hpp index 5dde105b678e6..32588de22c980 100644 --- 
a/sycl/include/sycl/buffer.hpp +++ b/sycl/include/sycl/buffer.hpp @@ -472,7 +472,13 @@ class buffer : public detail::buffer_plain, buffer &operator=(buffer &&rhs) = default; - ~buffer() { buffer_plain::handleRelease(); } + ~buffer() { + try { + buffer_plain::handleRelease(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~buffer", e); + } + } bool operator==(const buffer &rhs) const { return impl == rhs.impl; } diff --git a/sycl/include/sycl/detail/common.hpp b/sycl/include/sycl/detail/common.hpp index 1c940a21a7223..09c31ef76ef6d 100644 --- a/sycl/include/sycl/detail/common.hpp +++ b/sycl/include/sycl/detail/common.hpp @@ -368,6 +368,17 @@ static constexpr std::array RepeatValue(const T &Arg) { return RepeatValueHelper(Arg, std::make_index_sequence()); } +// to output exceptions caught in ~destructors +#ifndef NDEBUG +#define __SYCL_REPORT_EXCEPTION_TO_STREAM(str, e) \ + { \ + std::cerr << str << " " << e.what() << std::endl; \ + assert(false); \ + } +#else +#define __SYCL_REPORT_EXCEPTION_TO_STREAM(str, e) +#endif + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/detail/host_task_impl.hpp b/sycl/include/sycl/detail/host_task_impl.hpp index 1634269ac2f2b..42b49eab820c7 100644 --- a/sycl/include/sycl/detail/host_task_impl.hpp +++ b/sycl/include/sycl/detail/host_task_impl.hpp @@ -47,6 +47,8 @@ class HostTask { if (HPI) HPI->end(); } + + friend class DispatchHostTask; }; class CGHostTask : public CG { diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 995579d612afb..f2acd7e897399 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -215,4 +215,19 @@ _PI_API(piextDestroyExternalSemaphore) _PI_API(piextWaitExternalSemaphore) _PI_API(piextSignalExternalSemaphore) +// Virtual memory +_PI_API(piextVirtualMemGranularityGetInfo) +_PI_API(piextPhysicalMemCreate) +_PI_API(piextPhysicalMemRetain) +_PI_API(piextPhysicalMemRelease) 
+_PI_API(piextVirtualMemReserve) +_PI_API(piextVirtualMemFree) +_PI_API(piextVirtualMemMap) +_PI_API(piextVirtualMemUnmap) +_PI_API(piextVirtualMemSetAccess) +_PI_API(piextVirtualMemGetInfo) + +// Enqueue native command +_PI_API(piextEnqueueNativeCommand) + #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index f4e67f7ba6113..f541c3e4e89d2 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -191,9 +191,14 @@ // `win32_nt_dx12_resource` value. // the `pi_external_semaphore_handle_type` enum now has a new // `win32_nt_dx12_fence` value. +// 15.54 Added piextVirtualMem* functions, and piextPhysicalMem* functions, +// PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM device info descriptor, +// _pi_virtual_mem_granularity_info enum, _pi_virtual_mem_info enum and +// pi_virtual_access_flags bit flags. +// 15.55 Added piextEnqueueNativeCommand as well as associated types and enums #define _PI_H_VERSION_MAJOR 15 -#define _PI_H_VERSION_MINOR 53 +#define _PI_H_VERSION_MINOR 55 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -312,7 +317,8 @@ typedef enum { PI_EXT_PLATFORM_BACKEND_OPENCL = 2, ///< The backend is OpenCL PI_EXT_PLATFORM_BACKEND_CUDA = 3, ///< The backend is CUDA PI_EXT_PLATFORM_BACKEND_HIP = 4, ///< The backend is HIP - PI_EXT_PLATFORM_BACKEND_ESIMD = 5, ///< The backend is ESIMD + // Not supported anymore: + // PI_EXT_PLATFORM_BACKEND_ESIMD = 5, PI_EXT_PLATFORM_BACKEND_NATIVE_CPU = 6, ///< The backend is NATIVE_CPU } _pi_platform_backend; @@ -504,6 +510,11 @@ typedef enum { // Timestamp enqueue PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT = 0x2011D, + + // Virtual memory support + PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM = 0x2011E, + // Native enqueue + PI_EXT_ONEAPI_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT = 0x2011F, } _pi_device_info; typedef enum { @@ -755,6 +766,15 @@ typedef enum { PI_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS = 0x1143, } 
_pi_sampler_cubemap_filter_mode; +typedef enum { + PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM = 0x30100, + PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED = 0x30101, +} _pi_virtual_mem_granularity_info; + +typedef enum { + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE = 0x30200, +} _pi_virtual_mem_info; + using pi_context_properties = intptr_t; using pi_device_exec_capabilities = pi_bitfield; @@ -847,6 +867,10 @@ constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE = (1 << constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE = (1 << 8); // clang-format on +using pi_virtual_access_flags = pi_bitfield; +constexpr pi_virtual_access_flags PI_VIRTUAL_ACCESS_FLAG_RW = (1 << 0); +constexpr pi_virtual_access_flags PI_VIRTUAL_ACCESS_FLAG_READ_ONLY = (1 << 1); + typedef enum { // No preference for SLM or data cache. PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT = 0x0, @@ -888,6 +912,8 @@ using pi_program_binary_type = _pi_program_binary_type; using pi_kernel_info = _pi_kernel_info; using pi_profiling_info = _pi_profiling_info; using pi_kernel_cache_config = _pi_kernel_cache_config; +using pi_virtual_mem_granularity_info = _pi_virtual_mem_granularity_info; +using pi_virtual_mem_info = _pi_virtual_mem_info; using pi_image_copy_flags = _pi_image_copy_flags; @@ -1240,6 +1266,7 @@ struct _pi_program; struct _pi_kernel; struct _pi_event; struct _pi_sampler; +struct _pi_physical_mem; using pi_platform = _pi_platform *; using pi_device = _pi_device *; @@ -1254,6 +1281,8 @@ using pi_image_handle = pi_uint64; using pi_image_mem_handle = void *; using pi_interop_mem_handle = pi_uint64; using pi_interop_semaphore_handle = pi_uint64; +using pi_physical_mem = _pi_physical_mem *; +using pi_enqueue_native_command_function = void (*)(pi_queue, void *); typedef struct { pi_image_channel_order image_channel_order; @@ -2337,6 +2366,125 @@ pi_result piextEnqueueDeviceGlobalVariableRead( size_t count, size_t offset, void *dst, pi_uint32 
num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event); +/// +/// Virtual memory +/// + +/// API for getting information about the minimum and recommended granularity +/// of physical and virtual memory. +/// +/// \param context is the context to get the granularity from. +/// \param device is the device to get the granularity from. +/// \param param_name is the type of query to perform. +/// \param param_value_size is the size of the result in bytes. +/// \param param_value is the result. +/// \param param_value_size_ret is how many bytes were written. +__SYCL_EXPORT pi_result piextVirtualMemGranularityGetInfo( + pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); + +/// API for creating a physical memory handle that virtual memory can be mapped +/// to. +/// +/// \param context is the context within which the physical memory is allocated. +/// \param device is the device the physical memory is on. +/// \param mem_size is the size of physical memory to allocate. This must be a +/// multiple of the minimum virtual memory granularity. +/// \param ret_physical_mem is the handle for the resulting physical memory. +__SYCL_EXPORT pi_result +piextPhysicalMemCreate(pi_context context, pi_device device, size_t mem_size, + pi_physical_mem *ret_physical_mem); + +/// API for retaining a physical memory handle. +/// +/// \param physical_mem is the handle for the physical memory to retain. +__SYCL_EXPORT pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem); + +/// API for releasing a physical memory handle. +/// +/// \param physical_mem is the handle for the physical memory to free. +__SYCL_EXPORT pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem); + +/// API for reserving a virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// reserved. 
+/// \param start is a pointer to the start of the region to reserve. If nullptr +/// the implementation selects a start address. +/// \param range_size is the size of the virtual address range to reserve in +/// bytes. +/// \param ret_ptr is the pointer to the start of the resulting virtual memory +/// range. +__SYCL_EXPORT pi_result piextVirtualMemReserve(pi_context context, + const void *start, + size_t range_size, + void **ret_ptr); + +/// API for freeing a virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// reserved. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range. +__SYCL_EXPORT pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size); + +/// API for mapping a virtual memory range to a physical memory allocation at +/// a given offset. +/// +/// \param context is the context within which both the virtual memory range is +/// reserved and the physical memory is allocated. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range. +/// \param physical_mem is the handle for the physical memory to map ptr to. +/// \param offset is the offset into physical_mem in bytes to map ptr to. +/// \param flags is the access flags to set for the mapping. +__SYCL_EXPORT pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, + pi_physical_mem physical_mem, + size_t offset, + pi_virtual_access_flags flags); + +/// API for unmapping a virtual memory range previously mapped in a context. +/// After a call to this function, the virtual memory range is left in a state +/// ready to be remapped. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. +/// \param ptr is the pointer to the start of the virtual memory range. 
+/// \param range_size is the size of the virtual address range in bytes. +__SYCL_EXPORT pi_result piextVirtualMemUnmap(pi_context context, + const void *ptr, + size_t range_size); + +/// API for setting the access mode of a mapped virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +/// \param flags is the access flags to set for the mapped virtual access range. +__SYCL_EXPORT pi_result piextVirtualMemSetAccess(pi_context context, + const void *ptr, + size_t range_size, + pi_virtual_access_flags flags); + +/// API for getting info about a mapped virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +/// \param param_name is the type of query to perform. +/// \param param_value_size is the size of the result in bytes. +/// \param param_value is the result. +/// \param param_value_size_ret is how many bytes were written. +__SYCL_EXPORT pi_result +piextVirtualMemGetInfo(pi_context context, const void *ptr, size_t range_size, + pi_virtual_mem_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); + /// /// Plugin /// @@ -3057,6 +3205,25 @@ __SYCL_EXPORT pi_result piextSignalExternalSemaphore( pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event); +/// API to enqueue work through a backend API such that the plugin can schedule +/// the backend API calls within its own DAG. 
+/// +/// \param command_queue is the queue instructed to signal +/// \param fn is the user submitted native function enqueueing work to a +/// backend API +/// \param data is the data that will be used in fn +/// \param num_mems is the number of mems in mem_list +/// \param mem_list is the list of mems that are used in fn +/// \param num_events_in_wait_list is the number of events in the wait list +/// \param event_wait_list is the list of events to wait on before this +/// operation +/// \param event is the returned event representing this operation +__SYCL_EXPORT pi_result piextEnqueueNativeCommand( + pi_queue command_queue, pi_enqueue_native_command_function fn, void *data, + pi_uint32 num_mems, const pi_mem *mem_list, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event); + typedef enum { _PI_SANITIZE_TYPE_NONE = 0x0, _PI_SANITIZE_TYPE_ADDRESS = 0x1, diff --git a/sycl/include/sycl/detail/pi.hpp b/sycl/include/sycl/detail/pi.hpp index 0a6713dab1096..1fe21d36a8aaa 100644 --- a/sycl/include/sycl/detail/pi.hpp +++ b/sycl/include/sycl/detail/pi.hpp @@ -69,7 +69,6 @@ bool trace(TraceLevel level); #define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" #define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "pi_esimd_emulator.dll" #define __SYCL_HIP_PLUGIN_NAME "pi_hip.dll" #define __SYCL_UR_PLUGIN_NAME "pi_unified_runtime.dll" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "pi_native_cpu.dll" @@ -77,7 +76,6 @@ bool trace(TraceLevel level); #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dll" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.dll" #define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dll" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.dll" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dll" #define __SYCL_UR_PLUGIN_NAME "libpi_unified_runtime.dll" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.dll" @@ -86,7 +84,6 @@ bool 
trace(TraceLevel level); #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.so" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so" #define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.so" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.so" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.so" #define __SYCL_UR_PLUGIN_NAME "libpi_unified_runtime.so" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.so" @@ -94,7 +91,6 @@ bool trace(TraceLevel level); #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dylib" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.dylib" #define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dylib" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.dylib" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dylib" #define __SYCL_UR_PLUGIN_NAME "libpi_unified_runtime.dylib" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.dylib" @@ -150,6 +146,8 @@ using PiExternalMemDescriptor = ::pi_external_mem_descriptor; using PiExternalSemaphoreDescriptor = ::pi_external_semaphore_descriptor; using PiImageOffset = ::pi_image_offset_struct; using PiImageRegion = ::pi_image_region_struct; +using PiPhysicalMem = ::pi_physical_mem; +using PiVirtualAccessFlags = ::pi_virtual_access_flags; __SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext, pi_context_extended_deleter func, diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index df6c827de60f2..d756b0a62e88a 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -381,6 +381,11 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__ 0 #endif +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_virtual_mem__ +// __SYCL_ASPECT(ext_oneapi_virtual_mem, 74) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_virtual_mem__ 0 +#endif + #ifndef __SYCL_ANY_DEVICE_HAS_host__ // __SYCL_ASPECT(host, 0) #define __SYCL_ANY_DEVICE_HAS_host__ 0 @@ -750,3 +755,8 @@ // 
__SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__ 0 #endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_virtual_mem__ +// __SYCL_ASPECT(ext_oneapi_virtual_mem, 74) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_virtual_mem__ 0 +#endif diff --git a/sycl/include/sycl/exception.hpp b/sycl/include/sycl/exception.hpp index 472d5b335f141..ef5c3f2d396d2 100644 --- a/sycl/include/sycl/exception.hpp +++ b/sycl/include/sycl/exception.hpp @@ -10,7 +10,6 @@ // 4.9.2 Exception Class Interface -#include // for backend #include // for cl_int #include // for __SYCL2020_DEPRECATED #include // for __SYCL_EXPORT @@ -49,8 +48,6 @@ enum class errc : unsigned int { backend_mismatch = 14, }; -template using errc_for = typename backend_traits::errc; - /// Constructs an error code using e and sycl_category() __SYCL_EXPORT std::error_code make_error_code(sycl::errc E) noexcept; diff --git a/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp b/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp index b63a2e2225173..a37cf98fd38f0 100644 --- a/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp +++ b/sycl/include/sycl/ext/oneapi/backend/level_zero.hpp @@ -41,76 +41,10 @@ namespace sycl { inline namespace _V1 { -namespace ext::oneapi::level_zero { -// Implementation of various "make" functions resides in libsycl.so and thus -// their interface needs to be backend agnostic. 
-// TODO: remove/merge with similar functions in sycl::detail -__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle); +namespace ext::oneapi::level_zero::detail { __SYCL_EXPORT device make_device(const platform &Platform, pi_native_handle NativeHandle); -__SYCL_EXPORT context make_context(const std::vector &DeviceList, - pi_native_handle NativeHandle, - bool keep_ownership = false); -__SYCL_EXPORT queue make_queue(const context &Context, const device &Device, - pi_native_handle InteropHandle, - bool IsImmCmdList, bool keep_ownership, - const property_list &Properties); -__SYCL_EXPORT event make_event(const context &Context, - pi_native_handle InteropHandle, - bool keep_ownership = false); - -// Construction of SYCL platform. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_platform free function") -T make(typename sycl::detail::interop::type - Interop) { - return make_platform(reinterpret_cast(Interop)); -} - -// Construction of SYCL device. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_device free function") -T make(const platform &Platform, - typename sycl::detail::interop::type - Interop) { - return make_device(Platform, reinterpret_cast(Interop)); -} - -/// Construction of SYCL context. -/// \param DeviceList is a vector of devices which must be encapsulated by -/// created SYCL context. Provided devices and native context handle must -/// be associated with the same platform. -/// \param Interop is a Level Zero native context handle. -/// \param Ownership (optional) specifies who will assume ownership of the -/// native context handle. Default is that SYCL RT does, so it destroys -/// the native handle when the created SYCL object goes out of life. 
-/// -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_context free function") -T make(const std::vector &DeviceList, - typename sycl::detail::interop::type - Interop, - ownership Ownership = ownership::transfer) { - return make_context(DeviceList, - sycl::detail::pi::cast(Interop), - Ownership == ownership::keep); -} - -// Construction of SYCL event. -template > * = nullptr> -__SYCL_DEPRECATED("Use SYCL 2020 sycl::make_event free function") -T make(const context &Context, - typename sycl::detail::interop::type - Interop, - ownership Ownership = ownership::transfer) { - return make_event(Context, reinterpret_cast(Interop), - Ownership == ownership::keep); -} - -} // namespace ext::oneapi::level_zero +} // namespace ext::oneapi::level_zero::detail // Specialization of sycl::make_context for Level-Zero backend. template <> @@ -118,11 +52,16 @@ inline context make_context( const backend_input_t &BackendObject, const async_handler &Handler) { - (void)Handler; - return ext::oneapi::level_zero::make_context( - BackendObject.DeviceList, - detail::pi::cast(BackendObject.NativeHandle), - BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep); + + const std::vector &DeviceList = BackendObject.DeviceList; + pi_native_handle NativeHandle = + detail::pi::cast(BackendObject.NativeHandle); + bool KeepOwnership = + BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep; + + return sycl::detail::make_context(NativeHandle, Handler, + backend::ext_oneapi_level_zero, + KeepOwnership, DeviceList); } namespace detail { @@ -191,7 +130,6 @@ template <> inline queue make_queue( const backend_input_t &BackendObject, const context &TargetContext, const async_handler Handler) { - (void)Handler; const device Device = device{BackendObject.Device}; bool IsImmCmdList = std::holds_alternative( BackendObject.NativeHandle); @@ -202,10 +140,11 @@ inline queue make_queue( : reinterpret_cast( *(std::get_if( &BackendObject.NativeHandle))); - return 
ext::oneapi::level_zero::make_queue( - TargetContext, Device, Handle, IsImmCmdList, + + return sycl::detail::make_queue( + Handle, IsImmCmdList, TargetContext, &Device, BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep, - BackendObject.Properties); + BackendObject.Properties, Handler, backend::ext_oneapi_level_zero); } // Specialization of sycl::get_native for Level-Zero backend. @@ -227,10 +166,11 @@ template <> inline event make_event( const backend_input_t &BackendObject, const context &TargetContext) { - return ext::oneapi::level_zero::make_event( - TargetContext, + return sycl::detail::make_event( detail::pi::cast(BackendObject.NativeHandle), - BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep); + TargetContext, + BackendObject.Ownership == ext::oneapi::level_zero::ownership::keep, + backend::ext_oneapi_level_zero); } // Specialization of sycl::make_kernel_bundle for Level-Zero backend. diff --git a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp index 3201cf94f4065..7f10dd7f79e85 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp @@ -8,7 +8,7 @@ #pragma once -#include // for std::forward +#include #include #include @@ -72,14 +72,20 @@ template struct LaunchConfigAccess { return MLaunchConfig.getProperties(); } }; + +template +void submit_impl(queue &Q, CommandGroupFunc &&CGF, + const sycl::detail::code_location &CodeLoc) { + Q.submit_without_event(std::forward(CGF), CodeLoc); +} } // namespace detail template void submit(queue Q, CommandGroupFunc &&CGF, const sycl::detail::code_location &CodeLoc = sycl::detail::code_location::current()) { - // TODO: Use new submit without Events. 
- Q.submit(std::forward(CGF), CodeLoc); + sycl::ext::oneapi::experimental::detail::submit_impl( + Q, std::forward(CGF), CodeLoc); } template @@ -205,7 +211,8 @@ template Range, const KernelType &KernelObj, ReductionsT &&...Reductions) { submit(Q, [&](handler &CGH) { - nd_launch(CGH, Range, KernelObj, std::forward(Reductions)...); + nd_launch(CGH, Range, KernelObj, + std::forward(Reductions)...); }); } @@ -228,7 +235,8 @@ template , Properties> Config, const KernelType &KernelObj, ReductionsT &&...Reductions) { submit(Q, [&](handler &CGH) { - nd_launch(CGH, Config, KernelObj, std::forward(Reductions)...); + nd_launch(CGH, Config, KernelObj, + std::forward(Reductions)...); }); } @@ -270,11 +278,9 @@ inline void memcpy(handler &CGH, void *Dest, const void *Src, size_t NumBytes) { CGH.memcpy(Dest, Src, NumBytes); } -inline void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes, - const sycl::detail::code_location &CodeLoc = - sycl::detail::code_location::current()) { - submit(Q, [&](handler &CGH) { memcpy(CGH, Dest, Src, NumBytes); }, CodeLoc); -} +__SYCL_EXPORT void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes, + const sycl::detail::code_location &CodeLoc = + sycl::detail::code_location::current()); template void copy(handler &CGH, const T *Src, T *Dest, size_t Count) { @@ -292,11 +298,9 @@ inline void memset(handler &CGH, void *Ptr, int Value, size_t NumBytes) { CGH.memset(Ptr, Value, NumBytes); } -inline void memset(queue Q, void *Ptr, int Value, size_t NumBytes, - const sycl::detail::code_location &CodeLoc = - sycl::detail::code_location::current()) { - submit(Q, [&](handler &CGH) { memset(CGH, Ptr, Value, NumBytes); }, CodeLoc); -} +__SYCL_EXPORT void memset(queue Q, void *Ptr, int Value, size_t NumBytes, + const sycl::detail::code_location &CodeLoc = + sycl::detail::code_location::current()); template void fill(sycl::handler &CGH, T *Ptr, const T &Pattern, size_t Count) { @@ -324,13 +328,9 @@ inline void mem_advise(handler &CGH, void 
*Ptr, size_t NumBytes, int Advice) { CGH.mem_advise(Ptr, NumBytes, Advice); } -inline void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice, - const sycl::detail::code_location &CodeLoc = - sycl::detail::code_location::current()) { - submit( - Q, [&](handler &CGH) { mem_advise(CGH, Ptr, NumBytes, Advice); }, - CodeLoc); -} +__SYCL_EXPORT void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice, + const sycl::detail::code_location &CodeLoc = + sycl::detail::code_location::current()); inline void barrier(handler &CGH) { CGH.ext_oneapi_barrier(); } diff --git a/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp b/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp new file mode 100644 index 0000000000000..24d371fe8c6fd --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp @@ -0,0 +1,81 @@ +//==--- physical_mem.hpp - sycl_ext_oneapi_virtual_mem physical_mem class --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { + +namespace detail { +class physical_mem_impl; +} // namespace detail + +namespace ext::oneapi::experimental { + +enum class address_access_mode : char { none = 0, read = 1, read_write = 2 }; + +class __SYCL_EXPORT physical_mem + : public sycl::detail::OwnerLessBase { +public: + physical_mem(const device &SyclDevice, const context &SyclContext, + size_t NumBytes); + + physical_mem(const queue &SyclQueue, size_t NumBytes) + : physical_mem(SyclQueue.get_device(), SyclQueue.get_context(), + NumBytes) {} + + physical_mem(const physical_mem &rhs) = default; + physical_mem(physical_mem &&rhs) = default; + + physical_mem &operator=(const physical_mem &rhs) = default; + physical_mem &operator=(physical_mem &&rhs) = default; + + ~physical_mem() noexcept(false) {}; + + bool operator==(const physical_mem &rhs) const { return impl == rhs.impl; } + bool operator!=(const physical_mem &rhs) const { return !(*this == rhs); } + + void *map(uintptr_t Ptr, size_t NumBytes, address_access_mode Mode, + size_t Offset = 0) const; + + context get_context() const; + device get_device() const; + + size_t size() const noexcept; + +private: + std::shared_ptr impl; + + template + friend decltype(Obj::impl) + sycl::detail::getSyclObjImpl(const Obj &SyclObject); + + template + friend T sycl::detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); +}; + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl + +namespace std { +template <> struct hash { + size_t operator()( + const sycl::ext::oneapi::experimental::physical_mem &PhysicalMem) const { + return hash>()( + sycl::detail::getSyclObjImpl(PhysicalMem)); + } +}; +} // namespace std diff --git 
a/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp b/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp new file mode 100644 index 0000000000000..74a42354eaa01 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp @@ -0,0 +1,61 @@ +//==- virtual_mem.hpp - sycl_ext_oneapi_virtual_mem virtual mem free funcs -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +enum class granularity_mode : char { + minimum = 0, + recommended = 1, +}; + +__SYCL_EXPORT size_t +get_mem_granularity(const device &SyclDevice, const context &SyclContext, + granularity_mode Mode = granularity_mode::recommended); + +__SYCL_EXPORT size_t +get_mem_granularity(const context &SyclContext, + granularity_mode Mode = granularity_mode::recommended); + +__SYCL_EXPORT uintptr_t reserve_virtual_mem(uintptr_t Start, size_t NumBytes, + const context &SyclContext); + +inline uintptr_t reserve_virtual_mem(size_t NumBytes, + const context &SyclContext) { + return reserve_virtual_mem(0, NumBytes, SyclContext); +} + +__SYCL_EXPORT void free_virtual_mem(uintptr_t Ptr, size_t NumBytes, + const context &SyclContext); + +__SYCL_EXPORT void set_access_mode(const void *Ptr, size_t NumBytes, + address_access_mode Mode, + const context &SyclContext); + +__SYCL_EXPORT address_access_mode get_access_mode(const void *Ptr, + size_t NumBytes, + const context &SyclContext); + +__SYCL_EXPORT void unmap(const void *Ptr, size_t NumBytes, + const context &SyclContext); + +} // Namespace ext::oneapi::experimental +} // namespace _V1 +} // Namespace sycl diff --git 
a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index a71f5400a813d..0d3e757c175b0 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -465,6 +465,7 @@ class __SYCL_EXPORT handler { /// /// \param Queue is a SYCL queue. /// \param IsHost indicates if this handler is created for SYCL host device. + /// TODO: Unused. Remove with ABI break. handler(std::shared_ptr Queue, bool IsHost); /// Constructs SYCL handler from the associated queue and the submission's @@ -476,10 +477,36 @@ class __SYCL_EXPORT handler { /// \param SecondaryQueue is the secondary SYCL queue of the submission. This /// is null if no secondary queue is associated with the submission. /// \param IsHost indicates if this handler is created for SYCL host device. + /// TODO: Unused. Remove with ABI break. handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool IsHost); + /// Constructs SYCL handler from queue. + /// + /// \param Queue is a SYCL queue. + /// \param IsHost indicates if this handler is created for SYCL host device. + /// \param CallerNeedsEvent indicates if the event resulting from this handler + /// is needed by the caller. + handler(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent); + + /// Constructs SYCL handler from the associated queue and the submission's + /// primary and secondary queue. + /// + /// \param Queue is a SYCL queue. This is equal to either PrimaryQueue or + /// SecondaryQueue. + /// \param PrimaryQueue is the primary SYCL queue of the submission. + /// \param SecondaryQueue is the secondary SYCL queue of the submission. This + /// is null if no secondary queue is associated with the submission. + /// \param IsHost indicates if this handler is created for SYCL host device. + /// \param CallerNeedsEvent indicates if the event resulting from this handler + /// is needed by the caller. 
+ handler(std::shared_ptr Queue, + std::shared_ptr PrimaryQueue, + std::shared_ptr SecondaryQueue, bool IsHost, + bool CallerNeedsEvent); + /// Constructs SYCL handler from Graph. /// /// The hander will add the command-group as a node to the graph rather than @@ -489,12 +516,10 @@ class __SYCL_EXPORT handler { handler(std::shared_ptr Graph); /// Stores copy of Arg passed to the CGData.MArgsStorage. - template >> - F *storePlainArg(T &&Arg) { + template void *storePlainArg(T &&Arg) { CGData.MArgsStorage.emplace_back(sizeof(T)); - auto Storage = reinterpret_cast(CGData.MArgsStorage.back().data()); - *Storage = Arg; + void *Storage = static_cast(CGData.MArgsStorage.back().data()); + std::memcpy(Storage, &Arg, sizeof(T)); return Storage; } @@ -577,6 +602,16 @@ class __SYCL_EXPORT handler { /// \return a SYCL event object representing the command group event finalize(); + /// Constructs CG object of specific type, passes it to Scheduler and + /// returns sycl::event object representing the command group. + /// It's expected that the method is the latest method executed before + /// object destruction. + /// \param CallerNeedsEvent Specifies if the caller needs an event + /// representing the work related to this handler. + /// + /// \return a SYCL event object representing the command group + event finalize(bool CallerNeedsEvent); + /// Saves streams associated with this handler. /// /// Streams are then forwarded to command group and flushed in the scheduler. 
@@ -691,7 +726,7 @@ class __SYCL_EXPORT handler { } template void setArgHelper(int ArgIndex, T &&Arg) { - auto StoredArg = static_cast(storePlainArg(Arg)); + void *StoredArg = storePlainArg(Arg); if (!std::is_same::value && std::is_pointer::value) { MArgs.emplace_back(detail::kernel_param_kind_t::kind_pointer, StoredArg, @@ -703,7 +738,7 @@ class __SYCL_EXPORT handler { } void setArgHelper(int ArgIndex, sampler &&Arg) { - auto StoredArg = static_cast(storePlainArg(Arg)); + void *StoredArg = storePlainArg(Arg); MArgs.emplace_back(detail::kernel_param_kind_t::kind_sampler, StoredArg, sizeof(sampler), ArgIndex); } @@ -1186,6 +1221,8 @@ class __SYCL_EXPORT handler { Size == 32 || Size == 64 || Size == 128; } + bool eventNeeded() const; + template struct TransformUserItemType { using type = std::conditional_t< std::is_convertible_v, LambdaArgType>, nd_item, diff --git a/sycl/include/sycl/image.hpp b/sycl/include/sycl/image.hpp index 1239f65cdd259..2d0401764bbe9 100644 --- a/sycl/include/sycl/image.hpp +++ b/sycl/include/sycl/image.hpp @@ -954,7 +954,12 @@ class unsampled_image unsampled_image &operator=(unsampled_image &&rhs) = default; ~unsampled_image() { - common_base::unsampledImageDestructorNotification((void *)this->impl.get()); + try { + common_base::unsampledImageDestructorNotification( + (void *)this->impl.get()); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~unsampled_image", e); + } } bool operator==(const unsampled_image &rhs) const { @@ -1095,7 +1100,11 @@ class sampled_image sampled_image &operator=(sampled_image &&rhs) = default; ~sampled_image() { - common_base::sampledImageDestructorNotification((void *)this->impl.get()); + try { + common_base::sampledImageDestructorNotification((void *)this->impl.get()); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~sampled_image", e); + } } bool operator==(const sampled_image &rhs) const { diff --git a/sycl/include/sycl/info/aspects.def 
b/sycl/include/sycl/info/aspects.def index 2d9cee1351d7a..3b744a89dbb90 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -68,3 +68,4 @@ __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 70) __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 71) __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 72) __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73) +__SYCL_ASPECT(ext_oneapi_virtual_mem, 74) diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 4d32218ab09d4..04b6969fe2b12 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -95,6 +95,12 @@ namespace ext ::oneapi ::experimental { // returned by info::queue::state enum class queue_state { executing, recording }; struct image_descriptor; + +namespace detail { +template +void submit_impl(queue &Q, CommandGroupFunc &&CGF, + const sycl::detail::code_location &CodeLoc); +} // namespace detail } // namespace ext::oneapi::experimental /// Encapsulates a single SYCL queue which schedules kernels on a SYCL device. @@ -2689,6 +2695,11 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const detail::code_location &); #endif + template + friend void ext::oneapi::experimental::detail::submit_impl( + queue &Q, CommandGroupFunc &&CGF, + const sycl::detail::code_location &CodeLoc); + /// A template-free version of submit. event submit_impl(std::function CGH, const detail::code_location &CodeLoc); @@ -2696,6 +2707,28 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { event submit_impl(std::function CGH, queue secondQueue, const detail::code_location &CodeLoc); + /// A template-free version of submit_without_event. + void submit_without_event_impl(std::function CGH, + const detail::code_location &CodeLoc); + + /// Submits a command group function object to the queue, in order to be + /// scheduled for execution on the device. + /// + /// \param CGF is a function object containing command group. 
+ /// \param CodeLoc is the code location of the submit call (default argument) + template + std::enable_if_t, void> + submit_without_event(T CGF, const detail::code_location &CodeLoc) { + detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); +#if __SYCL_USE_FALLBACK_ASSERT + // If post-processing is needed, fall back to the regular submit. + // TODO: Revisit whether we can avoid this. + submit(CGF, CodeLoc); +#else + submit_without_event_impl(CGF, CodeLoc); +#endif // __SYCL_USE_FALLBACK_ASSERT + } + /// Checks if the event needs to be discarded and if so, discards it and /// returns a discarded event. Otherwise, it returns input event. /// TODO: move to impl class in the next ABI Breaking window diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index cd6ea5bcf3a93..3e9fca6dd91fc 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -1178,8 +1178,9 @@ namespace reduction { inline void finalizeHandler(handler &CGH) { CGH.finalize(); } template void withAuxHandler(handler &CGH, FunctorTy Func) { event E = CGH.finalize(); - handler AuxHandler(CGH.MQueue, CGH.MIsHost); - AuxHandler.depends_on(E); + handler AuxHandler(CGH.MQueue, CGH.MIsHost, CGH.eventNeeded()); + if (!createSyclObjFromImpl(CGH.MQueue).is_in_order()) + AuxHandler.depends_on(E); AuxHandler.saveCodeLoc(CGH.MCodeLoc); Func(AuxHandler); CGH.MLastEvent = AuxHandler.finalize(); diff --git a/sycl/include/sycl/sub_group.hpp b/sycl/include/sycl/sub_group.hpp index ef44c750d9e14..f80b0876a65a3 100644 --- a/sycl/include/sycl/sub_group.hpp +++ b/sycl/include/sycl/sub_group.hpp @@ -209,64 +209,6 @@ struct sub_group { #endif } - template - using EnableIfIsScalarArithmetic = - std::enable_if_t::value, T>; - - /* --- one-input shuffles --- */ - /* indices in [0 , sub_group size) */ - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle(T x, id_type local_id) const { -#ifdef __SYCL_DEVICE_ONLY__ - return 
sycl::detail::spirv::Shuffle(*this, x, local_id); -#else - (void)x; - (void)local_id; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle_down(T x, uint32_t delta) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::ShuffleDown(*this, x, delta); -#else - (void)x; - (void)delta; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle_up(T x, uint32_t delta) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::ShuffleUp(*this, x, delta); -#else - (void)x; - (void)delta; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Shuffles in the sub-group class are deprecated.") - T shuffle_xor(T x, id_type value) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::ShuffleXor(*this, x, value); -#else - (void)x; - (void)value; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - /* --- sub_group load/stores --- */ /* these can map to SIMD or block read/write hardware where available */ #ifdef __SYCL_DEVICE_ONLY__ @@ -643,125 +585,6 @@ struct sub_group { #endif } -#ifndef __INTEL_PREVIEW_BREAKING_CHANGES__ - /* --- deprecated collective functions --- */ - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. 
Use " - "sycl::ext::oneapi::broadcast instead.") - EnableIfIsScalarArithmetic broadcast(T x, id<1> local_id) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::spirv::GroupBroadcast(x, local_id); -#else - (void)x; - (void)local_id; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::ext::oneapi::reduce instead.") - EnableIfIsScalarArithmetic reduce(T x, BinaryOperation op) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::calc<__spv::GroupOperation::Reduce>( - typename sycl::detail::GroupOpTag::type(), *this, x, op); -#else - (void)x; - (void)op; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::ext::oneapi::reduce instead.") - EnableIfIsScalarArithmetic reduce(T x, T init, BinaryOperation op) const { -#ifdef __SYCL_DEVICE_ONLY__ - return op(init, reduce(x, op)); -#else - (void)x; - (void)init; - (void)op; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::ext::oneapi::exclusive_scan instead.") - EnableIfIsScalarArithmetic exclusive_scan(T x, BinaryOperation op) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::calc<__spv::GroupOperation::ExclusiveScan>( - typename sycl::detail::GroupOpTag::type(), *this, x, op); -#else - (void)x; - (void)op; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. 
Use " - "sycl::ext::oneapi::exclusive_scan instead.") - EnableIfIsScalarArithmetic exclusive_scan(T x, T init, - BinaryOperation op) const { -#ifdef __SYCL_DEVICE_ONLY__ - if (get_local_id().get(0) == 0) { - x = op(init, x); - } - T scan = exclusive_scan(x, op); - if (get_local_id().get(0) == 0) { - scan = init; - } - return scan; -#else - (void)x; - (void)init; - (void)op; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. Use " - "sycl::ext::oneapi::inclusive_scan instead.") - EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op) const { -#ifdef __SYCL_DEVICE_ONLY__ - return sycl::detail::calc<__spv::GroupOperation::InclusiveScan>( - typename sycl::detail::GroupOpTag::type(), *this, x, op); -#else - (void)x; - (void)op; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } - - template - __SYCL_DEPRECATED("Collectives in the sub-group class are deprecated. 
Use " - "sycl::ext::oneapi::inclusive_scan instead.") - EnableIfIsScalarArithmetic inclusive_scan(T x, BinaryOperation op, - T init) const { -#ifdef __SYCL_DEVICE_ONLY__ - if (get_local_id().get(0) == 0) { - x = op(init, x); - } - return inclusive_scan(x, op); -#else - (void)x; - (void)op; - (void)init; - throw sycl::exception(make_error_code(errc::feature_not_supported), - "Sub-groups are not supported on host."); -#endif - } -#endif // __INTEL_PREVIEW_BREAKING_CHANGES__ - linear_id_type get_group_linear_range() const { #ifdef __SYCL_DEVICE_ONLY__ return static_cast(get_group_range()[0]); diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 53a60381f0b8d..16b5e8f0f6c40 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -111,4 +111,6 @@ #include #include #include +#include +#include #include diff --git a/sycl/include/sycl/vector.hpp b/sycl/include/sycl/vector.hpp index d5d193bfa7add..2029d165464ce 100644 --- a/sycl/include/sycl/vector.hpp +++ b/sycl/include/sycl/vector.hpp @@ -985,6 +985,11 @@ template class vec { MultiPtr(Acc); load(Offset, MultiPtr); } + void load(size_t Offset, const DataT *Ptr) { + for (int I = 0; I < NumElements; ++I) + setValue(I, Ptr[Offset * NumElements + I]); + } + template void store(size_t Offset, multi_ptr Ptr) const { @@ -1004,6 +1009,10 @@ template class vec { MultiPtr(Acc); store(Offset, MultiPtr); } + void store(size_t Offset, DataT *Ptr) const { + for (int I = 0; I < NumElements; ++I) + Ptr[Offset * NumElements + I] = getValue(I); + } void ConvertToDataT() { for (size_t i = 0; i < NumElements; ++i) { diff --git a/sycl/include/sycl/vector_preview.hpp b/sycl/include/sycl/vector_preview.hpp index 95438835e31f9..d0549dbefd817 100644 --- a/sycl/include/sycl/vector_preview.hpp +++ b/sycl/include/sycl/vector_preview.hpp @@ -572,6 +572,11 @@ class vec : public detail::vec_arith { MultiPtr(Acc); load(Offset, MultiPtr); } + void load(size_t Offset, const DataT *Ptr) { + for (int I = 0; I < 
NumElements; ++I) + m_Data[I] = Ptr[Offset * NumElements + I]; + } + template void store(size_t Offset, multi_ptr Ptr) const { @@ -591,6 +596,10 @@ class vec : public detail::vec_arith { MultiPtr(Acc); store(Offset, MultiPtr); } + void store(size_t Offset, DataT *Ptr) const { + for (int I = 0; I < NumElements; ++I) + Ptr[Offset * NumElements + I] = m_Data[I]; + } private: // fields diff --git a/sycl/include/syclcompat/device.hpp b/sycl/include/syclcompat/device.hpp index ed16a9b32bfa4..3e3e6cb77e71d 100644 --- a/sycl/include/syclcompat/device.hpp +++ b/sycl/include/syclcompat/device.hpp @@ -339,9 +339,13 @@ class device_ext : public sycl::device { public: device_ext() : sycl::device(), _ctx(*this) {} ~device_ext() { - std::lock_guard lock(m_mutex); - sycl::event::wait(_events); - _queues.clear(); + try { + std::lock_guard lock(m_mutex); + sycl::event::wait(_events); + _queues.clear(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~device_ext", e); + } } device_ext(const sycl::device &base, bool print_on_async_exceptions = false, bool in_order = true) diff --git a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp b/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp index 52d419c93ee05..f7561b719bd56 100644 --- a/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp +++ b/sycl/pi_win_proxy_loader/pi_win_proxy_loader.cpp @@ -89,7 +89,6 @@ std::wstring getCurrentDSODir() { #define __SYCL_OPENCL_PLUGIN_NAME "pi_opencl.dll" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" #define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "pi_esimd_emulator.dll" #define __SYCL_HIP_PLUGIN_NAME "pi_hip.dll" #define __SYCL_UNIFIED_RUNTIME_PLUGIN_NAME "pi_unified_runtime.dll" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "pi_native_cpu.dll" @@ -97,7 +96,6 @@ std::wstring getCurrentDSODir() { #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dll" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.dll" #define 
__SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dll" -#define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.dll" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dll" #define __SYCL_UNIFIED_RUNTIME_PLUGIN_NAME "libpi_unified_runtime.dll" #define __SYCL_NATIVE_CPU_PLUGIN_NAME "libpi_native_cpu.dll" @@ -147,7 +145,6 @@ void preloadLibraries() { loadPlugin(__SYCL_OPENCL_PLUGIN_NAME); loadPlugin(__SYCL_LEVEL_ZERO_PLUGIN_NAME); loadPlugin(__SYCL_CUDA_PLUGIN_NAME); - loadPlugin(__SYCL_ESIMD_EMULATOR_PLUGIN_NAME); loadPlugin(__SYCL_HIP_PLUGIN_NAME); loadPlugin(__SYCL_UNIFIED_RUNTIME_PLUGIN_NAME); loadPlugin(__SYCL_NATIVE_CPU_PLUGIN_NAME); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 0077b245905db..13b44ce9a701d 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1298,6 +1298,79 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, 
const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index 2b5d77b26ea9d..8c5112f4cc9d1 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -81,4 +82,8 @@ struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ { using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_; }; +struct 
_pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; + #endif // PI_CUDA_HPP diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 33b7388f9c884..074909bcd6736 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1301,6 +1301,79 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return 
pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/hip/pi_hip.hpp b/sycl/plugins/hip/pi_hip.hpp index 018d069f5fe7f..bec26c9866fdb 100644 --- a/sycl/plugins/hip/pi_hip.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -94,4 +95,8 @@ struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ { using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_; }; +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; + #endif // PI_HIP_HPP diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index f88e8c1ed3cd3..61ddafc09ccdc 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1424,6 +1424,154 @@ piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { return 
pi2ur::piextCommandBufferReleaseCommand(Command); } +/// API for getting information about the minimum and recommended granularity +/// of physical and virtual memory. +/// +/// \param Context is the context to get the granularity from. +/// \param Device is the device to get the granularity from. +/// \param MemSize is the potentially unadjusted size to get granularity for. +/// \param ParamName is the type of query to perform. +/// \param ParamValueSize is the size of the result in bytes. +/// \param ParamValue is the result. +/// \param ParamValueSizeRet is how many bytes were written. +pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +/// API for creating a physical memory handle that virtual memory can be mapped +/// to. +/// +/// \param Context is the context within which the physical memory is allocated. +/// \param Device is the device the physical memory is on. +/// \param MemSize is the size of physical memory to allocate. This must be a +/// multiple of the minimum virtual memory granularity. +/// \param RetPhysicalMem is the handle for the resulting physical memory. +pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhysicalMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhysicalMem); +} + +/// API for retaining a physical memory handle. +/// +/// \param PhysicalMem is the handle for the physical memory to retain. +pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +/// API for releasing a physical memory handle. +/// +/// \param PhysicalMem is the handle for the physical memory to free. 
+pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +/// API for reserving a virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// reserved. +/// \param Start is a pointer to the start of the region to reserve. If nullptr +/// the implementation selects a start address. +/// \param RangeSize is the size of the virtual address range to reserve in +/// bytes. +/// \param RetPtr is the pointer to the start of the resulting virtual memory +/// range. +pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +/// API for freeing a virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// reserved. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range. +pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +/// API for mapping a virtual memory range to a physical memory allocation at +/// a given offset. +/// +/// \param Context is the context within which both the virtual memory range is +/// reserved and the physical memory is allocated. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range. +/// \param PhysicalMem is the handle for the physical memory to map Ptr to. +/// \param Offset is the offset into PhysicalMem in bytes to map Ptr to. +/// \param Flags is the access flags to set for the mapping. 
+pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, pi_physical_mem PhysicalMem, + size_t Offset, pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +/// API for unmapping a virtual memory range previously mapped in a context. +/// After a call to this function, the virtual memory range is left in a state +/// ready to be remapped. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +/// API for setting the access mode of a mapped virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +/// \param Flags is the access flags to set for the mapped virtual access range. +pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +/// API for getting info about a mapped virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +/// \param ParamName is the type of query to perform. +/// \param ParamValueSize is the size of the result in bytes. +/// \param ParamValue is the result. +/// \param ParamValueSizeRet is how many bytes were written. 
+pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + const char SupportedVersion[] = _PI_LEVEL_ZERO_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { // missing diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index d867caea5e23d..e1f40fcbcd16b 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -1321,6 +1321,79 @@ pi_result piextKernelSuggestMaxCooperativeGroupCount( return PI_ERROR_UNSUPPORTED_FEATURE; } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + 
+pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + // Initialize function table with stubs. 
#define _PI_API(api) \ (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); diff --git a/sycl/plugins/native_cpu/pi_native_cpu.hpp b/sycl/plugins/native_cpu/pi_native_cpu.hpp index 1d92580997b76..287b3c03115b6 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.hpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -43,3 +44,7 @@ struct _pi_program : ur_program_handle_t_ { struct _pi_queue : ur_queue_handle_t_ { using ur_queue_handle_t_::ur_queue_handle_t_; }; + +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 1fef329d179af..c569adb4a8839 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1228,6 +1228,79 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhysicalMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhysicalMem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +pi_result 
piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, pi_physical_mem PhysicalMem, + size_t Offset, pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + pi_result piTearDown(void *PluginParameter) { return pi2ur::piTearDown(PluginParameter); } diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index bb36a41b78ed6..252deba45219a 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -100,13 +100,12 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 396fb20498c315a526c961d7cb645b42795acd2c - 
# Merge: 719bb9cd e2ffea69 - # Author: Kenneth Benzie (Benie) - # Date: Thu May 23 10:53:03 2024 +0100 - # Merge pull request #1501 from RossBrunton/ross/kerneltests - # [Testing] Spec clarifications and testing updates for kernel - set(UNIFIED_RUNTIME_TAG 764b75c9087930799963a30be726ac76fcf1ac11) + # commit aaf08109f2a05adb61f50478824ae2739526daee + # Author: Ben Tracy + # Date: Mon Jun 17 14:49:53 2024 +0100 + # [CMDBUF] Fix coverity issue in command buffers + # - Fix incorrect conditions for copy engine usage that were reported on coverity. + set(UNIFIED_RUNTIME_TAG aaf08109f2a05adb61f50478824ae2739526daee) fetch_adapter_source(level_zero ${UNIFIED_RUNTIME_REPO} diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 3ee63a025593b..3690c78d61b47 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -678,6 +678,31 @@ inline pi_result ur2piSamplerInfoValue(ur_sampler_info_t ParamName, } } +inline pi_result ur2piVirtualMemInfoValue(ur_virtual_mem_info_t ParamName, + size_t ParamValueSizePI, + size_t *ParamValueSizeUR, + void *ParamValue) { + + ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); + switch (ParamName) { + case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { + auto ConvertFunc = [](ur_virtual_mem_access_flags_t UrValue) { + pi_virtual_access_flags PiValue = 0; + if (UrValue & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) + PiValue |= PI_VIRTUAL_ACCESS_FLAG_RW; + if (UrValue & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) + PiValue |= PI_VIRTUAL_ACCESS_FLAG_READ_ONLY; + return PiValue; + }; + return Value + .convert( + ConvertFunc); + } + default: + return PI_SUCCESS; + } +} + // Translate UR device info values to PI info values inline pi_result ur2piUSMAllocInfoValue(ur_usm_alloc_info_t ParamName, size_t ParamValueSizePI, @@ -1300,6 +1325,9 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, PI_TO_UR_MAP_DEVICE_INFO( 
PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT, UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO( + PI_EXT_ONEAPI_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT, + UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP) PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_INTEL_DEVICE_INFO_ESIMD_SUPPORT, UR_DEVICE_INFO_ESIMD_SUPPORT) PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_COMPONENT_DEVICES, @@ -1311,6 +1339,8 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM, + UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT) #undef PI_TO_UR_MAP_DEVICE_INFO default: return PI_ERROR_UNKNOWN; @@ -5665,4 +5695,219 @@ inline pi_result piextSignalExternalSemaphore( // Bindless Images Extension /////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// Virtual Memory + +inline pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_device_handle_t UrDevice = reinterpret_cast(Device); + + ur_virtual_mem_granularity_info_t InfoType{}; + switch (ParamName) { + case PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: + InfoType = UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; + break; + case PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: + InfoType = UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; + break; + default: + return PI_ERROR_UNKNOWN; + } + + HANDLE_ERRORS(urVirtualMemGranularityGetInfo(UrContext, UrDevice, InfoType, + ParamValueSize, 
ParamValue, + ParamValueSizeRet)); + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhyscialMem) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_device_handle_t UrDevice = reinterpret_cast(Device); + + ur_physical_mem_handle_t *UrPhysicalMem = + reinterpret_cast(RetPhyscialMem); + + HANDLE_ERRORS(urPhysicalMemCreate(UrContext, UrDevice, MemSize, nullptr, + UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); + + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast(PhysicalMem); + + HANDLE_ERRORS(urPhysicalMemRetain(UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast(PhysicalMem); + + HANDLE_ERRORS(urPhysicalMemRelease(UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(RetPtr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + HANDLE_ERRORS(urVirtualMemReserve(UrContext, Start, RangeSize, RetPtr)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + HANDLE_ERRORS(urVirtualMemFree(UrContext, Ptr, RangeSize)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags 
Flags) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_virtual_mem_access_flags_t UrFlags = 0; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_RW) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; + + HANDLE_ERRORS(urVirtualMemSetAccess(UrContext, Ptr, RangeSize, UrFlags)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_physical_mem PhysicalMem, size_t Offset, + pi_virtual_access_flags Flags) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast(PhysicalMem); + + ur_virtual_mem_access_flags_t UrFlags = 0; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_RW) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; + + HANDLE_ERRORS(urVirtualMemMap(UrContext, Ptr, RangeSize, UrPhysicalMem, + Offset, UrFlags)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + HANDLE_ERRORS(urVirtualMemUnmap(UrContext, Ptr, RangeSize)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + 
ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_virtual_mem_info_t InfoType{}; + switch (ParamName) { + case PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE: + InfoType = UR_VIRTUAL_MEM_INFO_ACCESS_MODE; + break; + default: + return PI_ERROR_UNKNOWN; + } + + HANDLE_ERRORS(urVirtualMemGetInfo(UrContext, Ptr, RangeSize, InfoType, + ParamValueSize, ParamValue, + ParamValueSizeRet)); + ur2piVirtualMemInfoValue(InfoType, ParamValueSize, &ParamValueSize, + ParamValue); + + return PI_SUCCESS; +} + +// Virtual Memory +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Enqueue Native Command Extension +inline pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *MemList, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); + + auto UrQueue = reinterpret_cast(Queue); + auto UrFn = reinterpret_cast(Fn); + const ur_mem_handle_t *UrMemList = + reinterpret_cast(MemList); + const ur_event_handle_t *UrEventWaitList = + reinterpret_cast(EventWaitList); + ur_event_handle_t *UREvent = reinterpret_cast(Event); + + HANDLE_ERRORS(urEnqueueNativeCommandExp( + UrQueue, UrFn, Data, NumMems, UrMemList, nullptr /*pProperties*/, + NumEventsInWaitList, UrEventWaitList, UREvent)); + + return PI_SUCCESS; +} +// Enqueue Native Command Extension +/////////////////////////////////////////////////////////////////////////////// + } // namespace pi2ur diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 30ba9a7afc8b1..59d6b27017eae 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -1189,6 +1189,72 @@ piextCommandBufferReleaseCommand(pi_ext_command_buffer_command 
Command) { return pi2ur::piextCommandBufferReleaseCommand(Command); } +__SYCL_EXPORT pi_result piextVirtualMemGranularityGetInfo( + pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +__SYCL_EXPORT pi_result +piextPhysicalMemCreate(pi_context Context, pi_device Device, size_t MemSize, + pi_physical_mem *RetPhsycialMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhsycialMem); +} + +__SYCL_EXPORT pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +__SYCL_EXPORT pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +__SYCL_EXPORT pi_result piextVirtualMemReserve(pi_context Context, + const void *Start, + size_t RangeSize, + void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +__SYCL_EXPORT pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +__SYCL_EXPORT pi_result +piextVirtualMemSetAccess(pi_context Context, const void *Ptr, size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +__SYCL_EXPORT pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_physical_mem PhysicalMem, + size_t Offset, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +__SYCL_EXPORT pi_result piextVirtualMemUnmap(pi_context Context, + const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +__SYCL_EXPORT 
pi_result +piextVirtualMemGetInfo(pi_context Context, const void *Ptr, size_t RangeSize, + pi_virtual_mem_info ParamName, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { @@ -1381,6 +1447,16 @@ __SYCL_EXPORT pi_result piextSignalExternalSemaphore( EventWaitList, Event); } +pi_result +piextEnqueueNativeCommand(pi_queue Queue, pi_enqueue_native_command_function Fn, + void *Data, pi_uint32 NumMems, const pi_mem *Mems, + pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { + return pi2ur::piextEnqueueNativeCommand(Queue, Fn, Data, NumMems, Mems, + NumEventsInWaitList, EventWaitList, + Event); +} + // This interface is not in Unified Runtime currently __SYCL_EXPORT pi_result piPluginInit(pi_plugin *PluginInit) { PI_ASSERT(PluginInit, PI_ERROR_INVALID_VALUE); diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index d683f32d16892..f915ef4e2cb8e 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -248,6 +248,7 @@ set(SYCL_COMMON_SOURCES "context.cpp" "device.cpp" "device_selector.cpp" + "enqueue_functions.cpp" "event.cpp" "exception.cpp" "exception_list.cpp" @@ -256,12 +257,13 @@ set(SYCL_COMMON_SOURCES "interop_handle.cpp" "kernel.cpp" "kernel_bundle.cpp" + "physical_mem.cpp" "platform.cpp" "queue.cpp" "sampler.cpp" "stream.cpp" "spirv_ops.cpp" - "esimd_emulator_device_interface.cpp" + "virtual_mem.cpp" "$<$:detail/windows_pi.cpp>" "$<$,$>:detail/posix_pi.cpp>" ) diff --git a/sycl/source/accessor.cpp b/sycl/source/accessor.cpp index ebd8e83b3c665..d3f1557871d95 100644 --- a/sycl/source/accessor.cpp +++ b/sycl/source/accessor.cpp @@ -23,32 +23,6 @@ device getDeviceFromHandler(handler &cgh) { return cgh.MGraph->getDevice(); } -// TODO: the 
following function to be removed during next ABI break window -AccessorBaseHost::AccessorBaseHost(id<3> Offset, range<3> AccessRange, - range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, - int Dims, int ElemSize, int OffsetInBytes, - bool IsSubBuffer, - const property_list &PropertyList) { - impl = std::shared_ptr( - new AccessorImplHost(Offset, AccessRange, MemoryRange, AccessMode, - (detail::SYCLMemObjI *)SYCLMemObject, Dims, ElemSize, - false, OffsetInBytes, IsSubBuffer, PropertyList)); -} - -// TODO: the following function to be removed during next ABI break window -AccessorBaseHost::AccessorBaseHost(id<3> Offset, range<3> AccessRange, - range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, - int Dims, int ElemSize, bool IsPlaceH, - int OffsetInBytes, bool IsSubBuffer, - const property_list &PropertyList) { - impl = std::shared_ptr( - new AccessorImplHost(Offset, AccessRange, MemoryRange, AccessMode, - (detail::SYCLMemObjI *)SYCLMemObject, Dims, ElemSize, - IsPlaceH, OffsetInBytes, IsSubBuffer, PropertyList)); -} - AccessorBaseHost::AccessorBaseHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, void *SYCLMemObject, diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index ed0539f266ee2..cb142906a5970 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -58,8 +58,6 @@ backend convertBackend(pi_platform_backend PiBackend) { return backend::ext_oneapi_cuda; case PI_EXT_PLATFORM_BACKEND_HIP: return backend::ext_oneapi_hip; - case PI_EXT_PLATFORM_BACKEND_ESIMD: - return backend::ext_intel_esimd_emulator; case PI_EXT_PLATFORM_BACKEND_NATIVE_CPU: return backend::ext_oneapi_native_cpu; } @@ -93,15 +91,21 @@ __SYCL_EXPORT device make_device(pi_native_handle NativeHandle, __SYCL_EXPORT context make_context(pi_native_handle NativeHandle, const async_handler &Handler, - backend Backend) { + backend Backend, bool KeepOwnership, + const std::vector &DeviceList) { const auto 
&Plugin = getPlugin(Backend); pi::PiContext PiContext = nullptr; + std::vector DeviceHandles; + for (auto Dev : DeviceList) { + DeviceHandles.push_back(detail::getSyclObjImpl(Dev)->getHandleRef()); + } Plugin->call( - NativeHandle, 0, nullptr, false, &PiContext); + NativeHandle, DeviceHandles.size(), DeviceHandles.data(), false, + &PiContext); // Construct the SYCL context from PI context. - return detail::createSyclObjFromImpl( - std::make_shared(PiContext, Handler, Plugin)); + return detail::createSyclObjFromImpl(std::make_shared( + PiContext, Handler, Plugin, DeviceList, !KeepOwnership)); } __SYCL_EXPORT queue make_queue(pi_native_handle NativeHandle, diff --git a/sycl/source/backend/level_zero.cpp b/sycl/source/backend/level_zero.cpp index 7f43f12c0cc4e..ade630ba426c2 100644 --- a/sycl/source/backend/level_zero.cpp +++ b/sycl/source/backend/level_zero.cpp @@ -15,17 +15,9 @@ namespace sycl { inline namespace _V1 { -namespace ext::oneapi::level_zero { -using namespace detail; +namespace ext::oneapi::level_zero::detail { +using namespace sycl::detail; -//---------------------------------------------------------------------------- -// Implementation of level_zero::make -__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { - return detail::make_platform(NativeHandle, backend::ext_oneapi_level_zero); -} - -//---------------------------------------------------------------------------- -// Implementation of level_zero::make __SYCL_EXPORT device make_device(const platform &Platform, pi_native_handle NativeHandle) { const auto &Plugin = pi::getPlugin(); @@ -39,48 +31,6 @@ __SYCL_EXPORT device make_device(const platform &Platform, PlatformImpl->getOrMakeDeviceImpl(PiDevice, PlatformImpl)); } -//---------------------------------------------------------------------------- -// Implementation of level_zero::make -__SYCL_EXPORT context make_context(const std::vector &DeviceList, - pi_native_handle NativeHandle, - bool KeepOwnership) { - const auto &Plugin = 
pi::getPlugin(); - // Create PI context first. - pi_context PiContext; - std::vector DeviceHandles; - for (auto Dev : DeviceList) { - DeviceHandles.push_back(detail::getSyclObjImpl(Dev)->getHandleRef()); - } - Plugin->call( - NativeHandle, DeviceHandles.size(), DeviceHandles.data(), !KeepOwnership, - &PiContext); - // Construct the SYCL context from PI context. - return detail::createSyclObjFromImpl( - std::make_shared(PiContext, detail::defaultAsyncHandler, - Plugin, DeviceList, !KeepOwnership)); -} - -//---------------------------------------------------------------------------- -// Implementation of level_zero::make -__SYCL_EXPORT queue make_queue(const context &Context, const device &Device, - pi_native_handle NativeHandle, bool IsImmCmdList, - bool KeepOwnership, - const property_list &Properties) { - const auto &ContextImpl = getSyclObjImpl(Context); - return detail::make_queue( - NativeHandle, IsImmCmdList, Context, &Device, KeepOwnership, Properties, - ContextImpl->get_async_handler(), backend::ext_oneapi_level_zero); -} - -//---------------------------------------------------------------------------- -// Implementation of level_zero::make -__SYCL_EXPORT event make_event(const context &Context, - pi_native_handle NativeHandle, - bool KeepOwnership) { - return detail::make_event(NativeHandle, Context, KeepOwnership, - backend::ext_oneapi_level_zero); -} - -} // namespace ext::oneapi::level_zero +} // namespace ext::oneapi::level_zero::detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/backend/opencl.cpp b/sycl/source/backend/opencl.cpp index 5a282542c5b24..9bdd8bfaeb317 100644 --- a/sycl/source/backend/opencl.cpp +++ b/sycl/source/backend/opencl.cpp @@ -21,34 +21,6 @@ inline namespace _V1 { namespace opencl { using namespace detail; -//---------------------------------------------------------------------------- -// Implementation of opencl::make -__SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { - return 
detail::make_platform(NativeHandle, backend::opencl); -} - -//---------------------------------------------------------------------------- -// Implementation of opencl::make -__SYCL_EXPORT device make_device(pi_native_handle NativeHandle) { - return detail::make_device(NativeHandle, backend::opencl); -} - -//---------------------------------------------------------------------------- -// Implementation of opencl::make -__SYCL_EXPORT context make_context(pi_native_handle NativeHandle) { - return detail::make_context(NativeHandle, detail::defaultAsyncHandler, - backend::opencl); -} - -//---------------------------------------------------------------------------- -// Implementation of opencl::make -__SYCL_EXPORT queue make_queue(const context &Context, - pi_native_handle NativeHandle) { - const auto &ContextImpl = getSyclObjImpl(Context); - return detail::make_queue(NativeHandle, 0, Context, nullptr, false, {}, - ContextImpl->get_async_handler(), backend::opencl); -} - //---------------------------------------------------------------------------- // Free functions to query OpenCL backend extensions __SYCL_EXPORT bool has_extension(const sycl::platform &SyclPlatform, diff --git a/sycl/source/detail/accessor_impl.hpp b/sycl/source/detail/accessor_impl.hpp index 7011e6902f0bf..377dca7cc90ee 100644 --- a/sycl/source/detail/accessor_impl.hpp +++ b/sycl/source/detail/accessor_impl.hpp @@ -39,32 +39,8 @@ class SYCLMemObjI; class Command; -class __SYCL_EXPORT AccessorImplHost { +class AccessorImplHost { public: - // TODO: Remove when ABI break is allowed. 
- AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, int Dims, - int ElemSize, int OffsetInBytes = 0, - bool IsSubBuffer = false, - const property_list &PropertyList = {}) - : MAccData(Offset, AccessRange, MemoryRange), MAccessMode(AccessMode), - MSYCLMemObj((detail::SYCLMemObjI *)SYCLMemObject), MDims(Dims), - MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes), - MIsSubBuffer(IsSubBuffer), MPropertyList(PropertyList), - MIsPlaceH(false) {} - - // TODO: Remove when ABI break is allowed. - AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, - access::mode AccessMode, void *SYCLMemObject, int Dims, - int ElemSize, bool IsPlaceH, int OffsetInBytes = 0, - bool IsSubBuffer = false, - const property_list &PropertyList = {}) - : MAccData(Offset, AccessRange, MemoryRange), MAccessMode(AccessMode), - MSYCLMemObj((detail::SYCLMemObjI *)SYCLMemObject), MDims(Dims), - MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes), - MIsSubBuffer(IsSubBuffer), MPropertyList(PropertyList), - MIsPlaceH(IsPlaceH) {} - AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, void *SYCLMemObject, int Dims, int ElemSize, size_t OffsetInBytes = 0, @@ -151,7 +127,7 @@ class __SYCL_EXPORT AccessorImplHost { using AccessorImplPtr = std::shared_ptr; -class __SYCL_EXPORT LocalAccessorImplHost { +class LocalAccessorImplHost { public: // Allocate ElemSize more data to have sufficient padding to enforce // alignment. 
diff --git a/sycl/source/detail/buffer_impl.hpp b/sycl/source/detail/buffer_impl.hpp index af4da07ffd198..8408cb0da38aa 100644 --- a/sycl/source/detail/buffer_impl.hpp +++ b/sycl/source/detail/buffer_impl.hpp @@ -38,7 +38,7 @@ class host_accessor; namespace detail { -class __SYCL_EXPORT buffer_impl final : public SYCLMemObjT { +class buffer_impl final : public SYCLMemObjT { using BaseT = SYCLMemObjT; using typename BaseT::MemObjType; @@ -129,22 +129,6 @@ class __SYCL_EXPORT buffer_impl final : public SYCLMemObjT { : BaseT(MemObject, SyclContext, OwnNativeHandle, std::move(AvailableEvent), std::move(Allocator)) {} - // TODO: remove the following 2 constructors when it is allowed to break ABI. - buffer_impl(cl_mem MemObject, const context &SyclContext, - const size_t SizeInBytes, - std::unique_ptr Allocator, - event AvailableEvent) - : buffer_impl(pi::cast(MemObject), SyclContext, - SizeInBytes, std::move(Allocator), - std::move(AvailableEvent)) {} - - buffer_impl(pi_native_handle MemObject, const context &SyclContext, - const size_t SizeInBytes, - std::unique_ptr Allocator, - event AvailableEvent) - : BaseT(MemObject, SyclContext, SizeInBytes, std::move(AvailableEvent), - std::move(Allocator)) {} - void *allocateMem(ContextImplPtr Context, bool InitFromUserData, void *HostPtr, sycl::detail::pi::PiEvent &OutEventToWait) override; diff --git a/sycl/source/detail/config.cpp b/sycl/source/detail/config.cpp index f632e3c94c234..21ce89458835f 100644 --- a/sycl/source/detail/config.cpp +++ b/sycl/source/detail/config.cpp @@ -163,17 +163,15 @@ void dumpConfig() { // Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST and // ONEAPI_DEVICE_SELECTOR -// TODO: Remove esimd_emulator in the next ABI breaking window. // TODO: host device type will be removed once sycl_ext_oneapi_filter_selector // is removed. 
-const std::array, 8> &getSyclBeMap() { - static const std::array, 8> SyclBeMap = { +const std::array, 7> &getSyclBeMap() { + static const std::array, 7> SyclBeMap = { {{"host", backend::host}, {"opencl", backend::opencl}, {"level_zero", backend::ext_oneapi_level_zero}, {"cuda", backend::ext_oneapi_cuda}, {"hip", backend::ext_oneapi_hip}, - {"esimd_emulator", backend::ext_intel_esimd_emulator}, {"native_cpu", backend::ext_oneapi_native_cpu}, {"*", backend::all}}}; return SyclBeMap; diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index efbdc81fb34fb..71ba0310a24e8 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -251,7 +251,7 @@ getSyclDeviceTypeMap() { // Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST and // ONEAPI_DEVICE_SELECTOR -const std::array, 8> &getSyclBeMap(); +const std::array, 7> &getSyclBeMap(); // --------------------------------------- // ONEAPI_DEVICE_SELECTOR support diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 388c312305d4a..add8bfa679b1a 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -145,23 +145,27 @@ cl_context context_impl::get() const { bool context_impl::is_host() const { return MHostContext; } context_impl::~context_impl() { - // Free all events associated with the initialization of device globals. - for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) - DeviceGlobalInitializer.second.ClearEvents(getPlugin()); - // Free all device_global USM allocations associated with this context. 
- for (const void *DeviceGlobal : MAssociatedDeviceGlobals) { - DeviceGlobalMapEntry *DGEntry = - detail::ProgramManager::getInstance().getDeviceGlobalEntry( - DeviceGlobal); - DGEntry->removeAssociatedResources(this); - } - for (auto LibProg : MCachedLibPrograms) { - assert(LibProg.second && "Null program must not be kept in the cache"); - getPlugin()->call(LibProg.second); - } - if (!MHostContext) { - // TODO catch an exception and put it to list of asynchronous exceptions - getPlugin()->call_nocheck(MContext); + try { + // Free all events associated with the initialization of device globals. + for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) + DeviceGlobalInitializer.second.ClearEvents(getPlugin()); + // Free all device_global USM allocations associated with this context. + for (const void *DeviceGlobal : MAssociatedDeviceGlobals) { + DeviceGlobalMapEntry *DGEntry = + detail::ProgramManager::getInstance().getDeviceGlobalEntry( + DeviceGlobal); + DGEntry->removeAssociatedResources(this); + } + for (auto LibProg : MCachedLibPrograms) { + assert(LibProg.second && "Null program must not be kept in the cache"); + getPlugin()->call(LibProg.second); + } + if (!MHostContext) { + // TODO catch an exception and put it to list of asynchronous exceptions + getPlugin()->call(MContext); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~context_impl", e); } } diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index d55bf31ff2e3a..9400037a86adf 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -81,7 +81,7 @@ DeviceGlobalUSMMem &DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM( sizeof(MDeviceGlobalPtr)), QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr, std::vector{}, - &InitEvent); + &InitEvent, nullptr); NewAlloc.MInitEvent = InitEvent; } diff --git a/sycl/source/detail/device_image_impl.hpp 
b/sycl/source/detail/device_image_impl.hpp index f21bf3ccd0185..eda511e340d10 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -300,15 +300,18 @@ class device_image_impl { } ~device_image_impl() { - - if (MProgram) { - const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); - Plugin->call(MProgram); - } - if (MSpecConstsBuffer) { - std::lock_guard Lock{MSpecConstAccessMtx}; - const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); - memReleaseHelper(Plugin, MSpecConstsBuffer); + try { + if (MProgram) { + const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); + Plugin->call(MProgram); + } + if (MSpecConstsBuffer) { + std::lock_guard Lock{MSpecConstAccessMtx}; + const PluginPtr &Plugin = getSyclObjImpl(MContext)->getPlugin(); + memReleaseHelper(Plugin, MSpecConstsBuffer); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~device_image_impl", e); } } diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 751acd385a77d..8547a40d4b999 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -363,7 +363,7 @@ bool device_impl::has(aspect Aspect) const { return is_accelerator(); case aspect::custom: return false; - // TODO: Implement this for FPGA and ESIMD emulators. + // TODO: Implement this for FPGA emulator. 
case aspect::emulated: return false; case aspect::host_debuggable: @@ -746,9 +746,17 @@ bool device_impl::has(aspect Aspect) const { sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; return call_successful && support; } + case aspect::ext_oneapi_virtual_mem: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } } - throw runtime_error("This device aspect has not been implemented yet.", - PI_ERROR_INVALID_DEVICE); + + return false; // This device aspect has not been implemented yet. } std::shared_ptr device_impl::getHostDeviceImpl() { diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index c7d245e5e91c0..63d60e41ac7e8 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -54,8 +54,12 @@ bool event_impl::is_host() { } event_impl::~event_impl() { - if (MEvent) - getPlugin()->call(MEvent); + try { + if (MEvent) + getPlugin()->call(MEvent); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~event_impl", e); + } } void event_impl::waitInternal(bool *Success) { diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 072a9628d6a6b..301125d9b5c93 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -54,14 +54,18 @@ class ObjectUsageCounter { MCounter++; } ~ObjectUsageCounter() { - if (!MModifyCounter) - return; - - LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector); - MCounter--; - GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr(); - if (RTGlobalObjHandler) { - RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter); + try { + if (!MModifyCounter) + return; + + LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector); + MCounter--; + GlobalHandler *RTGlobalObjHandler = 
GlobalHandler::getInstancePtr(); + if (RTGlobalObjHandler) { + RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ObjectUsageCounter", e); } } @@ -234,12 +238,17 @@ void GlobalHandler::releaseDefaultContexts() { struct EarlyShutdownHandler { ~EarlyShutdownHandler() { + try { #ifdef _WIN32 - // on Windows we keep to the existing shutdown procedure - GlobalHandler::instance().releaseDefaultContexts(); + // on Windows we keep to the existing shutdown procedure + GlobalHandler::instance().releaseDefaultContexts(); #else - shutdown_early(); + shutdown_early(); #endif + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~EarlyShutdownHandler", + e); + } } }; diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 3268a27fbb827..9ef8ce262932f 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -297,9 +297,13 @@ void exec_graph_impl::makePartitions() { } graph_impl::~graph_impl() { - clearQueues(); - for (auto &MemObj : MMemObjs) { - MemObj->markNoLongerBeingUsedInGraph(); + try { + clearQueues(); + for (auto &MemObj : MMemObjs) { + MemObj->markNoLongerBeingUsedInGraph(); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~graph_impl", e); } } @@ -690,7 +694,8 @@ sycl::detail::pi::PiExtSyncPoint exec_graph_impl::enqueueNode( sycl::detail::EventImplPtr Event = sycl::detail::Scheduler::getInstance().addCG( - Node->getCGCopy(), AllocaQueue, CommandBuffer, Deps); + Node->getCGCopy(), AllocaQueue, /*EventNeeded=*/true, CommandBuffer, + Deps); MCommandMap[Node] = Event->getCommandBufferCommand(); return Event->getSyncPoint(); @@ -783,34 +788,38 @@ exec_graph_impl::exec_graph_impl(sycl::context Context, } exec_graph_impl::~exec_graph_impl() { - const sycl::detail::PluginPtr &Plugin = - sycl::detail::getSyclObjImpl(MContext)->getPlugin(); - 
MSchedule.clear(); - // We need to wait on all command buffer executions before we can release - // them. - for (auto &Event : MExecutionEvents) { - Event->wait(Event); - } + try { + const sycl::detail::PluginPtr &Plugin = + sycl::detail::getSyclObjImpl(MContext)->getPlugin(); + MSchedule.clear(); + // We need to wait on all command buffer executions before we can release + // them. + for (auto &Event : MExecutionEvents) { + Event->wait(Event); + } - for (const auto &Partition : MPartitions) { - Partition->MSchedule.clear(); - for (const auto &Iter : Partition->MPiCommandBuffers) { - if (auto CmdBuf = Iter.second; CmdBuf) { + for (const auto &Partition : MPartitions) { + Partition->MSchedule.clear(); + for (const auto &Iter : Partition->MPiCommandBuffers) { + if (auto CmdBuf = Iter.second; CmdBuf) { + pi_result Res = Plugin->call_nocheck< + sycl::detail::PiApiKind::piextCommandBufferRelease>(CmdBuf); + (void)Res; + assert(Res == pi_result::PI_SUCCESS); + } + } + } + + for (auto &Iter : MCommandMap) { + if (auto Command = Iter.second; Command) { pi_result Res = Plugin->call_nocheck< - sycl::detail::PiApiKind::piextCommandBufferRelease>(CmdBuf); + sycl::detail::PiApiKind::piextCommandBufferReleaseCommand>(Command); (void)Res; assert(Res == pi_result::PI_SUCCESS); } } - } - - for (auto &Iter : MCommandMap) { - if (auto Command = Iter.second; Command) { - pi_result Res = Plugin->call_nocheck< - sycl::detail::PiApiKind::piextCommandBufferReleaseCommand>(Command); - (void)Res; - assert(Res == pi_result::PI_SUCCESS); - } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~exec_graph_impl", e); } } @@ -928,7 +937,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, CommandBuffer, nullptr, std::move(CGData)); NewEvent = sycl::detail::Scheduler::getInstance().addCG( - std::move(CommandGroup), Queue); + std::move(CommandGroup), Queue, /*EventNeeded=*/true); } NewEvent->setEventFromSubmittedExecCommandBuffer(true); } else if 
((CurrentPartition->MSchedule.size() > 0) && @@ -946,7 +955,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, .MQueue = Queue; NewEvent = sycl::detail::Scheduler::getInstance().addCG( - NodeImpl->getCGCopy(), Queue); + NodeImpl->getCGCopy(), Queue, /*EventNeeded=*/true); } else { std::vector> ScheduledEvents; for (auto &NodeImpl : CurrentPartition->MSchedule) { @@ -982,7 +991,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, // dependencies are propagated in findRealDeps sycl::detail::EventImplPtr EventImpl = sycl::detail::Scheduler::getInstance().addCG( - NodeImpl->getCGCopy(), Queue); + NodeImpl->getCGCopy(), Queue, /*EventNeeded=*/true); ScheduledEvents.push_back(EventImpl); } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index f50c5c94b78d4..f0df55d5e069b 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -31,9 +31,11 @@ enum class HandlerSubmissionState : std::uint8_t { class handler_impl { public: handler_impl(std::shared_ptr SubmissionPrimaryQueue, - std::shared_ptr SubmissionSecondaryQueue) + std::shared_ptr SubmissionSecondaryQueue, + bool EventNeeded) : MSubmissionPrimaryQueue(std::move(SubmissionPrimaryQueue)), - MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)){}; + MSubmissionSecondaryQueue(std::move(SubmissionSecondaryQueue)), + MEventNeeded(EventNeeded) {}; handler_impl() = default; @@ -74,6 +76,10 @@ class handler_impl { /// submission is a fallback from a previous submission. std::shared_ptr MSubmissionSecondaryQueue; + /// Bool stores information about whether the event resulting from the + /// corresponding work is required. + bool MEventNeeded = true; + // Stores auxiliary resources used by internal operations. 
std::vector> MAuxiliaryResources; diff --git a/sycl/source/detail/image_impl.hpp b/sycl/source/detail/image_impl.hpp index 386a4636b5fe5..d526ce19281a4 100644 --- a/sycl/source/detail/image_impl.hpp +++ b/sycl/source/detail/image_impl.hpp @@ -40,25 +40,24 @@ namespace detail { using image_allocator = aligned_allocator; // utility function: Returns the Number of Channels for a given Order. -__SYCL_EXPORT uint8_t getImageNumberChannels(image_channel_order Order); +uint8_t getImageNumberChannels(image_channel_order Order); // utility function: Returns the number of bytes per image element -__SYCL_EXPORT uint8_t getImageElementSize(uint8_t NumChannels, - image_channel_type Type); +uint8_t getImageElementSize(uint8_t NumChannels, image_channel_type Type); -__SYCL_EXPORT sycl::detail::pi::PiMemImageChannelOrder +sycl::detail::pi::PiMemImageChannelOrder convertChannelOrder(image_channel_order Order); -__SYCL_EXPORT image_channel_order +image_channel_order convertChannelOrder(sycl::detail::pi::PiMemImageChannelOrder Order); -__SYCL_EXPORT sycl::detail::pi::PiMemImageChannelType +sycl::detail::pi::PiMemImageChannelType convertChannelType(image_channel_type Type); -__SYCL_EXPORT image_channel_type +image_channel_type convertChannelType(sycl::detail::pi::PiMemImageChannelType Type); -class __SYCL_EXPORT image_impl final : public SYCLMemObjT { +class image_impl final : public SYCLMemObjT { using BaseT = SYCLMemObjT; using typename BaseT::MemObjType; diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index e849fb3b57ad0..952482e42d79f 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -468,15 +468,6 @@ detectIdenticalParameter(std::vector &Params, ArgDesc Arg) { return Params.end(); } -template >> -F *storePlainArg(std::vector> &ArgStorage, T &&Arg) { - ArgStorage.emplace_back(sizeof(T)); - auto Storage = reinterpret_cast(ArgStorage.back().data()); - *Storage = Arg; - return Storage; -} - void 
*storePlainArgRaw(std::vector> &ArgStorage, void *ArgPtr, size_t ArgSize) { ArgStorage.emplace_back(ArgSize); @@ -485,6 +476,11 @@ void *storePlainArgRaw(std::vector> &ArgStorage, void *ArgPtr, return Storage; } +template +void *storePlainArg(std::vector> &ArgStorage, T &&Arg) { + return storePlainArgRaw(ArgStorage, &Arg, sizeof(T)); +} + static ParamIterator preProcessArguments( std::vector> &ArgStorage, ParamIterator Arg, PromotionMap &PromotedAccs, @@ -648,10 +644,10 @@ updatePromotedArgs(const ::jit_compiler::SYCLKernelInfo &FusedKernelInfo, Req, Promotion::Local) : 0; range<3> AccessRange{1, 1, LocalSize}; - auto *RangeArg = storePlainArg(FusedArgStorage, AccessRange); + void *RangeArg = storePlainArg(FusedArgStorage, AccessRange); // Use all-zero as the offset id<3> AcessOffset{0, 0, 0}; - auto *OffsetArg = storePlainArg(FusedArgStorage, AcessOffset); + void *OffsetArg = storePlainArg(FusedArgStorage, AcessOffset); // Override the arguments. // 1. Override the pointer with a std-layout argument with 'nullptr' as diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp index f457256884cda..d618c86e07d97 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_opencl.hpp @@ -9,7 +9,6 @@ #pragma once #include -#include // for __SYCL_EXPORT #include #include diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 9c5a1851cd3b1..0696123e94450 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -75,9 +75,13 @@ kernel_impl::kernel_impl(ContextImplPtr Context, ProgramImplPtr ProgramImpl) : MContext(Context), MProgram(ProgramImpl->getHandleRef()) {} kernel_impl::~kernel_impl() { - // TODO catch an exception and put it to list of asynchronous exceptions - if (!is_host()) { - getPlugin()->call(MKernel); + try { + // TODO catch an exception 
and put it to list of asynchronous exceptions + if (!is_host()) { + getPlugin()->call(MKernel); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~kernel_impl", e); } } diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 840f95ea7a643..37ede74578ed7 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -803,23 +803,6 @@ void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, } } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, - QueueImplPtr SrcQueue, unsigned int DimSrc, - sycl::range<3> SrcSize, sycl::range<3> SrcAccessRange, - sycl::id<3> SrcOffset, unsigned int SrcElemSize, - void *DstMem, QueueImplPtr TgtQueue, - unsigned int DimDst, sycl::range<3> DstSize, - sycl::range<3> DstAccessRange, sycl::id<3> DstOffset, - unsigned int DstElemSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent) { - MemoryManager::copy(SYCLMemObj, SrcMem, SrcQueue, DimSrc, SrcSize, - SrcAccessRange, SrcOffset, SrcElemSize, DstMem, TgtQueue, - DimDst, DstSize, DstAccessRange, DstOffset, DstElemSize, - DepEvents, OutEvent, nullptr); -} - void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, size_t PatternSize, const char *Pattern, unsigned int Dim, sycl::range<3> MemRange, @@ -868,18 +851,6 @@ void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, } } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, - size_t PatternSize, const char *Pattern, - unsigned int Dim, sycl::range<3> Size, - sycl::range<3> Range, sycl::id<3> Offset, - unsigned int ElementSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent) { - MemoryManager::fill(SYCLMemObj, Mem, Queue, PatternSize, Pattern, Dim, Size, - Range, Offset, 
ElementSize, DepEvents, OutEvent, nullptr); -} - void *MemoryManager::map(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, access::mode AccessMode, unsigned int, sycl::range<3>, sycl::range<3> AccessRange, sycl::id<3> AccessOffset, @@ -973,15 +944,6 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, - size_t Len, void *DstMem, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::copy_usm(SrcMem, SrcQueue, Len, DstMem, DepEvents, OutEvent, - nullptr); -} - void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, int Pattern, std::vector DepEvents, @@ -1011,15 +973,6 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, - int Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::fill_usm(Mem, Queue, Length, Pattern, DepEvents, OutEvent, - nullptr); // OutEventImpl); -} - void MemoryManager::prefetch_usm( void *Mem, QueueImplPtr Queue, size_t Length, std::vector DepEvents, @@ -1036,14 +989,6 @@ void MemoryManager::prefetch_usm( DepEvents.size(), DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::prefetch_usm( - void *Mem, QueueImplPtr Queue, size_t Length, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::prefetch_usm(Mem, Queue, Length, DepEvents, OutEvent, nullptr); -} - void MemoryManager::advise_usm( const void *Mem, QueueImplPtr Queue, size_t Length, pi_mem_advice Advice, std::vector /*DepEvents*/, @@ -1059,15 +1004,6 @@ void MemoryManager::advise_usm( Length, Advice, OutEvent); } -// TODO: This function will 
remain until ABI-breaking change -void MemoryManager::advise_usm(const void *Mem, QueueImplPtr Queue, - size_t Length, pi_mem_advice Advice, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::advise_usm(Mem, Queue, Length, Advice, DepEvents, OutEvent, - nullptr); -} - void MemoryManager::copy_2d_usm( const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem, size_t DstPitch, size_t Width, size_t Height, @@ -1146,16 +1082,6 @@ void MemoryManager::copy_2d_usm( Queue->getHandleRef(), CopyEvents.size(), CopyEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::copy_2d_usm( - const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem, - size_t DstPitch, size_t Width, size_t Height, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::copy_2d_usm(SrcMem, SrcPitch, Queue, DstMem, DstPitch, Width, - Height, DepEvents, OutEvent, nullptr); -} - void MemoryManager::fill_2d_usm( void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, const std::vector &Pattern, @@ -1187,16 +1113,6 @@ void MemoryManager::fill_2d_usm( Width, Height, DepEvents.size(), DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::fill_2d_usm( - void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, - const std::vector &Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::fill_2d_usm(DstMem, Queue, Pitch, Width, Height, Pattern, - DepEvents, OutEvent, nullptr); -} - void MemoryManager::memset_2d_usm( void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, char Value, std::vector DepEvents, @@ -1228,15 +1144,6 @@ void MemoryManager::memset_2d_usm( Height, DepEvents.size(), DepEvents.data(), OutEvent); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::memset_2d_usm( - void 
*DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, - char Value, std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - MemoryManager::memset_2d_usm(DstMem, Queue, Pitch, Width, Height, Value, - DepEvents, OutEvent, nullptr); -} - static void memcpyToDeviceGlobalUSM(QueueImplPtr Queue, DeviceGlobalMapEntry *DeviceGlobalEntry, @@ -1391,16 +1298,6 @@ void MemoryManager::copy_to_device_global( OutEvent, OutEventImpl); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::copy_to_device_global( - const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, - size_t NumBytes, size_t Offset, const void *SrcMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - copy_to_device_global(DeviceGlobalPtr, IsDeviceImageScoped, Queue, NumBytes, - Offset, SrcMem, DepEvents, OutEvent, nullptr); -} - void MemoryManager::copy_from_device_global( const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, void *DstMem, @@ -1424,16 +1321,6 @@ void MemoryManager::copy_from_device_global( DepEvents, OutEvent, OutEventImpl); } -// TODO: This function will remain until ABI-breaking change -void MemoryManager::copy_from_device_global( - const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, - size_t NumBytes, size_t Offset, void *DstMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent) { - copy_from_device_global(DeviceGlobalPtr, IsDeviceImageScoped, Queue, NumBytes, - Offset, DstMem, DepEvents, OutEvent, nullptr); -} - // Command buffer methods void MemoryManager::ext_oneapi_copyD2D_cmd_buffer( sycl::detail::ContextImplPtr Context, diff --git a/sycl/source/detail/memory_manager.hpp b/sycl/source/detail/memory_manager.hpp index 1d2800bf9dadc..06ba2e2a25313 100644 --- a/sycl/source/detail/memory_manager.hpp +++ b/sycl/source/detail/memory_manager.hpp @@ -33,7 +33,7 @@ using ContextImplPtr = 
std::shared_ptr; // The class contains methods that work with memory. All operations with // device memory should go through MemoryManager. -class __SYCL_EXPORT MemoryManager { +class MemoryManager { public: // The following method releases memory allocation of memory object. // Depending on the context it releases memory on host or on device. @@ -120,17 +120,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void copy(SYCLMemObjI *SYCLMemObj, void *SrcMem, QueueImplPtr SrcQueue, - unsigned int DimSrc, sycl::range<3> SrcSize, - sycl::range<3> SrcAccessRange, sycl::id<3> SrcOffset, - unsigned int SrcElemSize, void *DstMem, - QueueImplPtr TgtQueue, unsigned int DimDst, - sycl::range<3> DstSize, sycl::range<3> DstAccessRange, - sycl::id<3> DstOffset, unsigned int DstElemSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent); - static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, size_t PatternSize, const char *Pattern, unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, @@ -139,14 +128,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent &OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void fill(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, - size_t PatternSize, const char *Pattern, unsigned int Dim, - sycl::range<3> Size, sycl::range<3> AccessRange, - sycl::id<3> AccessOffset, unsigned int ElementSize, - std::vector DepEvents, - sycl::detail::pi::PiEvent &OutEvent); - static void *map(SYCLMemObjI *SYCLMemObj, void *Mem, QueueImplPtr Queue, access::mode AccessMode, unsigned int Dim, sycl::range<3> Size, sycl::range<3> AccessRange, @@ -165,46 +146,23 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: 
This function will remain until ABI-breaking change - static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len, - void *DstMem, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, int Pattern, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, - int Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, pi_mem_advice Advice, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, - pi_mem_advice Advice, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void copy_2d_usm(const void *SrcMem, size_t SrcPitch, QueueImplPtr Queue, void *DstMem, size_t DstPitch, size_t Width, size_t Height, @@ -212,13 +170,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void copy_2d_usm(const void *SrcMem, size_t SrcPitch, - QueueImplPtr Queue, void *DstMem, size_t DstPitch, - size_t Width, size_t Height, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void fill_2d_usm(void *DstMem, 
QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, const std::vector &Pattern, @@ -226,25 +177,12 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void fill_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, - size_t Width, size_t Height, - const std::vector &Pattern, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void memset_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, size_t Width, size_t Height, char Value, std::vector DepEvents, sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void memset_2d_usm(void *DstMem, QueueImplPtr Queue, size_t Pitch, - size_t Width, size_t Height, char Value, - std::vector DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, @@ -253,14 +191,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void - copy_to_device_global(const void *DeviceGlobalPtr, bool IsDeviceImageScoped, - QueueImplPtr Queue, size_t NumBytes, size_t Offset, - const void *SrcMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - static void copy_from_device_global( const void *DeviceGlobalPtr, bool IsDeviceImageScoped, QueueImplPtr Queue, size_t NumBytes, size_t Offset, void *DstMem, @@ -268,13 +198,6 @@ class __SYCL_EXPORT MemoryManager { sycl::detail::pi::PiEvent *OutEvent, const detail::EventImplPtr &OutEventImpl); - // TODO: This function will remain until ABI-breaking change - static void copy_from_device_global( - const void *DeviceGlobalPtr, bool IsDeviceImageScoped, 
QueueImplPtr Queue, - size_t NumBytes, size_t Offset, void *DstMem, - const std::vector &DepEvents, - sycl::detail::pi::PiEvent *OutEvent); - // Command buffer extension methods static void ext_oneapi_copyD2D_cmd_buffer( sycl::detail::ContextImplPtr Context, diff --git a/sycl/source/detail/physical_mem_impl.hpp b/sycl/source/detail/physical_mem_impl.hpp new file mode 100644 index 0000000000000..e36830ba07bee --- /dev/null +++ b/sycl/source/detail/physical_mem_impl.hpp @@ -0,0 +1,94 @@ +//==- physical_mem_impl.hpp - sycl_ext_oneapi_virtual_mem physical_mem impl ==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +inline sycl::detail::pi::PiVirtualAccessFlags AccessModeToVirtualAccessFlags( + ext::oneapi::experimental::address_access_mode Mode) { + switch (Mode) { + case ext::oneapi::experimental::address_access_mode::read: + return PI_VIRTUAL_ACCESS_FLAG_READ_ONLY; + case ext::oneapi::experimental::address_access_mode::read_write: + return PI_VIRTUAL_ACCESS_FLAG_RW; + case ext::oneapi::experimental::address_access_mode::none: + return 0; + } + throw sycl::exception(make_error_code(errc::invalid), + "Invalid address_access_mode."); +} + +class physical_mem_impl { +public: + physical_mem_impl(const device &SyclDevice, const context &SyclContext, + size_t NumBytes) + : MDevice(getSyclObjImpl(SyclDevice)), + MContext(getSyclObjImpl(SyclContext)), MNumBytes(NumBytes) { + const PluginPtr &Plugin = MContext->getPlugin(); + + auto Err = Plugin->call_nocheck( + MContext->getHandleRef(), MDevice->getHandleRef(), MNumBytes, + &MPhysicalMem); + + if (Err == 
PI_ERROR_OUT_OF_RESOURCES || Err == PI_ERROR_OUT_OF_HOST_MEMORY) + throw sycl::exception(make_error_code(errc::memory_allocation), + "Failed to allocate physical memory."); + Plugin->checkPiResult(Err); + } + + ~physical_mem_impl() noexcept(false) { + const PluginPtr &Plugin = MContext->getPlugin(); + Plugin->call(MPhysicalMem); + } + + void *map(uintptr_t Ptr, size_t NumBytes, + ext::oneapi::experimental::address_access_mode Mode, + size_t Offset) const { + sycl::detail::pi::PiVirtualAccessFlags AccessFlags = + AccessModeToVirtualAccessFlags(Mode); + const PluginPtr &Plugin = MContext->getPlugin(); + void *ResultPtr = reinterpret_cast(Ptr); + Plugin->call( + MContext->getHandleRef(), ResultPtr, NumBytes, MPhysicalMem, Offset, + AccessFlags); + return ResultPtr; + } + + context get_context() const { + return createSyclObjFromImpl(MContext); + } + device get_device() const { return createSyclObjFromImpl(MDevice); } + size_t size() const noexcept { return MNumBytes; } + + sycl::detail::pi::PiPhysicalMem &getHandleRef() { return MPhysicalMem; } + const sycl::detail::pi::PiPhysicalMem &getHandleRef() const { + return MPhysicalMem; + } + +private: + sycl::detail::pi::PiPhysicalMem MPhysicalMem = nullptr; + const std::shared_ptr MDevice; + const std::shared_ptr MContext; + const size_t MNumBytes; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index e98c67ea30281..ee0ef9e26c35d 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -68,9 +68,6 @@ template void *getPluginOpaqueData(void *OpaqueDataParam) { return ReturnOpaqueData; } -template __SYCL_EXPORT void * -getPluginOpaqueData(void *); - namespace pi { static void initializePlugins(std::vector &Plugins); @@ -514,8 +511,6 @@ template const PluginPtr &getPlugin() { template __SYCL_EXPORT const PluginPtr &getPlugin(); template __SYCL_EXPORT const PluginPtr & getPlugin(); -template __SYCL_EXPORT const PluginPtr & 
-getPlugin(); template __SYCL_EXPORT const PluginPtr &getPlugin(); template __SYCL_EXPORT const PluginPtr &getPlugin(); diff --git a/sycl/source/detail/pi_utils.hpp b/sycl/source/detail/pi_utils.hpp index 877cbd0d14e52..fa288c91fc583 100644 --- a/sycl/source/detail/pi_utils.hpp +++ b/sycl/source/detail/pi_utils.hpp @@ -31,9 +31,14 @@ struct OwnedPiEvent { MPlugin->call(*MEvent); } ~OwnedPiEvent() { - // Release the event if the ownership was not transferred. - if (MEvent.has_value()) - MPlugin->call(*MEvent); + try { + // Release the event if the ownership was not transferred. + if (MEvent.has_value()) + MPlugin->call(*MEvent); + + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~OwnedPiEvent", e); + } } OwnedPiEvent(OwnedPiEvent &&Other) diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index d65f3163b961f..6952c6d45ebd2 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -207,10 +207,14 @@ program_impl::program_impl(ContextImplPtr Context, } program_impl::~program_impl() { - // TODO catch an exception and put it to list of asynchronous exceptions - if (!is_host() && MProgram != nullptr) { - const PluginPtr &Plugin = getPlugin(); - Plugin->call(MProgram); + try { + // TODO catch an exception and put it to list of asynchronous exceptions + if (!is_host() && MProgram != nullptr) { + const PluginPtr &Plugin = getPlugin(); + Plugin->call(MProgram); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~program_impl", e); } } diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 8865c342646eb..5b873039cd4a1 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -146,7 +146,8 @@ queue_impl::getExtendDependencyList(const std::vector &DepEvents, event queue_impl::memset(const std::shared_ptr &Self, void *Ptr, int Value, size_t Count, - const std::vector 
&DepEvents) { + const std::vector &DepEvents, + bool CallerNeedsEvent) { #if XPTI_ENABLE_INSTRUMENTATION // We need a code pointer value and we use the object ptr; if code location // information is available, we will have function name and source file @@ -173,7 +174,8 @@ event queue_impl::memset(const std::shared_ptr &Self, #endif return submitMemOpHelper( - Self, DepEvents, [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, + Self, DepEvents, CallerNeedsEvent, + [&](handler &CGH) { CGH.memset(Ptr, Value, Count); }, [](const auto &...Args) { MemoryManager::fill_usm(Args...); }, Ptr, Self, Count, Value); } @@ -194,7 +196,7 @@ void report(const code_location &CodeLoc) { event queue_impl::memcpy(const std::shared_ptr &Self, void *Dest, const void *Src, size_t Count, const std::vector &DepEvents, - const code_location &CodeLoc) { + bool CallerNeedsEvent, const code_location &CodeLoc) { #if XPTI_ENABLE_INSTRUMENTATION // We need a code pointer value and we duse the object ptr; If code location // is available, we use the source file information along with the object @@ -225,7 +227,8 @@ event queue_impl::memcpy(const std::shared_ptr &Self, PI_ERROR_INVALID_VALUE); } return submitMemOpHelper( - Self, DepEvents, [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); }, + Self, DepEvents, CallerNeedsEvent, + [&](handler &CGH) { CGH.memcpy(Dest, Src, Count); }, [](const auto &...Args) { MemoryManager::copy_usm(Args...); }, Src, Self, Count, Dest); } @@ -233,9 +236,10 @@ event queue_impl::memcpy(const std::shared_ptr &Self, event queue_impl::mem_advise(const std::shared_ptr &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, - const std::vector &DepEvents) { + const std::vector &DepEvents, + bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, + Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.mem_advise(Ptr, Length, Advice); }, [](const auto &...Args) { MemoryManager::advise_usm(Args...); }, Ptr, Self, Length, Advice); @@ -244,9 +248,9 @@ 
event queue_impl::mem_advise(const std::shared_ptr &Self, event queue_impl::memcpyToDeviceGlobal( const std::shared_ptr &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, - const std::vector &DepEvents) { + const std::vector &DepEvents, bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, + Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memcpyToDeviceGlobal(DeviceGlobalPtr, Src, IsDeviceImageScope, NumBytes, Offset); @@ -260,9 +264,9 @@ event queue_impl::memcpyToDeviceGlobal( event queue_impl::memcpyFromDeviceGlobal( const std::shared_ptr &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, - size_t Offset, const std::vector &DepEvents) { + size_t Offset, const std::vector &DepEvents, bool CallerNeedsEvent) { return submitMemOpHelper( - Self, DepEvents, + Self, DepEvents, CallerNeedsEvent, [&](handler &CGH) { CGH.memcpyFromDeviceGlobal(Dest, DeviceGlobalPtr, IsDeviceImageScope, NumBytes, Offset); @@ -348,9 +352,11 @@ event queue_impl::submit_impl(const std::function &CGF, const std::shared_ptr &Self, const std::shared_ptr &PrimaryQueue, const std::shared_ptr &SecondaryQueue, + bool CallerNeedsEvent, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess) { - handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue); + handler Handler(Self, PrimaryQueue, SecondaryQueue, MHostQueue, + CallerNeedsEvent); Handler.saveCodeLoc(Loc); { @@ -399,6 +405,7 @@ event queue_impl::submitWithHandler(const std::shared_ptr &Self, template event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, const std::vector &DepEvents, + bool CallerNeedsEvent, HandlerFuncT HandlerFunc, MemOpFuncT MemOpFunc, MemOpArgTs... MemOpArgs) { @@ -415,7 +422,8 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, // handler rather than by-passing the scheduler. 
if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass( ExpandedDepEvents, MContext)) { - if (MSupportsDiscardingPiEvents) { + if ((MDiscardEvents || !CallerNeedsEvent) && + supportsDiscardingPiEvents()) { NestedCallsTracker tracker; MemOpFunc(MemOpArgs..., getPIEvents(ExpandedDepEvents), /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr); @@ -646,10 +654,17 @@ bool queue_impl::ext_oneapi_empty() const { // the status of the last event. if (isInOrder() && !MDiscardEvents) { std::lock_guard Lock(MMutex); - return !MDefaultGraphDeps.LastEventPtr || - MDefaultGraphDeps.LastEventPtr - ->get_info() == - info::event_command_status::complete; + // If there is no last event we know that no work has been submitted, so it + // must be trivially empty. + if (!MDefaultGraphDeps.LastEventPtr) + return true; + // Otherwise, check if the last event is finished. + // Note that we fall back to the backend query if the event was discarded, + // which may happend despite the queue not being a discard event queue. + if (!MDefaultGraphDeps.LastEventPtr->isDiscarded()) + return MDefaultGraphDeps.LastEventPtr + ->get_info() == + info::event_command_status::complete; } // Check the status of the backend queue if this is not a host queue. diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index d87db1d7ef2e5..4878134ec1e92 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -111,8 +111,6 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { if (has_property()) { @@ -292,8 +290,6 @@ class queue_impl { MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? 
true : MIsInorder)), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { queue_impl_interop(PiQueue); @@ -313,32 +309,34 @@ class queue_impl { MIsInorder(has_property()), MDiscardEvents( has_property()), - MIsProfilingEnabled(has_property()), - MSupportsDiscardingPiEvents(MDiscardEvents && - (MHostQueue ? true : MIsInorder)) { + MIsProfilingEnabled(has_property()) { queue_impl_interop(PiQueue); } ~queue_impl() { - // The trace event created in the constructor should be active through the - // lifetime of the queue object as member variables when ABI breakage is - // allowed. This example shows MTraceEvent as a member variable. + try { + // The trace event created in the constructor should be active through the + // lifetime of the queue object as member variables when ABI breakage is + // allowed. This example shows MTraceEvent as a member variable. #if XPTI_ENABLE_INSTRUMENTATION - constexpr uint16_t NotificationTraceType = - static_cast(xpti::trace_point_type_t::queue_destroy); - if (xptiCheckTraceEnabled(MStreamID, NotificationTraceType)) { - // Used cached information in member variables - xptiNotifySubscribers(MStreamID, NotificationTraceType, nullptr, - (xpti::trace_event_data_t *)MTraceEvent, - MInstanceID, - static_cast("queue_destroy")); - xptiReleaseEvent((xpti::trace_event_data_t *)MTraceEvent); - } + constexpr uint16_t NotificationTraceType = + static_cast(xpti::trace_point_type_t::queue_destroy); + if (xptiCheckTraceEnabled(MStreamID, NotificationTraceType)) { + // Used cached information in member variables + xptiNotifySubscribers(MStreamID, NotificationTraceType, nullptr, + (xpti::trace_event_data_t *)MTraceEvent, + MInstanceID, + static_cast("queue_destroy")); + xptiReleaseEvent((xpti::trace_event_data_t *)MTraceEvent); + } #endif - throw_asynchronous(); - if (!MHostQueue) { - cleanup_fusion_cmd(); - getPlugin()->call(MQueues[0]); + throw_asynchronous(); + if (!MHostQueue) { + cleanup_fusion_cmd(); + 
getPlugin()->call(MQueues[0]); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~queue_impl", e); } } @@ -370,9 +368,12 @@ class queue_impl { /// \return true if this queue is a SYCL host queue. bool is_host() const { return MHostQueue; } - /// \return true if this queue has discard_events support. + /// \return true if the discard event property was set at time of creation. + bool hasDiscardEventsProperty() const { return MDiscardEvents; } + + /// \return true if this queue allows for discarded events. bool supportsDiscardingPiEvents() const { - return MSupportsDiscardingPiEvents; + return MHostQueue ? true : MIsInorder; } bool isInOrder() const { return MIsInorder; } @@ -424,10 +425,12 @@ class queue_impl { const SubmitPostProcessF *PostProcess = nullptr) { event ResEvent; try { - ResEvent = submit_impl(CGF, Self, Self, SecondQueue, Loc, PostProcess); + ResEvent = submit_impl(CGF, Self, Self, SecondQueue, + /*CallerNeedsEvent=*/true, Loc, PostProcess); } catch (...) 
{ - ResEvent = SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, - Loc, PostProcess); + ResEvent = + SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, + /*CallerNeedsEvent=*/true, Loc, PostProcess); } return discard_or_return(ResEvent); } @@ -444,10 +447,19 @@ class queue_impl { const std::shared_ptr &Self, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess = nullptr) { - auto ResEvent = submit_impl(CGF, Self, Self, nullptr, Loc, PostProcess); + auto ResEvent = submit_impl(CGF, Self, Self, nullptr, + /*CallerNeedsEvent=*/true, Loc, PostProcess); return discard_or_return(ResEvent); } + void submit_without_event(const std::function &CGF, + const std::shared_ptr &Self, + const detail::code_location &Loc, + const SubmitPostProcessF *PostProcess = nullptr) { + submit_impl(CGF, Self, Self, nullptr, /*CallerNeedsEvent=*/false, Loc, + PostProcess); + } + /// Performs a blocking wait for the completion of all enqueued tasks in the /// queue. /// @@ -651,9 +663,11 @@ class queue_impl { /// \param Count is a number of bytes to fill. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. + /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing fill operation. event memset(const std::shared_ptr &Self, void *Ptr, int Value, - size_t Count, const std::vector &DepEvents); + size_t Count, const std::vector &DepEvents, + bool CallerNeedsEvent); /// Copies data from one memory region to another, both pointed by /// USM pointers. /// @@ -663,10 +677,11 @@ class queue_impl { /// \param Count is a number of bytes to copy. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. + /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing copy operation. 
event memcpy(const std::shared_ptr &Self, void *Dest, const void *Src, size_t Count, - const std::vector &DepEvents, + const std::vector &DepEvents, bool CallerNeedsEvent, const code_location &CodeLoc); /// Provides additional information to the underlying runtime about how /// different allocations are used. @@ -677,10 +692,11 @@ class queue_impl { /// \param Advice is a device-defined advice for the specified allocation. /// \param DepEvents is a vector of events that specifies the kernel /// dependencies. + /// \param CallerNeedsEvent specifies if the caller expects a usable event. /// \return an event representing advise operation. event mem_advise(const std::shared_ptr &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, - const std::vector &DepEvents); + const std::vector &DepEvents, bool CallerNeedsEvent); /// Puts exception to the list of asynchronous ecxeptions. /// @@ -718,13 +734,14 @@ class queue_impl { event memcpyToDeviceGlobal(const std::shared_ptr &Self, void *DeviceGlobalPtr, const void *Src, bool IsDeviceImageScope, size_t NumBytes, - size_t Offset, - const std::vector &DepEvents); + size_t Offset, const std::vector &DepEvents, + bool CallerNeedsEvent); event memcpyFromDeviceGlobal(const std::shared_ptr &Self, void *Dest, const void *DeviceGlobalPtr, bool IsDeviceImageScope, size_t NumBytes, size_t Offset, - const std::vector &DepEvents); + const std::vector &DepEvents, + bool CallerNeedsEvent); bool isProfilingFallback() { return MFallbackProfiling; } @@ -780,6 +797,14 @@ class queue_impl { // Hook to the scheduler to clean up any fusion command held on destruction. 
void cleanup_fusion_cmd(); + template + EventImplPtr insertHelperBarrier(const HandlerType &Handler) { + auto ResEvent = std::make_shared(Handler.MQueue); + getPlugin()->call( + Handler.MQueue->getHandleRef(), 0, nullptr, &ResEvent->getHandleRef()); + return ResEvent; + } + // template is needed for proper unit testing template void finalizeHandler(HandlerType &Handler, event &EventRet) { @@ -787,6 +812,10 @@ class queue_impl { // Accessing and changing of an event isn't atomic operation. // Hence, here is the lock for thread-safety. std::lock_guard Lock{MMutex}; + + auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr + : MExtGraphDeps.LastEventPtr; + // This dependency is needed for the following purposes: // - host tasks are handled by the runtime and cannot be implicitly // synchronized by the backend. @@ -794,11 +823,19 @@ class queue_impl { // by a host task. This dependency allows to build the enqueue order in // the RT but will not be passed to the backend. See getPIEvents in // Command. - - auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr - : MExtGraphDeps.LastEventPtr; - if (EventToBuildDeps) - Handler.depends_on(EventToBuildDeps); + if (EventToBuildDeps) { + // In the case where the last event was discarded and we are to run a + // host_task, we insert a barrier into the queue and use the resulting + // event as the dependency for the host_task. + // Note that host_task events can never be discarded, so this will not + // insert barriers between host_task enqueues. + if (EventToBuildDeps->isDiscarded() && + Handler.getType() == CG::CodeplayHostTask) + EventToBuildDeps = insertHelperBarrier(Handler); + + if (!EventToBuildDeps->isDiscarded()) + Handler.depends_on(EventToBuildDeps); + } // If there is an external event set, add it as a dependency and clear it. // We do not need to hold the lock as MLastEventMtx will ensure the last @@ -852,13 +889,15 @@ class queue_impl { /// same as Self. 
/// \param SecondaryQueue is a pointer to the secondary queue. This may be the /// same as Self. + /// \param CallerNeedsEvent is a boolean indicating whether the event is + /// required by the user after the call. /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event representing submitted command group. event submit_impl(const std::function &CGF, const std::shared_ptr &Self, const std::shared_ptr &PrimaryQueue, const std::shared_ptr &SecondaryQueue, - const detail::code_location &Loc, + bool CallerNeedsEvent, const detail::code_location &Loc, const SubmitPostProcessF *PostProcess); /// Helper function for submitting a memory operation with a handler. @@ -876,6 +915,8 @@ class queue_impl { /// /// \param Self is a shared_ptr to this queue. /// \param DepEvents is a vector of dependencies of the operation. + /// \param CallerNeedsEvent specifies if the caller needs an event from this + /// memory operation. /// \param HandlerFunc is a function that submits the operation with a /// handler. /// \param MemMngrFunc is a function that forwards its arguments to the @@ -888,8 +929,8 @@ class queue_impl { typename... MemMngrArgTs> event submitMemOpHelper(const std::shared_ptr &Self, const std::vector &DepEvents, - HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc, - MemMngrArgTs... MemOpArgs); + bool CallerNeedsEvent, HandlerFuncT HandlerFunc, + MemMngrFuncT MemMngrFunc, MemMngrArgTs... MemOpArgs); // When instrumentation is enabled emits trace event for wait begin and // returns the telemetry event generated for the wait @@ -987,12 +1028,6 @@ class queue_impl { const bool MIsProfilingEnabled; protected: - // Indicates whether the queue supports discarding PI events for tasks - // submitted to it. This condition is necessary but not sufficient, PI events - // should be discarded only if they also don't represent potential implicit - // dependencies for future tasks in other queues. 
- const bool MSupportsDiscardingPiEvents; - // Command graph which is associated with this queue for the purposes of // recording commands to it. std::weak_ptr MGraph{}; diff --git a/sycl/source/detail/sampler_impl.cpp b/sycl/source/detail/sampler_impl.cpp index c2af7884a164c..edca8eb1be025 100644 --- a/sycl/source/detail/sampler_impl.cpp +++ b/sycl/source/detail/sampler_impl.cpp @@ -40,11 +40,16 @@ sampler_impl::sampler_impl(cl_sampler clSampler, const context &syclContext) { } sampler_impl::~sampler_impl() { - std::lock_guard Lock(MMutex); - for (auto &Iter : MContextToSampler) { - // TODO catch an exception and add it to the list of asynchronous exceptions - const PluginPtr &Plugin = getSyclObjImpl(Iter.first)->getPlugin(); - Plugin->call(Iter.second); + try { + std::lock_guard Lock(MMutex); + for (auto &Iter : MContextToSampler) { + // TODO catch an exception and add it to the list of asynchronous + // exceptions + const PluginPtr &Plugin = getSyclObjImpl(Iter.first)->getPlugin(); + Plugin->call(Iter.second); + } + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~sample_impl", e); } } diff --git a/sycl/source/detail/sampler_impl.hpp b/sycl/source/detail/sampler_impl.hpp index 7b3878e149ff9..4ae318c29ffbe 100644 --- a/sycl/source/detail/sampler_impl.hpp +++ b/sycl/source/detail/sampler_impl.hpp @@ -25,7 +25,7 @@ enum class filtering_mode : unsigned int; enum class coordinate_normalization_mode : unsigned int; namespace detail { -class __SYCL_EXPORT sampler_impl { +class sampler_impl { public: sampler_impl(coordinate_normalization_mode normalizationMode, addressing_mode addressingMode, filtering_mode filteringMode, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index a164c455fed54..fd467a5b178db 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -317,11 +317,25 @@ static void flushCrossQueueDeps(const std::vector &EventImpls, 
} } +namespace { + +struct EnqueueNativeCommandData { + sycl::interop_handle ih; + std::function func; +}; + +void InteropFreeFunc(pi_queue InteropQueue, void *InteropData) { + auto *Data = reinterpret_cast(InteropData); + return Data->func(Data->ih); +} +} // namespace + class DispatchHostTask { ExecCGCommand *MThisCmd; std::vector MReqToMem; + std::vector MReqPiMem; - pi_result waitForEvents() const { + bool waitForEvents() const { std::map> RequiredEventsPerPlugin; @@ -343,14 +357,14 @@ class DispatchHostTask { try { PluginWithEvents.first->call(RawEvents.size(), RawEvents.data()); - } catch (const sycl::exception &E) { + } catch (const sycl::exception &) { CGHostTask &HostTask = static_cast(MThisCmd->getCG()); HostTask.MQueue->reportAsyncException(std::current_exception()); - return (pi_result)E.get_cl_code(); + return false; } catch (...) { CGHostTask &HostTask = static_cast(MThisCmd->getCG()); HostTask.MQueue->reportAsyncException(std::current_exception()); - return PI_ERROR_UNKNOWN; + return false; } } @@ -360,13 +374,15 @@ class DispatchHostTask { Event->waitInternal(); } - return PI_SUCCESS; + return true; } public: DispatchHostTask(ExecCGCommand *ThisCmd, - std::vector ReqToMem) - : MThisCmd{ThisCmd}, MReqToMem(std::move(ReqToMem)) {} + std::vector ReqToMem, + std::vector ReqPiMem) + : MThisCmd{ThisCmd}, MReqToMem(std::move(ReqToMem)), + MReqPiMem(std::move(ReqPiMem)) {} void operator()() const { assert(MThisCmd->getCG().getType() == CG::CGTYPE::CodeplayHostTask); @@ -385,11 +401,10 @@ class DispatchHostTask { } #endif - pi_result WaitResult = waitForEvents(); - if (WaitResult != PI_SUCCESS) { - std::exception_ptr EPtr = std::make_exception_ptr(sycl::runtime_error( - std::string("Couldn't wait for host-task's dependencies"), - WaitResult)); + if (!waitForEvents()) { + std::exception_ptr EPtr = std::make_exception_ptr(sycl::exception( + make_error_code(errc::runtime), + std::string("Couldn't wait for host-task's dependencies"))); 
HostTask.MQueue->reportAsyncException(EPtr); // reset host-task's lambda and quit HostTask.MHostTask.reset(); @@ -403,8 +418,32 @@ class DispatchHostTask { interop_handle IH{MReqToMem, HostTask.MQueue, HostTask.MQueue->getDeviceImplPtr(), HostTask.MQueue->getContextImplPtr()}; - - HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo(), IH); + // TODO: should all the backends that support this entry point use this + // for host task? + auto &Queue = HostTask.MQueue; + bool NativeCommandSupport = false; + Queue->getPlugin()->call( + detail::getSyclObjImpl(Queue->get_device())->getHandleRef(), + PI_EXT_ONEAPI_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT, + sizeof(NativeCommandSupport), &NativeCommandSupport, nullptr); + if (NativeCommandSupport) { + EnqueueNativeCommandData CustomOpData{ + IH, HostTask.MHostTask->MInteropTask}; + + // We are assuming that we have already synchronized with the HT's + // dependent events, and that the user will synchronize before the end + // of the HT lambda. As such we don't pass in any events, or ask for + // one back. + // + // This entry point is needed in order to migrate memory across + // devices in the same context for CUDA and HIP backends + Queue->getPlugin()->call( + HostTask.MQueue->getHandleRef(), InteropFreeFunc, &CustomOpData, + MReqPiMem.size(), MReqPiMem.data(), 0, nullptr, nullptr); + } else { + HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo(), + IH); + } } else HostTask.MHostTask->call(MThisCmd->MEvent->getHostProfilingInfo()); } catch (...) 
{ @@ -1927,11 +1966,11 @@ static std::string_view cgTypeToString(detail::CG::CGTYPE Type) { ExecCGCommand::ExecCGCommand( std::unique_ptr CommandGroup, QueueImplPtr Queue, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) : Command(CommandType::RUN_CG, std::move(Queue), CommandBuffer, Dependencies), - MCommandGroup(std::move(CommandGroup)) { + MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) { if (MCommandGroup->getType() == detail::CG::CodeplayHostTask) { MEvent->setSubmittedQueue( static_cast(MCommandGroup.get())->MQueue); @@ -2759,11 +2798,15 @@ pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { Plugin->call(RawEvents.size(), &RawEvents[0]); } + // We can omit creating a PI event and create a "discarded" event if either + // the queue has the discard property or the command has been explicitly + // marked as not needing an event, e.g. if the user did not ask for one, and + // if the queue supports discarded PI event and there are no requirements. + bool DiscardPiEvent = (MQueue->MDiscardEvents || !MEventNeeded) && + MQueue->supportsDiscardingPiEvents() && + MCommandGroup->getRequirements().size() == 0; sycl::detail::pi::PiEvent *Event = - (MQueue->supportsDiscardingPiEvents() && - MCommandGroup->getRequirements().size() == 0) - ? nullptr - : &MEvent->getHandleRef(); + DiscardPiEvent ? 
nullptr : &MEvent->getHandleRef(); sycl::detail::pi::PiExtSyncPoint OutSyncPoint; sycl::detail::pi::PiExtCommandBufferCommand OutCommand = nullptr; switch (MCommandGroup->getType()) { @@ -2910,8 +2953,13 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { auto RawEvents = getPiEvents(EventImpls); flushCrossQueueDeps(EventImpls, getWorkerQueue()); - bool DiscardPiEvent = (MQueue->supportsDiscardingPiEvents() && - MCommandGroup->getRequirements().size() == 0); + // We can omit creating a PI event and create a "discarded" event if either + // the queue has the discard property or the command has been explicitly + // marked as not needing an event, e.g. if the user did not ask for one, and + // if the queue supports discarded PI event and there are no requirements. + bool DiscardPiEvent = (MQueue->MDiscardEvents || !MEventNeeded) && + MQueue->supportsDiscardingPiEvents() && + MCommandGroup->getRequirements().size() == 0; sycl::detail::pi::PiEvent *Event = DiscardPiEvent ? nullptr : &MEvent->getHandleRef(); detail::EventImplPtr EventImpl = DiscardPiEvent ? 
nullptr : MEvent; @@ -2992,8 +3040,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { NDRDescT &NDRDesc = ExecKernel->MNDRDesc; std::vector &Args = ExecKernel->MArgs; - if (MQueue->is_host() || (MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator)) { + if (MQueue->is_host()) { for (ArgDesc &Arg : Args) if (kernel_param_kind_t::kind_accessor == Arg.MType) { Requirement *Req = (Requirement *)(Arg.MPtr); @@ -3006,20 +3053,8 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { Plugin->call(RawEvents.size(), &RawEvents[0]); } - if (MQueue->is_host()) { - ExecKernel->MHostKernel->call(NDRDesc, - getEvent()->getHostProfilingInfo()); - } else { - assert(MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator); - if (MEvent != nullptr) - MEvent->setHostEnqueueTime(); - MQueue->getPlugin()->call( - nullptr, - reinterpret_cast(ExecKernel->MHostKernel->getPtr()), - NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], - &NDRDesc.LocalSize[0], 0, nullptr, nullptr); - } + ExecKernel->MHostKernel->call(NDRDesc, + getEvent()->getHostProfilingInfo()); return PI_SUCCESS; } @@ -3126,13 +3161,14 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { } std::vector ReqToMem; + std::vector ReqPiMem; if (HostTask->MHostTask->isInteropTask()) { // Extract the Mem Objects for all Requirements, to ensure they are // available if a user asks for them inside the interop task scope const std::vector &HandlerReq = HostTask->getRequirements(); - auto ReqToMemConv = [&ReqToMem, HostTask](Requirement *Req) { + auto ReqToMemConv = [&ReqToMem, &ReqPiMem, HostTask](Requirement *Req) { const std::vector &AllocaCmds = Req->MSYCLMemObj->MRecord->MAllocaCommands; @@ -3142,6 +3178,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); ReqToMem.emplace_back(std::make_pair(Req, MemArg)); + ReqPiMem.emplace_back(MemArg); return; } @@ -3163,7 +3200,7 @@ pi_int32 ExecCGCommand::enqueueImpQueue() { 
copySubmissionCodeLocation(); MQueue->getThreadPool().submit( - DispatchHostTask(this, std::move(ReqToMem))); + DispatchHostTask(this, std::move(ReqToMem), std::move(ReqPiMem))); MShouldCompleteEventIfPossible = false; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 8ba0cceee9e6a..ea1a5b5111149 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -648,6 +648,7 @@ class ExecCGCommand : public Command { public: ExecCGCommand( std::unique_ptr CommandGroup, QueueImplPtr Queue, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, const std::vector &Dependencies = {}); @@ -672,6 +673,11 @@ class ExecCGCommand : public Command { // necessary. KernelFusionCommand *MFusionCmd = nullptr; + // MEventNeeded is true if the command needs to produce a valid event. The + // implementation may elect to not produce events (native or SYCL) if this + // is false. + bool MEventNeeded = true; + bool producesPiEvent() const final; bool supportsPostEnqueueCleanup() const final; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index f8c2bb27855e3..a17c45720733c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -943,14 +943,15 @@ static void combineAccessModesOfReqs(std::vector &Reqs) { Scheduler::GraphBuildResult Scheduler::GraphBuilder::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - std::vector &ToEnqueue, + std::vector &ToEnqueue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) { std::vector &Reqs = CommandGroup->getRequirements(); std::vector &Events = CommandGroup->getEvents(); - auto NewCmd = std::make_unique( - std::move(CommandGroup), Queue, CommandBuffer, std::move(Dependencies)); + auto NewCmd = std::make_unique(std::move(CommandGroup), Queue, + 
EventNeeded, CommandBuffer, + std::move(Dependencies)); if (!NewCmd) throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); @@ -1350,7 +1351,8 @@ Command *Scheduler::GraphBuilder::connectDepEvent( CG::CodeplayHostTask, /* Payload */ {})); ConnectCmd = new ExecCGCommand( - std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue()); + std::move(ConnectCG), Scheduler::getInstance().getDefaultHostQueue(), + /*EventNeeded=*/true); } catch (const std::bad_alloc &) { throw runtime_error("Out of host memory", PI_ERROR_OUT_OF_HOST_MEMORY); } @@ -1624,8 +1626,8 @@ Scheduler::GraphBuilder::completeFusion(QueueImplPtr Queue, }), FusedEventDeps.end()); - auto FusedKernelCmd = - std::make_unique(std::move(FusedCG), Queue); + auto FusedKernelCmd = std::make_unique( + std::move(FusedCG), Queue, /*EventNeeded=*/true); // Inherit auxiliary resources from fused command groups Scheduler::getInstance().takeAuxiliaryResources(FusedKernelCmd->getEvent(), diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 99975edb7d649..78fd300460526 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -94,7 +94,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, EventImplPtr Scheduler::addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - sycl::detail::pi::PiExtCommandBuffer CommandBuffer, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer, const std::vector &Dependencies) { EventImplPtr NewEvent = nullptr; const CG::CGTYPE Type = CommandGroup->getType(); @@ -130,17 +130,18 @@ EventImplPtr Scheduler::addCG( NewEvent = NewCmd->getEvent(); break; case CG::CodeplayHostTask: { - auto Result = MGraphBuilder.addCG(std::move(CommandGroup), - DefaultHostQueue, AuxiliaryCmds); + auto Result = + MGraphBuilder.addCG(std::move(CommandGroup), DefaultHostQueue, + AuxiliaryCmds, EventNeeded); NewCmd = Result.NewCmd; NewEvent = 
Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; break; } default: - auto Result = MGraphBuilder.addCG(std::move(CommandGroup), - std::move(Queue), AuxiliaryCmds, - CommandBuffer, std::move(Dependencies)); + auto Result = MGraphBuilder.addCG( + std::move(CommandGroup), std::move(Queue), AuxiliaryCmds, EventNeeded, + CommandBuffer, std::move(Dependencies)); NewCmd = Result.NewCmd; NewEvent = Result.NewEvent; diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 9ce3d7d2a5f94..124fc1181116c 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -372,6 +372,7 @@ class Scheduler { /// /// \param CommandGroup is a unique_ptr to a command group to be added. /// \param Queue Queue that is registering the command-group. + /// \param EventNeeded Specifies whether an event is explicitly required. /// \param CommandBuffer Optional command buffer to enqueue to instead of /// directly to the queue. /// \param Dependencies Optional list of dependency @@ -379,6 +380,7 @@ class Scheduler { /// \return an event object to wait on for command group completion. EventImplPtr addCG(std::unique_ptr CommandGroup, const QueueImplPtr &Queue, + bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, const std::vector &Dependencies = {}); @@ -602,7 +604,7 @@ class Scheduler { /// processor right away or not. 
GraphBuildResult addCG( std::unique_ptr CommandGroup, const QueueImplPtr &Queue, - std::vector &ToEnqueue, + std::vector &ToEnqueue, bool EventNeeded, sycl::detail::pi::PiExtCommandBuffer CommandBuffer = nullptr, const std::vector &Dependencies = {}); diff --git a/sycl/source/detail/stream_impl.cpp b/sycl/source/detail/stream_impl.cpp index 4550b5cc26629..4ff380d7295c5 100644 --- a/sycl/source/detail/stream_impl.cpp +++ b/sycl/source/detail/stream_impl.cpp @@ -18,12 +18,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -stream_impl::stream_impl(size_t BufferSize, size_t MaxStatementSize, - handler &CGH) - : stream_impl(BufferSize, MaxStatementSize, {}) { - (void)CGH; -} - stream_impl::stream_impl(size_t BufferSize, size_t MaxStatementSize, const property_list &PropList) : BufferSize_(BufferSize), MaxStatementSize_(MaxStatementSize), @@ -72,10 +66,6 @@ size_t stream_impl::get_work_item_buffer_size() const { return MaxStatementSize_; } -size_t stream_impl::get_size() const { return BufferSize_; } - -size_t stream_impl::get_max_statement_size() const { return MaxStatementSize_; } - void stream_impl::initStreamHost(QueueImplPtr Queue) { // Real size of full flush buffer is saved only in buffer_impl field of // FlushBuf object. @@ -138,7 +128,6 @@ void stream_impl::flush(const EventImplPtr &LeadEvent) { } } -void stream_impl::flush() { flush(nullptr); } } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/stream_impl.hpp b/sycl/source/detail/stream_impl.hpp index 823653016c162..1578a8d7cb508 100644 --- a/sycl/source/detail/stream_impl.hpp +++ b/sycl/source/detail/stream_impl.hpp @@ -22,12 +22,8 @@ namespace sycl { inline namespace _V1 { namespace detail { -class __SYCL_EXPORT stream_impl { +class stream_impl { public: - // TODO: This constructor is unused. - // To be removed when API/ABI changes are allowed. 
- stream_impl(size_t BufferSize, size_t MaxStatementSize, handler &CGH); - stream_impl(size_t BufferSize, size_t MaxStatementSize, const property_list &PropList); @@ -49,20 +45,10 @@ class __SYCL_EXPORT stream_impl { // LeadEvent as well as in queue LeadEvent associated with. void flush(const EventImplPtr &LeadEvent); - // Enqueue task to copy stream buffer to the host and print the contents - // Remove during next ABI breaking window - void flush(); - size_t size() const noexcept; size_t get_work_item_buffer_size() const; - // TODO: Unusued. Remove when ABI-break is allowed. - size_t get_size() const; - - // TODO: Unusued. Remove when ABI-break is allowed. - size_t get_max_statement_size() const; - template bool has_property() const noexcept { return PropList_.has_property(); } diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index f67453d8ac221..d86468f4dc497 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -37,7 +37,7 @@ using ContextImplPtr = std::shared_ptr; using EventImplPtr = std::shared_ptr; // The class serves as a base for all SYCL memory objects. -class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { +class SYCLMemObjT : public SYCLMemObjI { // The check for output iterator is commented out as it blocks set_final_data // with void * argument to be used. 
diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 35adb98e9d570..304045389b53b 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -74,7 +74,13 @@ class ThreadPool { start(); } - ~ThreadPool() { finishAndWait(); } + ~ThreadPool() { + try { + finishAndWait(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ThreadPool", e); + } + } void finishAndWait() { MStop.store(true); diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 70aa37aad26a2..20df5cf47256a 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -156,16 +156,9 @@ device::get_info_impl() const { #undef __SYCL_ASPECT }; - auto UnsupportedAspects = std::remove_if( - DeviceAspects.begin(), DeviceAspects.end(), [&](aspect Aspect) { - try { - return !impl->has(Aspect); - } catch (const runtime_error &ex) { - if (ex.get_cl_code() == PI_ERROR_INVALID_DEVICE) - return true; - throw; - } - }); + auto UnsupportedAspects = + std::remove_if(DeviceAspects.begin(), DeviceAspects.end(), + [&](aspect Aspect) { return !impl->has(Aspect); }); DeviceAspects.erase(UnsupportedAspects, DeviceAspects.end()); diff --git a/sycl/source/device_selector.cpp b/sycl/source/device_selector.cpp index 2716ae920c1e1..e857dfdf539a4 100644 --- a/sycl/source/device_selector.cpp +++ b/sycl/source/device_selector.cpp @@ -179,13 +179,6 @@ __SYCL_EXPORT int default_selector_v(const device &dev) { // The default selector doesn't reject any devices. int Score = 0; - // we give the esimd_emulator device a score of zero to prevent it from being - // chosen among other devices. The same thing is done for gpu_selector_v - // below. 
- if (dev.get_backend() == backend::ext_intel_esimd_emulator) { - return 0; - } - traceDeviceSelector("info::device_type::automatic"); if (dev.is_gpu()) @@ -209,10 +202,6 @@ __SYCL_EXPORT int default_selector_v(const device &dev) { __SYCL_EXPORT int gpu_selector_v(const device &dev) { int Score = detail::REJECT_DEVICE_SCORE; - if (dev.get_backend() == backend::ext_intel_esimd_emulator) { - return 0; - } - traceDeviceSelector("info::device_type::gpu"); if (dev.is_gpu()) { Score = 1000; diff --git a/sycl/source/enqueue_functions.cpp b/sycl/source/enqueue_functions.cpp new file mode 100644 index 0000000000000..b2e4f3f712f4b --- /dev/null +++ b/sycl/source/enqueue_functions.cpp @@ -0,0 +1,42 @@ +//==------ enqueue_functions.cpp ------- SYCL enqueue free functions -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +__SYCL_EXPORT void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes, + const sycl::detail::code_location &CodeLoc) { + sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); + QueueImplPtr->memcpy(QueueImplPtr, Dest, Src, NumBytes, {}, + /*CallerNeedsEvent=*/false, CodeLoc); +} + +__SYCL_EXPORT void memset(queue Q, void *Ptr, int Value, size_t NumBytes, + const sycl::detail::code_location &CodeLoc) { + sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); + QueueImplPtr->memset(QueueImplPtr, Ptr, Value, NumBytes, {}, + /*CallerNeedsEvent=*/false); +} + +__SYCL_EXPORT void mem_advise(queue Q, void *Ptr, size_t NumBytes, int Advice, + const sycl::detail::code_location &CodeLoc) { + 
sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); + auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); + QueueImplPtr->mem_advise(QueueImplPtr, Ptr, NumBytes, pi_mem_advice(Advice), + {}, /*CallerNeedsEvent=*/false); +} + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/esimd_emulator_device_interface.cpp b/sycl/source/esimd_emulator_device_interface.cpp deleted file mode 100644 index b9f065dce8552..0000000000000 --- a/sycl/source/esimd_emulator_device_interface.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//==--------------- esimd_emulator_device_interface.cpp --------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/// \file esimdcpu_device_interface.cpp -/// Definitions for ESIMD_EMULATOR-device specific definitions. -/// -/// This interface is for ESIMD intrinsic emulation implementations -/// such as slm_access to access ESIMD_EMULATOR specific-support therefore -/// it has to be defined and shared as include directory -/// -/// \ingroup sycl_pi_esimd_emulator - -#include - -namespace sycl { -inline namespace _V1 { -namespace detail { - -struct ESIMDDeviceInterface { - uintptr_t version; - void *reserved; - ESIMDDeviceInterface(); -}; - -// TODO: this function is kept only for libsycl binary backward compatibility. -// Remove it when ABI breaking changes are allowed. 
-__SYCL_EXPORT ESIMDDeviceInterface *getESIMDDeviceInterface() { - return nullptr; -} - -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index ce88520fe50dd..f7e023c718462 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -86,6 +86,7 @@ inline namespace _V1 { #define SYCL_EXT_ONEAPI_ANNOTATED_ARG 1 #define SYCL_EXT_ONEAPI_ANNOTATED_PTR 1 #define SYCL_EXT_ONEAPI_COPY_OPTIMIZE 1 +#define SYCL_EXT_ONEAPI_VIRTUAL_MEM 1 #define SYCL_EXT_ONEAPI_USM_MALLOC_PROPERTIES 1 #cmakedefine01 SYCL_ENABLE_KERNEL_FUSION #if SYCL_ENABLE_KERNEL_FUSION diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 7cef9cc6ddd93..10ce364310912 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -80,15 +80,29 @@ void *getValueFromDynamicParameter( } // namespace detail +/// TODO: Unused. Remove with ABI break. handler::handler(std::shared_ptr Queue, bool IsHost) - : handler(Queue, Queue, nullptr, IsHost) {} + : handler(Queue, IsHost, /*CallerNeedsEvent=*/true) {} +/// TODO: Unused. Remove with ABI break. 
handler::handler(std::shared_ptr Queue, std::shared_ptr PrimaryQueue, std::shared_ptr SecondaryQueue, bool IsHost) + : handler(Queue, PrimaryQueue, SecondaryQueue, IsHost, + /*CallerNeedsEvent=*/true) {} + +handler::handler(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent) + : handler(Queue, Queue, nullptr, IsHost, CallerNeedsEvent) {} + +handler::handler(std::shared_ptr Queue, + std::shared_ptr PrimaryQueue, + std::shared_ptr SecondaryQueue, + bool IsHost, bool CallerNeedsEvent) : MImpl(std::make_shared(std::move(PrimaryQueue), - std::move(SecondaryQueue))), + std::move(SecondaryQueue), + CallerNeedsEvent)), MQueue(std::move(Queue)), MIsHost(IsHost) {} handler::handler( @@ -279,40 +293,10 @@ event handler::finalize() { : nullptr); Result = PI_SUCCESS; } else { - if (MQueue->getDeviceImplPtr()->getBackend() == - backend::ext_intel_esimd_emulator) { - // Capture the host timestamp for profiling (queue time) - if (NewEvent != nullptr) - NewEvent->setHostEnqueueTime(); - [&](auto... 
Args) { - if (MImpl->MKernelIsCooperative) { - MQueue->getPlugin() - ->call< - detail::PiApiKind::piextEnqueueCooperativeKernelLaunch>( - Args...); - } else { - MQueue->getPlugin() - ->call(Args...); - } - }(/* queue */ - nullptr, - /* kernel */ - reinterpret_cast(MHostKernel->getPtr()), - /* work_dim */ - MNDRDesc.Dims, - /* global_work_offset */ &MNDRDesc.GlobalOffset[0], - /* global_work_size */ &MNDRDesc.GlobalSize[0], - /* local_work_size */ &MNDRDesc.LocalSize[0], - /* num_events_in_wait_list */ 0, - /* event_wait_list */ nullptr, - /* event */ nullptr); - Result = PI_SUCCESS; - } else { - Result = enqueueImpKernel( - MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel, - MKernelName.c_str(), RawEvents, NewEvent, nullptr, - MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative); - } + Result = enqueueImpKernel( + MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, MKernel, + MKernelName.c_str(), RawEvents, NewEvent, nullptr, + MImpl->MKernelCacheConfig, MImpl->MKernelIsCooperative); } #ifdef XPTI_ENABLE_INSTRUMENTATION // Emit signal only when event is created @@ -327,8 +311,9 @@ event handler::finalize() { return Result; }; - bool DiscardEvent = false; - if (MQueue->supportsDiscardingPiEvents()) { + bool DiscardEvent = (MQueue->MDiscardEvents || !MImpl->MEventNeeded) && + MQueue->supportsDiscardingPiEvents(); + if (DiscardEvent) { // Kernel only uses assert if it's non interop one bool KernelUsesAssert = !(MKernel && MKernel->isInterop()) && @@ -341,6 +326,9 @@ event handler::finalize() { if (PI_SUCCESS != EnqueueKernel()) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); + auto EventImpl = std::make_shared( + detail::event_impl::HES_Discarded); + MLastEvent = detail::createSyclObjFromImpl(EventImpl); } else { NewEvent = std::make_shared(MQueue); NewEvent->setWorkerQueue(MQueue); @@ -606,7 +594,7 @@ event handler::finalize() { } detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( - std::move(CommandGroup), 
std::move(MQueue)); + std::move(CommandGroup), std::move(MQueue), MImpl->MEventNeeded); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; @@ -1811,5 +1799,7 @@ void handler::registerDynamicParameter( } MImpl->MDynamicParameters.emplace_back(ParamImpl.get(), ArgIndex); } + +bool handler::eventNeeded() const { return MImpl->MEventNeeded; } } // namespace _V1 } // namespace sycl diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 32ec35dbee837..aace54af59ac2 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -312,11 +312,6 @@ bool is_compatible(const std::vector &KernelIDs, const device &Dev) { const detail::RTDeviceBinaryImage &Img) { const char *Target = Img.getRawData().DeviceTargetSpec; auto BE = Dev.get_backend(); - // ESIMD emulator backend is only compatible with esimd kernels. - if (BE == sycl::backend::ext_intel_esimd_emulator) { - pi_device_binary_property Prop = Img.getProperty("isEsimdImage"); - return (Prop && (detail::DeviceBinaryProperty(Prop).asUint32() != 0)); - } if (strcmp(Target, __SYCL_PI_DEVICE_BINARY_TARGET_SPIRV64) == 0) { return (BE == sycl::backend::opencl || BE == sycl::backend::ext_oneapi_level_zero); diff --git a/sycl/source/physical_mem.cpp b/sycl/source/physical_mem.cpp new file mode 100644 index 0000000000000..d9d6073a68e89 --- /dev/null +++ b/sycl/source/physical_mem.cpp @@ -0,0 +1,38 @@ +//==--- physical_mem.cpp - sycl_ext_oneapi_virtual_mem physical_mem class --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +physical_mem::physical_mem(const device &SyclDevice, const context &SyclContext, + size_t NumBytes) { + if (!SyclDevice.has(aspect::ext_oneapi_virtual_mem)) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_virtual_mem."); + + impl = std::make_shared( + SyclDevice, SyclContext, NumBytes); +} + +void *physical_mem::map(uintptr_t Ptr, size_t NumBytes, + address_access_mode Mode, size_t Offset) const { + return impl->map(Ptr, NumBytes, Mode, Offset); +} + +context physical_mem::get_context() const { return impl->get_context(); } +device physical_mem::get_device() const { return impl->get_device(); } +size_t physical_mem::size() const noexcept { return impl->size(); } + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index db3ce2f5cb1b3..9648431a5a429 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -106,39 +106,44 @@ void queue::throw_asynchronous() { impl->throw_asynchronous(); } event queue::memset(void *Ptr, int Value, size_t Count, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, {}); + return impl->memset(impl, Ptr, Value, Count, {}, /*CallerNeedsEvent=*/true); } event queue::memset(void *Ptr, int Value, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, {DepEvent}); + return impl->memset(impl, Ptr, Value, Count, {DepEvent}, + /*CallerNeedsEvent=*/true); } event queue::memset(void *Ptr, int Value, size_t Count, const 
std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memset(impl, Ptr, Value, Count, DepEvents); + return impl->memset(impl, Ptr, Value, Count, DepEvents, + /*CallerNeedsEvent=*/true); } event queue::memcpy(void *Dest, const void *Src, size_t Count, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, {}, CodeLoc); + return impl->memcpy(impl, Dest, Src, Count, {}, /*CallerNeedsEvent=*/true, + CodeLoc); } event queue::memcpy(void *Dest, const void *Src, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, {DepEvent}, CodeLoc); + return impl->memcpy(impl, Dest, Src, Count, {DepEvent}, + /*CallerNeedsEvent=*/true, CodeLoc); } event queue::memcpy(void *Dest, const void *Src, size_t Count, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->memcpy(impl, Dest, Src, Count, DepEvents, CodeLoc); + return impl->memcpy(impl, Dest, Src, Count, DepEvents, + /*CallerNeedsEvent=*/true, CodeLoc); } event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice, @@ -150,20 +155,23 @@ event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice, event queue::mem_advise(const void *Ptr, size_t Length, int Advice, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {}); + return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {}, + /*CallerNeedsEvent=*/true); } event queue::mem_advise(const void *Ptr, size_t Length, int Advice, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, 
pi_mem_advice(Advice), {DepEvent}); + return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), {DepEvent}, + /*CallerNeedsEvent=*/true); } event queue::mem_advise(const void *Ptr, size_t Length, int Advice, const std::vector &DepEvents, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), DepEvents); + return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), DepEvents, + /*CallerNeedsEvent=*/true); } event queue::discard_or_return(const event &Event) { @@ -184,6 +192,11 @@ event queue::submit_impl(std::function CGH, queue SecondQueue, return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc); } +void queue::submit_without_event_impl(std::function CGH, + const detail::code_location &CodeLoc) { + return impl->submit_without_event(CGH, impl, CodeLoc); +} + event queue::submit_impl_and_postprocess( std::function CGH, const detail::code_location &CodeLoc, const SubmitPostProcessF &PostProcess) { @@ -225,8 +238,13 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) { /// \return a SYCL event object, which corresponds to the queue the command /// group is being enqueued on. event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) { - if (is_in_order() && !impl->getCommandGraph() && !impl->MIsProfilingEnabled) - return getBarrierEventForInorderQueueHelper(impl); + if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents && + !impl->MIsProfilingEnabled) { + event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); + // If the last event was discarded, fall back to enqueuing a barrier. 
+ if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) + return InOrderLastEvent; + } return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc); } @@ -247,9 +265,13 @@ event queue::ext_oneapi_submit_barrier(const std::vector &WaitList, auto EventImpl = detail::getSyclObjImpl(Event); return !EventImpl->isContextInitialized() || EventImpl->isNOP(); }); - if (is_in_order() && !impl->getCommandGraph() && !impl->MIsProfilingEnabled && - AllEventsEmptyOrNop) - return getBarrierEventForInorderQueueHelper(impl); + if (is_in_order() && !impl->getCommandGraph() && !impl->MDiscardEvents && + !impl->MIsProfilingEnabled && AllEventsEmptyOrNop) { + event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl); + // If the last event was discarded, fall back to enqueuing a barrier. + if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded()) + return InOrderLastEvent; + } return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); }, CodeLoc); @@ -321,7 +343,7 @@ event queue::memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, const std::vector &DepEvents) { return impl->memcpyToDeviceGlobal(impl, DeviceGlobalPtr, Src, IsDeviceImageScope, NumBytes, Offset, - DepEvents); + DepEvents, /*CallerNeedsEvent=*/true); } event queue::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, @@ -330,7 +352,7 @@ event queue::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, const std::vector &DepEvents) { return impl->memcpyFromDeviceGlobal(impl, Dest, DeviceGlobalPtr, IsDeviceImageScope, NumBytes, Offset, - DepEvents); + DepEvents, /*CallerNeedsEvent=*/true); } bool queue::device_has(aspect Aspect) const { diff --git a/sycl/source/stream.cpp b/sycl/source/stream.cpp index d2578bbf1f5bc..740b6de2f8b2f 100644 --- a/sycl/source/stream.cpp +++ b/sycl/source/stream.cpp @@ -56,7 +56,7 @@ stream::stream(size_t BufferSize, size_t MaxStatementSize, handler &CGH, detail::getSyclObjImpl(GlobalFlushBuf)->PerWI = true; } -size_t 
stream::size() const noexcept { return impl->get_size(); } +size_t stream::size() const noexcept { return impl->size(); } size_t stream::get_work_item_buffer_size() const { return impl->get_work_item_buffer_size(); diff --git a/sycl/source/virtual_mem.cpp b/sycl/source/virtual_mem.cpp new file mode 100644 index 0000000000000..8cdc5ffba0223 --- /dev/null +++ b/sycl/source/virtual_mem.cpp @@ -0,0 +1,183 @@ +//==- virtual_mem.cpp - sycl_ext_oneapi_virtual_mem virtual mem free funcs -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +// System headers for querying page-size. +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +__SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice, + const context &SyclContext, + granularity_mode Mode) { + if (!SyclDevice.has(aspect::ext_oneapi_virtual_mem)) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_virtual_mem."); + + pi_virtual_mem_granularity_info GranularityQuery = [=]() { + switch (Mode) { + case granularity_mode::minimum: + return PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; + case granularity_mode::recommended: + return PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; + } + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "Unrecognized granularity mode."); + }(); + + std::shared_ptr DeviceImpl = + sycl::detail::getSyclObjImpl(SyclDevice); + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); +#ifndef NDEBUG + size_t InfoOutputSize; + 
Plugin->call( + ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), GranularityQuery, + 0, nullptr, &InfoOutputSize); + assert(InfoOutputSize == sizeof(size_t) && + "Unexpected output size of granularity info query."); +#endif // NDEBUG + size_t Granularity = 0; + Plugin->call( + ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), GranularityQuery, + sizeof(size_t), &Granularity, nullptr); + return Granularity; +} + +__SYCL_EXPORT size_t get_mem_granularity(const context &SyclContext, + granularity_mode Mode) { + const std::vector Devices = SyclContext.get_devices(); + if (!std::all_of(Devices.cbegin(), Devices.cend(), [](const device &Dev) { + return Dev.has(aspect::ext_oneapi_virtual_mem); + })) { + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "One or more devices in the context does not support " + "aspect::ext_oneapi_virtual_mem."); + } + + // CUDA only needs page-size granularity. + if (SyclContext.get_backend() == backend::ext_oneapi_cuda) { +#ifdef _WIN32 + SYSTEM_INFO SystemInfo; + GetSystemInfo(&SystemInfo); + return static_cast(SystemInfo.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif + } + + // Otherwise, we find the least common multiple of granularity of the devices + // in the context. 
+ size_t LCMGranularity = get_mem_granularity(Devices[0], SyclContext, Mode); + for (size_t I = 1; I < Devices.size(); ++I) { + size_t DevGranularity = get_mem_granularity(Devices[I], SyclContext, Mode); + size_t GCD = LCMGranularity; + size_t Rem = DevGranularity % GCD; + while (Rem != 0) { + std::swap(GCD, Rem); + Rem %= GCD; + } + LCMGranularity *= DevGranularity / GCD; + } + return LCMGranularity; +} + +__SYCL_EXPORT uintptr_t reserve_virtual_mem(uintptr_t Start, size_t NumBytes, + const context &SyclContext) { + std::vector Devs = SyclContext.get_devices(); + if (std::any_of(Devs.cbegin(), Devs.cend(), [](const device &Dev) { + return !Dev.has(aspect::ext_oneapi_virtual_mem); + })) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "One or more devices in the supplied context does not support " + "aspect::ext_oneapi_virtual_mem."); + + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + void *OutPtr = nullptr; + Plugin->call( + ContextImpl->getHandleRef(), reinterpret_cast(Start), NumBytes, + &OutPtr); + return reinterpret_cast(OutPtr); +} + +__SYCL_EXPORT void free_virtual_mem(uintptr_t Ptr, size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), reinterpret_cast(Ptr), NumBytes); +} + +__SYCL_EXPORT void set_access_mode(const void *Ptr, size_t NumBytes, + address_access_mode Mode, + const context &SyclContext) { + sycl::detail::pi::PiVirtualAccessFlags AccessFlags = + sycl::detail::AccessModeToVirtualAccessFlags(Mode); + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, AccessFlags); +} + 
+__SYCL_EXPORT address_access_mode get_access_mode(const void *Ptr, + size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); +#ifndef NDEBUG + size_t InfoOutputSize; + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE, 0, nullptr, &InfoOutputSize); + assert(InfoOutputSize == sizeof(sycl::detail::pi::PiVirtualAccessFlags) && + "Unexpected output size of access mode info query."); +#endif // NDEBUG + sycl::detail::pi::PiVirtualAccessFlags AccessFlags; + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE, + sizeof(sycl::detail::pi::PiVirtualAccessFlags), &AccessFlags, nullptr); + + if (AccessFlags & PI_VIRTUAL_ACCESS_FLAG_RW) + return address_access_mode::read_write; + if (AccessFlags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + return address_access_mode::read; + return address_access_mode::none; +} + +__SYCL_EXPORT void unmap(const void *Ptr, size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes); +} + +} // Namespace ext::oneapi::experimental +} // namespace _V1 +} // Namespace sycl diff --git a/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp b/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp index b4ae8b2b30e12..3783de97bd6c1 100644 --- a/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp +++ b/sycl/test-e2e/AddressSanitizer/common/kernel-debug.cpp @@ -4,12 +4,22 @@ // RUN: env SYCL_PREFER_UR=1 UR_LAYER_ASAN_OPTIONS=debug:0 %{run} %t 2>&1 | FileCheck %s #include +/// This test is used to check enabling/disabling kernel debug message +/// We always use "[kernel]" prefix in kernel debug message + +constexpr 
std::size_t N = 4; +constexpr std::size_t group_size = 1; + int main() { sycl::queue Q; - int *array = sycl::malloc_device(1, Q); + int *array = sycl::malloc_device(N, Q); - Q.submit([&](sycl::handler &h) { - h.single_task([=]() { *array = 0; }); + Q.submit([&](sycl::handler &cgh) { + auto acc = sycl::local_accessor(group_size, cgh); + cgh.parallel_for( + sycl::nd_range<1>(N, group_size), [=](sycl::nd_item<1> item) { + array[item.get_global_id()] = acc[item.get_local_id()]; + }); }); Q.wait(); // CHECK-DEBUG: [kernel] diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp new file mode 100644 index 0000000000000..2e10143fdad8c --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/USM/parallel_no_local_size.cpp @@ -0,0 +1,41 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O0 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{build} %device_asan_flags -DMALLOC_DEVICE -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-DEVICE %s +// RUN: %{build} %device_asan_flags -DMALLOC_HOST -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck --check-prefixes CHECK,CHECK-HOST %s +// RUN: %{build} %device_asan_flags -DMALLOC_SHARED -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t &> %t.txt ; FileCheck --check-prefixes CHECK,CHECK-SHARED --input-file %t.txt %s + +#include + +#include + +int main() { + sycl::queue Q; + constexpr std::size_t N = 12345; +#if defined(MALLOC_HOST) + auto *array = sycl::malloc_host(N, Q); +#elif defined(MALLOC_SHARED) + auto *array = sycl::malloc_shared(N, Q); +#else // 
defined(MALLOC_DEVICE) + auto *array = sycl::malloc_device(N, Q); +#endif + + Q.submit([&](sycl::handler &h) { + h.parallel_for(sycl::range<1>(N + 1), + [=](sycl::id<1> i) { ++array[i]; }); + }); + Q.wait(); + // CHECK-DEVICE: ERROR: DeviceSanitizer: out-of-bounds-access on Device USM + // CHECK-HOST: ERROR: DeviceSanitizer: out-of-bounds-access on Host USM + // CHECK-SHARED: ERROR: DeviceSanitizer: out-of-bounds-access on Shared USM + // CHECK: READ of size 1 at kernel {{<.*MyKernel.*>}} LID({{.*}}, 0, 0) GID(12345, 0, 0) + // CHECK: {{ #0 .* .*parallel_no_local_size.cpp:}}[[@LINE-7]] + + sycl::free(array, Q); + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp new file mode 100644 index 0000000000000..1c8ba6915abc8 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/multiple_private.cpp @@ -0,0 +1,77 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_asan_flags -DVAR=1 -O2 -g -o %t1 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t1 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR1 %s +// RUN: %{build} %device_asan_flags -DVAR=2 -O2 -g -o %t2 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t2 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR2 %s +// RUN: %{build} %device_asan_flags -DVAR=3 -O2 -g -o %t3 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t3 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR3 %s +// RUN: %{build} %device_asan_flags -DVAR=4 -O2 -g -o %t4 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t4 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR4 %s +// RUN: %{build} %device_asan_flags -DVAR=5 -O2 -g -o %t5 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t5 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR5 %s +// RUN: %{build} %device_asan_flags -DVAR=6 -O2 -g -o %t6 +// RUN: env SYCL_PREFER_UR=1 %{run} not %t6 2>&1 | FileCheck --check-prefixes CHECK,CHECK-VAR6 %s + +#include +#include + +// CHECK: ERROR: DeviceSanitizer: 
out-of-bounds-access on Private Memory +template __attribute__((noinline)) T foo(T *p) { return *p; } +template __attribute__((noinline)) T foo1(T *p) { return *p; } +// CHECK-VAR1: READ of size 2 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR1: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo2(T *p) { return *p; } +// CHECK-VAR2: READ of size 2 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR2: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo3(T *p) { return *p; } +// CHECK-VAR3: READ of size 4 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR3: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo4(T *p) { return *p; } +// CHECK-VAR4: READ of size 4 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR4: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo5(T *p) { return *p; } +// CHECK-VAR5: READ of size 8 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR5: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] +template __attribute__((noinline)) T foo6(T *p) { return *p; } +// CHECK-VAR6: READ of size 1 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID(0, 0, 0) +// CHECK-VAR6: #0 {{.*}} {{.*multiple_private.cpp}}:[[@LINE-2]] + +int main() { + sycl::queue Q; + auto *array = sycl::malloc_device(5, Q); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + short p1[] = {1}; + int p2[] = {1}; + int p3[10] = {8, 1, 10, 1, 0, 10}; + long p4[] = {5111LL}; + char p5[] = {'c'}; + + array[0] = foo(&p1[0]); + array[1] = foo(&p2[0]); + for (int i = 0; i < 10; ++i) + array[2] += foo(&p3[i]); + array[3] = foo(&p4[0]); + array[4] = foo(&p5[0]); + +#if VAR == 1 + array[0] = foo1(&p1[-4]); +#elif VAR == 2 + array[0] = foo2(&p1[4]); +#elif VAR == 3 + array[0] = foo3(&p2[1]); +#elif VAR == 4 + array[0] = foo4(&p3[10]); +#elif VAR == 5 + 
array[0] = foo5(&p4[1]); +#else + array[0] = foo6(&p5[1]); +#endif + }); + }); + Q.wait(); + sycl::free(array, Q); + + return 0; +} diff --git a/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp new file mode 100644 index 0000000000000..50f1f763233d6 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/out-of-bounds/private/single_private.cpp @@ -0,0 +1,32 @@ +// REQUIRES: linux, cpu +// RUN: %{build} %device_asan_flags -O0 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck %s +// RUN: %{build} %device_asan_flags -O1 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck %s +// RUN: %{build} %device_asan_flags -O2 -g -o %t +// RUN: env SYCL_PREFER_UR=1 %{run} not %t 2>&1 | FileCheck %s + +#include +#include + +__attribute__((noinline)) int foo(int p[], int i) { return p[i]; } +// CHECK: ERROR: DeviceSanitizer: out-of-bounds-access on Private Memory +// CHECK: READ of size 4 at kernel {{<.*MyKernel>}} LID(0, 0, 0) GID({{.*}}, 0, 0) +// CHECK: #0 {{.*}} {{.*single_private.cpp}}:[[@LINE-3]] + +int main() { + sycl::queue Q; + auto *array = sycl::malloc_device(1, Q); + + Q.submit([&](sycl::handler &h) { + h.single_task([=]() { + int p[] = {1, 2, 3, 4}; + for (int i = 0; i < 5; ++i) + array[0] = foo(p, i); + }); + }); + Q.wait(); + sycl::free(array, Q); + + return 0; +} diff --git a/sycl/test-e2e/Basic/built-ins/helpers.hpp b/sycl/test-e2e/Basic/built-ins/helpers.hpp index 03a7c720e9afd..724e417c4d6e0 100644 --- a/sycl/test-e2e/Basic/built-ins/helpers.hpp +++ b/sycl/test-e2e/Basic/built-ins/helpers.hpp @@ -33,16 +33,28 @@ void test(bool CheckDevice, double delta, FuncTy F, ExpectedTy Expected, sycl::buffer SuccessBuf{1}; + sycl::queue q; + sycl::device dev = q.get_device(); // Make sure we don't use fp64 on devices that don't support it. 
- sycl::detail::get_elem_type_t d(delta); - - sycl::queue{}.submit([&](sycl::handler &cgh) { + const bool fp64 = dev.has(sycl::aspect::fp64); + q.submit([&](sycl::handler &cgh) { sycl::accessor Success{SuccessBuf, cgh}; - cgh.single_task([=]() { - auto R = F(Args...); - static_assert(std::is_same_v); - Success[0] = equal(R, Expected, d); - }); + if (fp64) { + cgh.single_task([=]() { + auto R = F(Args...); + static_assert(std::is_same_v); + // use double precision error tolerance when fp64 supported + Success[0] = equal(R, Expected, delta); + }); + } else { + // downscale the error tolerance when fp64 is not supported + sycl::detail::get_elem_type_t d(delta); + cgh.single_task([=]() { + auto R = F(Args...); + static_assert(std::is_same_v); + Success[0] = equal(R, Expected, d); + }); + } }); assert(sycl::host_accessor{SuccessBuf}[0]); } diff --git a/sycl/test-e2e/Basic/exceptions-SYCL-2020.cpp b/sycl/test-e2e/Basic/exceptions-SYCL-2020.cpp index 66be50e8eb181..c93f93f3eba69 100644 --- a/sycl/test-e2e/Basic/exceptions-SYCL-2020.cpp +++ b/sycl/test-e2e/Basic/exceptions-SYCL-2020.cpp @@ -80,23 +80,6 @@ int main() { static_assert(!std::is_error_condition_enum::value, "errc enum should not identify as error condition"); - // Test errc_for and backends. Should compile without complaint. 
- constexpr int EC = 1; - sycl::backend_traits::errc someOpenCLErrCode{EC}; - sycl::errc_for anotherOpenCLErrCode{EC}; - assert(someOpenCLErrCode == anotherOpenCLErrCode); - sycl::backend_traits::errc - someL0ErrCode{EC}; - sycl::errc_for anotherL0ErrCode{EC}; - assert(someL0ErrCode == anotherL0ErrCode); - sycl::backend_traits::errc someCUDAErrCode{ - EC}; - sycl::errc_for anotherCUDAErrCode{EC}; - assert(someCUDAErrCode == anotherCUDAErrCode); - sycl::backend_traits::errc someHIPErrCode{EC}; - sycl::errc_for anotherHIPErrCode{EC}; - assert(someHIPErrCode == anotherHIPErrCode); - std::cout << "OK" << std::endl; return 0; } diff --git a/sycl/test-e2e/Basic/linear-sub_group.cpp b/sycl/test-e2e/Basic/linear-sub_group.cpp index df99664c8f9b9..a7b5f8c720732 100644 --- a/sycl/test-e2e/Basic/linear-sub_group.cpp +++ b/sycl/test-e2e/Basic/linear-sub_group.cpp @@ -9,7 +9,6 @@ // //===----------------------------------------------------------------------===// -#include "../SubGroup/helper.hpp" #include #include #include @@ -20,10 +19,6 @@ using namespace sycl; int main(int argc, char *argv[]) { queue q; - if (!core_sg_supported(q.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } // Fill output array with sub-group IDs const uint32_t outer = 2; diff --git a/sycl/test-e2e/Basic/vector/load_store.cpp b/sycl/test-e2e/Basic/vector/load_store.cpp new file mode 100644 index 0000000000000..626fd0264fb71 --- /dev/null +++ b/sycl/test-e2e/Basic/vector/load_store.cpp @@ -0,0 +1,208 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// RUN: %if preview-breaking-changes-supported %{ %{build} -fpreview-breaking-changes -o %t2.out %} +// RUN: %if preview-breaking-changes-supported %{ %{run} %t2.out %} + +// Tests load and store on sycl::vec. 
+ +#include +#include +#include +#include + +namespace syclex = sycl::ext::oneapi; + +template +int CheckResult(const T0 &Actual, const T1 &Reference, const char *Category) { + int Failures = 0; + for (size_t I = 0; I < N; ++I) { + if (Actual[I] == Reference[I]) + continue; + + std::cout << "Failed at index " << I << ": " << Category << " - " + << Actual[I] << " != " << Reference[I] << std::endl; + ++Failures; + } + return Failures; +} + +template int RunTest(sycl::queue &Q) { + using ElemT = typename VecT::element_type; + + int Failures = 0; + // Load on host. + // Note: multi_ptr is not usable on host, so only raw pointer is tested. + { + const ElemT Ref[] = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13}; + VecT V{0}; + V.load(2, Ref); + Failures += CheckResult<4>(V, Ref + 8, "load with raw pointer on host"); + } + + // Store on host. + // Note: multi_ptr is not usable on host, so only raw pointer is tested. + { + ElemT Out[] = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13}; + const VecT V{4, 3, 2, 1}; + V.store(1, Out); + const ElemT Ref[] = {0, 2, 1, 4, 4, 3, 2, 1, 7, 10, 9, 12, 11, 14, 13}; + Failures += + CheckResult(Out, Ref, "store in raw pointer on host"); + } + + // Load on device. + { + const ElemT Ref[] = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, + 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24}; + VecT V[6] = {VecT{0}}; + + { + sycl::buffer RefBuff{Ref, std::size(Ref)}; + sycl::buffer VBuff{V, std::size(V)}; + + Q.submit([&](sycl::handler &CGH) { + sycl::accessor GlobalRefAcc{RefBuff, CGH, sycl::read_only}; + sycl::accessor VAcc{VBuff, CGH, sycl::read_write}; + sycl::local_accessor LocalRefAcc{std::size(Ref), CGH}; + CGH.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) { + // Initialize the local and private memory copies. + ElemT PrivateRef[std::size(Ref)] = {0}; + for (size_t I = 0; I < GlobalRefAcc.size(); ++I) { + PrivateRef[I] = GlobalRefAcc[I]; + LocalRefAcc[I] = GlobalRefAcc[I]; + } + + // Load with global multi_ptr. 
+ auto GlobalMPtr = + GlobalRefAcc + .template get_multi_ptr(); + VAcc[0].load(0, GlobalMPtr); + + // Load with local multi_ptr. + auto LocalMPtr = + LocalRefAcc.template get_multi_ptr(); + VAcc[1].load(1, LocalMPtr); + + // Load with private multi_ptr. + auto PrivateMPtr = sycl::address_space_cast< + sycl::access::address_space::private_space, + sycl::access::decorated::no>(PrivateRef); + VAcc[2].load(2, PrivateMPtr); + + // Load with global raw pointer. + const ElemT *GlobalRawPtr = GlobalMPtr.get_raw(); + VAcc[3].load(3, GlobalRawPtr); + + // Load with local raw pointer. + const ElemT *LocalRawPtr = LocalMPtr.get_raw(); + VAcc[4].load(4, LocalRawPtr); + + // Load with private raw pointer. + VAcc[5].load(5, PrivateRef); + }); + }); + } + + Failures += + CheckResult<4>(V[0], Ref, "load with global multi_ptr on device"); + Failures += + CheckResult<4>(V[1], Ref + 4, "load with local multi_ptr on device"); + Failures += + CheckResult<4>(V[2], Ref + 8, "load with private multi_ptr on device"); + Failures += CheckResult<4>(V[3], Ref + 12, + "load with global raw pointer on device"); + Failures += + CheckResult<4>(V[4], Ref + 16, "load with local raw pointer on device"); + Failures += CheckResult<4>(V[5], Ref + 20, + "load with private raw pointer on device"); + } + + // Store on device. + { + ElemT Out[24] = {0}; + const VecT V[] = {{0, 2, 1, 4}, {3, 6, 5, 8}, {7, 10, 9, 12}, + {11, 14, 13, 16}, {15, 18, 17, 20}, {19, 22, 21, 24}}; + + { + sycl::buffer OutBuff{Out, std::size(Out)}; + + Q.submit([&](sycl::handler &CGH) { + sycl::accessor OutAcc{OutBuff, CGH, sycl::read_write}; + sycl::local_accessor LocalOutAcc{std::size(Out), CGH}; + CGH.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1>) { + ElemT PrivateVal[std::size(Out)] = {0}; + + // Store in global multi_ptr. + auto GlobalMPtr = + OutAcc.template get_multi_ptr(); + V[0].store(0, GlobalMPtr); + + // Store in local multi_ptr. 
+ auto LocalMPtr = + LocalOutAcc.template get_multi_ptr(); + V[1].store(1, LocalMPtr); + + // Store in private multi_ptr. + auto PrivateMPtr = sycl::address_space_cast< + sycl::access::address_space::private_space, + sycl::access::decorated::no>(PrivateVal); + V[2].store(2, PrivateMPtr); + + // Store in global raw pointer. + ElemT *GlobalRawPtr = GlobalMPtr.get_raw(); + V[3].store(3, GlobalRawPtr); + + // Store in local raw pointer. + ElemT *LocalRawPtr = LocalMPtr.get_raw(); + V[4].store(4, LocalRawPtr); + + // Store in private raw pointer. + V[5].store(5, PrivateVal); + + // Write local and private results back to the global buffer. + for (size_t I = 0; I < 4; ++I) { + OutAcc[4 + I] = LocalMPtr[4 + I]; + OutAcc[8 + I] = PrivateVal[8 + I]; + OutAcc[16 + I] = LocalMPtr[16 + I]; + OutAcc[20 + I] = PrivateVal[20 + I]; + } + }); + }); + } + + const ElemT Ref[] = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, + 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24}; + + Failures += CheckResult<4>(Out, Ref, "store in global multi_ptr on device"); + Failures += + CheckResult<4>(Out + 4, Ref + 4, "store in local multi_ptr on device"); + Failures += CheckResult<4>(Out + 8, Ref + 8, + "store in private multi_ptr on device"); + Failures += CheckResult<4>(Out + 12, Ref + 12, + "store in global raw pointer on device"); + Failures += CheckResult<4>(Out + 16, Ref + 16, + "store in local raw pointer on device"); + Failures += CheckResult<4>(Out + 20, Ref + 20, + "store in private raw pointer on device"); + } + + return Failures; +} + +int main() { + sycl::queue Q; + + int Failures = 0; + + Failures += RunTest(Q); + Failures += RunTest(Q); + Failures += RunTest>(Q); + + if (Q.get_device().has(sycl::aspect::fp16)) + Failures += RunTest(Q); + if (Q.get_device().has(sycl::aspect::fp64)) + Failures += RunTest(Q); + + return Failures; +} diff --git a/sycl/test-e2e/ESIMD/unified_memory_api/atomic_update_usm_dg2_pvc_cmpxchg.cpp 
b/sycl/test-e2e/ESIMD/unified_memory_api/atomic_update_usm_dg2_pvc_cmpxchg.cpp index ea7f851f5c709..9ee3932f8689d 100644 --- a/sycl/test-e2e/ESIMD/unified_memory_api/atomic_update_usm_dg2_pvc_cmpxchg.cpp +++ b/sycl/test-e2e/ESIMD/unified_memory_api/atomic_update_usm_dg2_pvc_cmpxchg.cpp @@ -7,6 +7,7 @@ //===---------------------------------------------------------------------===// // REQUIRES: gpu-intel-pvc || gpu-intel-dg2 +// REQUIRES-INTEL-DRIVER: win: 101.5660 // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp b/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp index da3d34df48f37..5d962d2a51948 100644 --- a/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp +++ b/sycl/test-e2e/FilterSelector/select_device_level_zero.cpp @@ -22,30 +22,26 @@ int main() { } { - default_selector ds; - device d = ds.select_device(); + device d(default_selector_v); string name = d.get_platform().get_info(); assert(name.find("Level-Zero") != string::npos); } { - gpu_selector gs; - device d = gs.select_device(); + device d(gpu_selector_v); string name = d.get_platform().get_info(); assert(name.find("Level-Zero") != string::npos); } { - cpu_selector cs; try { - device d = cs.select_device(); + device d(cpu_selector_v); cerr << "CPU device is found in error: " << d.is_cpu() << std::endl; return -1; } catch (...) { } } { - accelerator_selector as; try { - device d = as.select_device(); + device d(accelerator_selector_v); cerr << "ACC device is found in error: " << d.is_accelerator() << std::endl; } catch (...) 
{ diff --git a/sycl/test-e2e/Graph/Explicit/host_task_last.cpp b/sycl/test-e2e/Graph/Explicit/host_task_last.cpp index 34df0750b5366..5371ea1df3708 100644 --- a/sycl/test-e2e/Graph/Explicit/host_task_last.cpp +++ b/sycl/test-e2e/Graph/Explicit/host_task_last.cpp @@ -2,8 +2,10 @@ // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG // RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Disabled due to https://github.com/intel/llvm/issues/14350 // Extra run to check for immediate-command-list in Level Zero -// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// xRUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} // REQUIRES: aspect-usm_shared_allocations diff --git a/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp b/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp index b577fe16d458d..fee893b9f7529 100644 --- a/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp +++ b/sycl/test-e2e/HostInteropTask/interop-task-cuda-buffer-migrate.cpp @@ -1,7 +1,4 @@ // REQUIRES: cuda -// XFAIL: cuda -// -// FIXME: this is broken with a multi device context // // RUN: %{build} -o %t.out -lcuda // RUN: %{run} %t.out @@ -31,12 +28,6 @@ int main() { platform(gpu_selector_v).get_devices(info::device_type::gpu); std::cout << Devices.size() << " devices found" << std::endl; - if (Devices.size() == 1) { - // Since this is XFAIL for Devices.size() > 1 we need to return failure if - // test can't run - return 1; - } - context C(Devices); int Index = 0; diff --git a/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp b/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp index 
befdb27f439e2..cfa4f0f53e060 100644 --- a/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp +++ b/sycl/test-e2e/Plugin/interop-level-zero-keep-ownership.cpp @@ -30,8 +30,9 @@ int main() { { // Scope in which SYCL interop context object is live std::vector Devices{}; Devices.push_back(Device); - auto Context = level_zero::make(Devices, ZeContext, - level_zero::ownership::keep); + auto Context = make_context( + backend_input_t{ + ZeContext, Devices, ext::oneapi::level_zero::ownership::keep}); // Create L0 event pool ze_event_pool_handle_t ZeEventPool; @@ -52,8 +53,10 @@ int main() { { // Scope in which SYCL interop event is alive int i = 0; - event Event = level_zero::make(Context, ZeEvent, - level_zero::ownership::keep); + event Event = make_event( + backend_input_t{ + ZeEvent, ext::oneapi::level_zero::ownership::keep}, + Context); info::event_command_status status; do { diff --git a/sycl/test-e2e/Plugin/interop-opencl.cpp b/sycl/test-e2e/Plugin/interop-opencl.cpp index 9c52bc0ecf8b0..faa5ae348191a 100644 --- a/sycl/test-e2e/Plugin/interop-opencl.cpp +++ b/sycl/test-e2e/Plugin/interop-opencl.cpp @@ -47,10 +47,11 @@ int main() { assert(ocl_buffers.size() == 1); // Re-create SYCL objects from native OpenCL handles - auto PlatformInterop = opencl::make(ocl_platform); - auto DeviceInterop = opencl::make(ocl_device); - auto ContextInterop = opencl::make(ocl_context); - auto QueueInterop = opencl::make(ContextInterop, ocl_queue); + auto PlatformInterop = sycl::make_platform(ocl_platform); + auto DeviceInterop = sycl::make_device(ocl_device); + auto ContextInterop = sycl::make_context(ocl_context); + auto QueueInterop = + sycl::make_queue(ocl_queue, ContextInterop); auto BufferInterop = sycl::make_buffer(ocl_buffers[0], ContextInterop); diff --git a/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp b/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp index f34be43ae9587..2b8871ef5a3b6 100644 --- a/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp 
+++ b/sycl/test-e2e/ProfilingTag/in_order_profiling_queue.cpp @@ -21,9 +21,9 @@ // https://github.com/intel/llvm/issues/14053 // UNSUPPORTED: cuda -// Fails on FPGA and level_zero too +// FPGA emulator seems to return unexpected start time for the fallback barrier. // https://github.com/intel/llvm/issues/14315 -// UNSUPPORTED: accelerator || level_zero +// UNSUPPORTED: accelerator #include "common.hpp" diff --git a/sycl/test-e2e/ProfilingTag/profiling_queue.cpp b/sycl/test-e2e/ProfilingTag/profiling_queue.cpp index d0da7612d4ea9..a028278ed957a 100644 --- a/sycl/test-e2e/ProfilingTag/profiling_queue.cpp +++ b/sycl/test-e2e/ProfilingTag/profiling_queue.cpp @@ -17,6 +17,7 @@ // UNSUPPORTED: hip // FPGA emulator seems to return unexpected start time for the fallback barrier. +// https://github.com/intel/llvm/issues/14315 // UNSUPPORTED: accelerator // Flaky on CUDA diff --git a/sycl/test-e2e/Regression/cache_test.cpp b/sycl/test-e2e/Regression/cache_test.cpp index 2bbf7a6431746..72304a2444fec 100644 --- a/sycl/test-e2e/Regression/cache_test.cpp +++ b/sycl/test-e2e/Regression/cache_test.cpp @@ -91,64 +91,32 @@ int queryFromNativeHandle(std::vector *platform_list, zeDeviceGet(l0_drivers[0], &l0_device_count, l0_devices.data()); // Create the platform and device objects using the native handle. - { - // Using deprecated level_zero-specific interface. Intentionally copy-pasted - // and not outlined into a helper because the deprecated interface will be - // removed in a few months. - auto plt = level_zero::make(l0_drivers[0]); - auto dev = level_zero::make(plt, l0_devices[0]); - - // Check to see if this platform is in the platform list. 
- std::cout << "Platform created with native handle: " - << plt.get_info() << std::endl; - auto plt_result = std::find_if(platform_list->begin(), platform_list->end(), - [&](sycl::platform &p) { return p == plt; }); - if (plt_result != platform_list->end()) { - std::cout << "The platform list contains: " - << plt.get_info() << std::endl; - } else { - std::cout << plt.get_info() - << " was not in the platform list.\n"; - failures++; - } + auto plt = + sycl::make_platform(l0_drivers[0]); + auto dev = + sycl::make_device(l0_devices[0]); - // Check to see if this device is in the device list. - std::cout << "Device created with native handle: " - << dev.get_info() << std::endl; - auto dev_result = std::find_if(device_list->begin(), device_list->end(), - [&](sycl::device &d) { return d == dev; }); - if (dev_result != device_list->end()) { - std::cout << "The device list contains: " - << dev.get_info() << std::endl; - } else { - std::cout << dev.get_info() - << " was not in the device list.\n"; - failures++; - } - } - { - // Using SYCL2020 interface. - auto plt = sycl::make_platform( - l0_drivers[0]); - auto dev = - sycl::make_device(l0_devices[0]); - - // Check to see if this platform is in the platform list. - std::cout << "Platform created with native handle: " + // Check to see if this platform is in the platform list. 
+ std::cout << "Platform created with native handle: " + << plt.get_info() << std::endl; + auto plt_result = std::find_if(platform_list->begin(), platform_list->end(), + [&](sycl::platform &p) { return p == plt; }); + if (plt_result != platform_list->end()) { + std::cout << "The platform list contains: " << plt.get_info() << std::endl; - auto plt_result = std::find_if(platform_list->begin(), platform_list->end(), - [&](sycl::platform &p) { return p == plt; }); - if (plt_result != platform_list->end()) { - std::cout << "The platform list contains: " - << plt.get_info() << std::endl; - } else { - std::cout << plt.get_info() - << " was not in the platform list.\n"; - failures++; - } + } else { + std::cout << plt.get_info() + << " was not in the platform list.\n"; + failures++; + } - // Check to see if this device is in the device list. - std::cout << "Device created with native handle: " + // Check to see if this device is in the device list. + std::cout << "Device created with native handle: " + << dev.get_info() << std::endl; + auto dev_result = std::find_if(device_list->begin(), device_list->end(), + [&](sycl::device &d) { return d == dev; }); + if (dev_result != device_list->end()) { + std::cout << "The device list contains: " << dev.get_info() << std::endl; auto dev_result = std::find_if(device_list->begin(), device_list->end(), [&](sycl::device &d) { return d == dev; }); @@ -177,6 +145,7 @@ int queryFromNativeHandle(std::vector *platform_list, failures++; } } + return failures; } diff --git a/sycl/test-e2e/Regression/commandlist/Inputs/main.cpp b/sycl/test-e2e/Regression/commandlist/Inputs/main.cpp index 83c76afe0dd4b..c80ea80e45335 100644 --- a/sycl/test-e2e/Regression/commandlist/Inputs/main.cpp +++ b/sycl/test-e2e/Regression/commandlist/Inputs/main.cpp @@ -44,7 +44,6 @@ int main(int argc, char *argv[]) { int nthreadsGPU = 8; int arr_size = 20; int iter_gpu = 200; - unsigned int gpu_dev = 999; unsigned int nitems = 0; bool passed = true; @@ -73,21 +72,7 @@ int 
main(int argc, char *argv[]) { cout << " iter GPU: " << iter_gpu << "\n"; #endif - std::vector dlist; - if (gpu_dev == 999) { - try { - auto sel = sycl::gpu_selector(); - sel_dev = sel.select_device(); - } catch (...) { - cout << "no gpu device found\n"; - } - } else { - if (gpu_dev > dlist.size() - 1) { - cout << "ERROR: selected device index [" << gpu_dev << "] is too large\n"; - exit(1); - } - sel_dev = dlist[gpu_dev]; - } + sel_dev = sycl::device(sycl::default_selector_v); std::cout << "selected dev: " << sel_dev.get_info() << "\n"; diff --git a/sycl/test-e2e/Regression/device_num.cpp b/sycl/test-e2e/Regression/device_num.cpp index 5feaf110bdfcc..cc70ea5295406 100644 --- a/sycl/test-e2e/Regression/device_num.cpp +++ b/sycl/test-e2e/Regression/device_num.cpp @@ -175,27 +175,24 @@ int main() { targetDevIndex = GetPreferredDeviceIndex(devices, info::device_type::all); assert(targetDevIndex >= 0 && "Failed to find target device for default selector."); - default_selector ds; - device d = ds.select_device(); - std::cout << "default_selector selected "; + device d(default_selector_v); + std::cout << "default_selector_v selected "; printDeviceType(d); assert(devices[targetDevIndex] == d && "The selected device is not the target device specified."); } targetDevIndex = GetPreferredDeviceIndex(devices, info::device_type::gpu); if (targetDevIndex >= 0) { - gpu_selector gs; - device d = gs.select_device(); - std::cout << "gpu_selector selected "; + device d(gpu_selector_v); + std::cout << "gpu_selector_v selected "; printDeviceType(d); assert(devices[targetDevIndex] == d && "The selected device is not the target device specified."); } targetDevIndex = GetPreferredDeviceIndex(devices, info::device_type::cpu); if (targetDevIndex >= 0) { - cpu_selector cs; - device d = cs.select_device(); - std::cout << "cpu_selector selected "; + device d(cpu_selector_v); + std::cout << "cpu_selector_v selected "; printDeviceType(d); assert(devices[targetDevIndex] == d && "The selected 
device is not the target device specified."); @@ -203,9 +200,8 @@ int main() { targetDevIndex = GetPreferredDeviceIndex(devices, info::device_type::accelerator); if (targetDevIndex >= 0) { - accelerator_selector as; - device d = as.select_device(); - std::cout << "accelerator_selector selected "; + device d(accelerator_selector_v); + std::cout << "accelerator_selector_v selected "; printDeviceType(d); assert(devices[targetDevIndex] == d && "The selected device is not the target device specified."); diff --git a/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp b/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp index 79ef8c29caa1e..b44ab15c214c0 100644 --- a/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp +++ b/sycl/test-e2e/Regression/device_pci_address_bdf_format.cpp @@ -40,7 +40,7 @@ int main(int argc, char **argv) { continue; std::string PCIAddress = - dev.get_info(); + dev.get_info(); std::cout << "PCI address = " << PCIAddress << std::endl; assert(std::regex_match(PCIAddress, ExpectedBDFFormat)); } diff --git a/sycl/test-e2e/Regression/get_subgroup_sizes.cpp b/sycl/test-e2e/Regression/get_subgroup_sizes.cpp index eb910425ea8d4..38a07ce20ef79 100644 --- a/sycl/test-e2e/Regression/get_subgroup_sizes.cpp +++ b/sycl/test-e2e/Regression/get_subgroup_sizes.cpp @@ -1,3 +1,9 @@ +// UNSUPPORTED: accelerator +// TODO: FPGAs currently report `sub_group_sizes` as non-empty list, +// despite not having extension `cl_intel_required_subgroup_size` +// UNSUPPORTED: cuda || hip +// TODO: Similar issue to FPGAs + // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -18,13 +24,15 @@ int main() { queue Q; auto Dev = Q.get_device(); auto Vec = Dev.get_info(); + std::vector SubGroupSizes = + Dev.get_info(); if (std::find(Vec.begin(), Vec.end(), "cl_intel_required_subgroup_size") != std::end(Vec)) { - std::vector SubGroupSizes = - Dev.get_info(); - std::vector::const_iterator MaxIter = - std::max_element(SubGroupSizes.begin(), SubGroupSizes.end()); - int 
MaxSubGroup_size = *MaxIter; + assert(!SubGroupSizes.empty() && + "Required sub-group size list should not be empty"); + } else { + assert(SubGroupSizes.empty() && + "Required sub-group size list should be empty"); } return 0; } diff --git a/sycl/test-e2e/Regression/group.cpp b/sycl/test-e2e/Regression/group.cpp index 6e528dccb695c..d906c3500a095 100644 --- a/sycl/test-e2e/Regression/group.cpp +++ b/sycl/test-e2e/Regression/group.cpp @@ -111,7 +111,7 @@ bool group__get_linear_id() { cgh.parallel_for( nd_range<3>{GlobalRange, LocalRange}, [=](nd_item I) { const auto Off = I.get_global_linear_id() * 3; - const auto LI = I.get_group().get_linear_id(); + const auto LI = I.get_group().get_group_linear_id(); Ptr[Off + 0] = LI; Ptr[Off + 1] = LI; Ptr[Off + 2] = LI; diff --git a/sycl/test-e2e/Regression/image_access.cpp b/sycl/test-e2e/Regression/image_access.cpp index 347430c406e37..7ade1b4fb3b4b 100644 --- a/sycl/test-e2e/Regression/image_access.cpp +++ b/sycl/test-e2e/Regression/image_access.cpp @@ -21,13 +21,13 @@ int main() { try { sycl::range<1> Range(32); - std::vector Data(Range.size() * 4, 0.0f); + std::vector Data(Range.size() * 4, 0.0f); sycl::image<1> Image(Data.data(), sycl::image_channel_order::rgba, sycl::image_channel_type::fp32, Range); sycl::queue Queue; Queue.submit([&](sycl::handler &CGH) { - sycl::accessor A(Image, CGH); @@ -35,7 +35,7 @@ int main() { }); Queue.wait_and_throw(); - sycl::accessor A(Image); diff --git a/sycl/test-e2e/SubGroup/attributes.cpp b/sycl/test-e2e/SubGroup/attributes.cpp index 44de853e07f91..a4503726c1dec 100644 --- a/sycl/test-e2e/SubGroup/attributes.cpp +++ b/sycl/test-e2e/SubGroup/attributes.cpp @@ -1,3 +1,10 @@ +// UNSUPPORTED: accelerator +// TODO: FPGAs currently report supported subgroups as {4,8,16,32,64}, causing +// this test to fail +// UNSUPPORTED: cuda || hip +// TODO: Device subgroup sizes reports {32}, but when we try to use it with a +// kernel attribute and check it, we get a subgroup size of 0. 
+ // RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out // RUN: %{run} %t.out //==------- attributes.cpp - SYCL sub_group attributes test ----*- C++ -*---==// @@ -13,7 +20,7 @@ #define KERNEL_FUNCTOR_WITH_SIZE(SIZE) \ class KernelFunctor##SIZE { \ public: \ - [[intel::reqd_sub_group_size(SIZE)]] void \ + [[sycl::reqd_sub_group_size(SIZE)]] void \ operator()(sycl::nd_item<1> Item) const { \ const auto GID = Item.get_global_id(); \ } \ @@ -49,19 +56,6 @@ int main() { queue Queue; device Device = Queue.get_device(); - // According to specification, this kernel query requires `cl_khr_subgroups` - // or `cl_intel_subgroups`, and also `cl_intel_required_subgroup_size` - auto Vec = Device.get_info(); - if (std::find(Vec.begin(), Vec.end(), "cl_intel_subgroups") == - std::end(Vec) && - std::find(Vec.begin(), Vec.end(), "cl_khr_subgroups") == - std::end(Vec) || - std::find(Vec.begin(), Vec.end(), "cl_intel_required_subgroup_size") == - std::end(Vec)) { - std::cout << "Skipping test\n"; - return 0; - } - try { const auto SGSizes = Device.get_info(); diff --git a/sycl/test-e2e/SubGroup/generic-shuffle.cpp b/sycl/test-e2e/SubGroup/generic-shuffle.cpp deleted file mode 100644 index cb9f1a720b1b7..0000000000000 --- a/sycl/test-e2e/SubGroup/generic-shuffle.cpp +++ /dev/null @@ -1,239 +0,0 @@ -// RUN: %{build} -fsycl-device-code-split=per_kernel -o %t.out -// RUN: %{run} %t.out -// -//==-- generic_shuffle.cpp - SYCL sub_group generic shuffle test *- C++ -*--==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "helper.hpp" -#include -#include -#include -template class pointer_kernel; - -using namespace sycl; - -template -void check_pointer(queue &Queue, size_t G = 256, size_t L = 64) { - try { - nd_range<1> NdRange(G, L); - buffer buf(G); - buffer buf_up(G); - buffer buf_down(G); - buffer buf_xor(G); - buffer sgsizebuf(1); - Queue.submit([&](handler &cgh) { - auto acc = buf.template get_access(cgh); - auto acc_up = buf_up.template get_access(cgh); - auto acc_down = - buf_down.template get_access(cgh); - auto acc_xor = buf_xor.template get_access(cgh); - auto sgsizeacc = sgsizebuf.get_access(cgh); - - cgh.parallel_for( - NdRange, [=](nd_item<1> NdItem) { - sycl::sub_group SG = NdItem.get_sub_group(); - uint32_t wggid = NdItem.get_global_id(0); - uint32_t sgid = SG.get_group_id().get(0); - if (wggid == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; - - T *ptr = static_cast(0x0) + wggid; - - /*GID of middle element in every subgroup*/ - acc[NdItem.get_global_id()] = - SG.shuffle(ptr, SG.get_max_local_range()[0] / 2); - - /* Save GID-SGID */ - acc_up[NdItem.get_global_id()] = SG.shuffle_up(ptr, sgid); - - /* Save GID+SGID */ - acc_down[NdItem.get_global_id()] = SG.shuffle_down(ptr, sgid); - - /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - acc_xor[NdItem.get_global_id()] = - SG.shuffle_xor(ptr, sgid % SG.get_max_local_range()[0]); - }); - }); - host_accessor acc(buf); - host_accessor acc_up(buf_up); - host_accessor acc_down(buf_down); - host_accessor acc_xor(buf_xor); - host_accessor sgsizeacc(sgsizebuf); - - size_t sg_size = sgsizeacc[0]; - int SGid = 0; - int SGLid = 0; - int SGBeginGid = 0; - for (int j = 0; j < G; j++) { - if (j % L % sg_size == 0) { - SGid++; - SGLid = 0; - SGBeginGid = j; - } - if (j % L == 0) { - SGid = 0; - SGLid = 0; - SGBeginGid = j; - } - - /*GID of middle element in every 
subgroup*/ - exit_if_not_equal(acc[j], - static_cast(0x0) + - (j / L * L + SGid * sg_size + sg_size / 2), - "shuffle"); - - /* Value GID+SGID for all element except last SGID in SG*/ - if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) { - exit_if_not_equal(acc_down[j], static_cast(0x0) + (j + SGid), - "shuffle_down"); - } - - /* Value GID-SGID for all element except first SGID in SG*/ - if (j % L % sg_size >= SGid) { - exit_if_not_equal(acc_up[j], static_cast(0x0) + (j - SGid), - "shuffle_up"); - } - - /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - exit_if_not_equal(acc_xor[j], - static_cast(0x0) + - (SGBeginGid + (SGLid ^ (SGid % sg_size))), - "shuffle_xor"); - SGLid++; - } - } catch (exception e) { - std::cout << "SYCL exception caught: " << e.what(); - exit(1); - } -} - -template -void check_struct(queue &Queue, Generator &Gen, size_t G = 256, size_t L = 64) { - - // Fill a vector with values that will be shuffled - std::vector values(G); - std::generate(values.begin(), values.end(), Gen); - - try { - nd_range<1> NdRange(G, L); - buffer buf(G); - buffer buf_up(G); - buffer buf_down(G); - buffer buf_xor(G); - buffer sgsizebuf(1); - buffer buf_in(values.data(), values.size()); - Queue.submit([&](handler &cgh) { - auto acc = buf.template get_access(cgh); - auto acc_up = buf_up.template get_access(cgh); - auto acc_down = - buf_down.template get_access(cgh); - auto acc_xor = buf_xor.template get_access(cgh); - auto sgsizeacc = sgsizebuf.get_access(cgh); - auto in = buf_in.template get_access(cgh); - - cgh.parallel_for( - NdRange, [=](nd_item<1> NdItem) { - sycl::sub_group SG = NdItem.get_sub_group(); - uint32_t wggid = NdItem.get_global_id(0); - uint32_t sgid = SG.get_group_id().get(0); - if (wggid == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; - - T val = in[wggid]; - - /*GID of middle element in every subgroup*/ - acc[NdItem.get_global_id()] = - SG.shuffle(val, SG.get_max_local_range()[0] / 2); - - /* Save GID-SGID */ - 
acc_up[NdItem.get_global_id()] = SG.shuffle_up(val, sgid); - - /* Save GID+SGID */ - acc_down[NdItem.get_global_id()] = SG.shuffle_down(val, sgid); - - /* Save GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - acc_xor[NdItem.get_global_id()] = - SG.shuffle_xor(val, sgid % SG.get_max_local_range()[0]); - }); - }); - host_accessor acc(buf); - host_accessor acc_up(buf_up); - host_accessor acc_down(buf_down); - host_accessor acc_xor(buf_xor); - host_accessor sgsizeacc(sgsizebuf); - - size_t sg_size = sgsizeacc[0]; - int SGid = 0; - int SGLid = 0; - int SGBeginGid = 0; - for (int j = 0; j < G; j++) { - if (j % L % sg_size == 0) { - SGid++; - SGLid = 0; - SGBeginGid = j; - } - if (j % L == 0) { - SGid = 0; - SGLid = 0; - SGBeginGid = j; - } - - /*GID of middle element in every subgroup*/ - exit_if_not_equal( - acc[j], values[j / L * L + SGid * sg_size + sg_size / 2], "shuffle"); - - /* Value GID+SGID for all element except last SGID in SG*/ - if (j % L % sg_size + SGid < sg_size && j % L + SGid < L) { - exit_if_not_equal(acc_down[j], values[j + SGid], "shuffle_down"); - } - - /* Value GID-SGID for all element except first SGID in SG*/ - if (j % L % sg_size >= SGid) { - exit_if_not_equal(acc_up[j], values[j - SGid], "shuffle_up"); - } - - /* Value GID with SGLID = ( SGLID XOR SGID ) % SGMaxSize */ - exit_if_not_equal(acc_xor[j], - values[SGBeginGid + (SGLid ^ (SGid % sg_size))], - "shuffle_xor"); - SGLid++; - } - } catch (exception e) { - std::cout << "SYCL exception caught: " << e.what(); - exit(1); - } -} - -int main() { - queue Queue; - - // Test shuffle of pointer types - check_pointer(Queue); - - // Test shuffle of non-native types - auto ComplexFloatGenerator = [state = std::complex(0, 1)]() mutable { - return state += std::complex(2, 2); - }; - check_struct>( - Queue, ComplexFloatGenerator); - - if (Queue.get_device().has(sycl::aspect::fp64)) { - auto ComplexDoubleGenerator = [state = - std::complex(0, 1)]() mutable { - return state += std::complex(2, 2); - }; - 
check_struct>( - Queue, ComplexDoubleGenerator); - } else { - std::cout << "fp64 tests were skipped due to the device not supporting the " - "aspect."; - } - - std::cout << "Test passed." << std::endl; - return 0; -} diff --git a/sycl/test-e2e/SubGroup/helper.hpp b/sycl/test-e2e/SubGroup/helper.hpp index dfb47988d85e0..2a88bb0f5652e 100644 --- a/sycl/test-e2e/SubGroup/helper.hpp +++ b/sycl/test-e2e/SubGroup/helper.hpp @@ -164,24 +164,3 @@ void exit_if_not_equal_vec(vec val, vec ref, const char *name) { exit(1); } } - -bool core_sg_supported(const device &Device) { - auto Vec = Device.get_info(); - if (std::find(Vec.begin(), Vec.end(), "cl_khr_subgroups") != std::end(Vec)) - return true; - - if (std::find(Vec.begin(), Vec.end(), "cl_intel_subgroups") != std::end(Vec)) - return true; - - if (Device.get_backend() == sycl::backend::opencl) { - // Extract the numerical version from the version string, OpenCL version - // string have the format "OpenCL . ". - std::string ver = Device.get_info().substr(7, 3); - - // cl_khr_subgroups was core in OpenCL 2.1 and 2.2, but went back to - // optional in 3.0 - return ver >= "2.1" && ver < "3.0"; - } - - return false; -} diff --git a/sycl/test-e2e/SubGroup/info.cpp b/sycl/test-e2e/SubGroup/info.cpp index 83e9fdd5a64bf..51e5f760b8de0 100644 --- a/sycl/test-e2e/SubGroup/info.cpp +++ b/sycl/test-e2e/SubGroup/info.cpp @@ -1,3 +1,7 @@ +// UNSUPPORTED: accelerator +// TODO: FPGAs currently report supported subgroups as {4,8,16,32,64}, causing +// this test to fail. Additionally, the kernel max_sub_group_size checks +// crash on FPGAs // RUN: %{build} -o %t.out // RUN: %{run} %t.out @@ -17,14 +21,20 @@ int main() { queue Queue; device Device = Queue.get_device(); - /* Basic sub-group functionality is supported as part of cl_khr_subgroups - * extension or as core OpenCL 2.1 feature. 
*/ - if (!core_sg_supported(Device)) { - std::cout << "Skipping test\n"; - return 0; + bool old_opencl = false; + if (Device.get_backend() == sycl::backend::opencl) { + // Extract the numerical version from the version string, OpenCL version + // string have the format "OpenCL . ". + std::string ver = Device.get_info().substr(7, 3); + old_opencl = (ver < "2.1"); } + /* Check info::device parameters. */ - Device.get_info(); + if (!old_opencl) { + // Independent forward progress is missing on OpenCL backend prior to + // version 2.1 + Device.get_info(); + } Device.get_info(); try { @@ -49,30 +59,24 @@ int main() { }); uint32_t Res = 0; - /* sub_group_sizes can be queried only if cl_intel_required_subgroup_size - * extension is supported by device*/ - auto Vec = Device.get_info(); - if (std::find(Vec.begin(), Vec.end(), "cl_intel_required_subgroup_size") != - std::end(Vec)) { - auto sg_sizes = Device.get_info(); + auto sg_sizes = Device.get_info(); + + // Max sub-group size for a particular kernel might not be the max + // supported size on the device in general. Can only check that it is + // contained in list of valid sizes. + Res = Kernel.get_info( + Device); + bool Expected = + std::find(sg_sizes.begin(), sg_sizes.end(), Res) != sg_sizes.end(); + exit_if_not_equal(Expected, true, "max_sub_group_size"); - // Max sub-group size for a particular kernel might not be the max - // supported size on the device in general. Can only check that it is - // contained in list of valid sizes. 
+ for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1), + range<3>(32, 3, 4), range<3>(7, 9, 11)}) { Res = Kernel.get_info( Device); - bool Expected = + Expected = std::find(sg_sizes.begin(), sg_sizes.end(), Res) != sg_sizes.end(); exit_if_not_equal(Expected, true, "max_sub_group_size"); - - for (auto r : {range<3>(3, 4, 5), range<3>(1, 1, 1), range<3>(4, 2, 1), - range<3>(32, 3, 4), range<3>(7, 9, 11)}) { - Res = Kernel.get_info( - Device); - Expected = - std::find(sg_sizes.begin(), sg_sizes.end(), Res) != sg_sizes.end(); - exit_if_not_equal(Expected, true, "max_sub_group_size"); - } } Res = Kernel.get_info( @@ -81,21 +85,11 @@ int main() { /* Sub-group size is not specified in kernel or IL*/ exit_if_not_equal(Res, 0, "compile_num_sub_groups"); - // According to specification, this kernel query requires `cl_khr_subgroups` - // or `cl_intel_subgroups` - if ((std::find(Vec.begin(), Vec.end(), "cl_khr_subgroups") != - std::end(Vec)) || - std::find(Vec.begin(), Vec.end(), "cl_intel_subgroups") != - std::end(Vec) && - std::find(Vec.begin(), Vec.end(), - "cl_intel_required_subgroup_size") != std::end(Vec)) { - Res = - Kernel.get_info( - Device); - - /* Required sub-group size is not specified in kernel or IL*/ - exit_if_not_equal(Res, 0, "compile_sub_group_size"); - } + Res = Kernel.get_info( + Device); + + /* Required sub-group size is not specified in kernel or IL*/ + exit_if_not_equal(Res, 0, "compile_sub_group_size"); } catch (exception e) { std::cout << "SYCL exception caught: " << e.what(); diff --git a/sycl/test-e2e/SubGroup/reduce.cpp b/sycl/test-e2e/SubGroup/reduce.cpp index 9422db73a599a..165556df711b1 100644 --- a/sycl/test-e2e/SubGroup/reduce.cpp +++ b/sycl/test-e2e/SubGroup/reduce.cpp @@ -13,10 +13,6 @@ #include int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); check(Queue); check(Queue); diff --git a/sycl/test-e2e/SubGroup/reduce_fp16.cpp 
b/sycl/test-e2e/SubGroup/reduce_fp16.cpp index 1140ab26677f9..ea40b0897e7c1 100644 --- a/sycl/test-e2e/SubGroup/reduce_fp16.cpp +++ b/sycl/test-e2e/SubGroup/reduce_fp16.cpp @@ -10,10 +10,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/reduce_fp64.cpp b/sycl/test-e2e/SubGroup/reduce_fp64.cpp index 0fd801deda7ec..f00b0000a6351 100644 --- a/sycl/test-e2e/SubGroup/reduce_fp64.cpp +++ b/sycl/test-e2e/SubGroup/reduce_fp64.cpp @@ -8,10 +8,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/reduce_spirv13.cpp b/sycl/test-e2e/SubGroup/reduce_spirv13.cpp index 43fbdb3b215c5..aaee44963d54a 100644 --- a/sycl/test-e2e/SubGroup/reduce_spirv13.cpp +++ b/sycl/test-e2e/SubGroup/reduce_spirv13.cpp @@ -8,10 +8,6 @@ #include int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check_mul(Queue); check_mul(Queue); diff --git a/sycl/test-e2e/SubGroup/reduce_spirv13_fp16.cpp b/sycl/test-e2e/SubGroup/reduce_spirv13_fp16.cpp index d7e074551d9f9..d74fbd9a856a8 100644 --- a/sycl/test-e2e/SubGroup/reduce_spirv13_fp16.cpp +++ b/sycl/test-e2e/SubGroup/reduce_spirv13_fp16.cpp @@ -11,10 +11,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check_mul(Queue); std::cout << "Test passed." 
<< std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/reduce_spirv13_fp64.cpp b/sycl/test-e2e/SubGroup/reduce_spirv13_fp64.cpp index 6106ad2ba86a1..543dc32e24fc3 100644 --- a/sycl/test-e2e/SubGroup/reduce_spirv13_fp64.cpp +++ b/sycl/test-e2e/SubGroup/reduce_spirv13_fp64.cpp @@ -10,10 +10,6 @@ #include int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check_mul(Queue); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/scan.cpp b/sycl/test-e2e/SubGroup/scan.cpp index 989fcf7f588a7..74db27c7f5208 100644 --- a/sycl/test-e2e/SubGroup/scan.cpp +++ b/sycl/test-e2e/SubGroup/scan.cpp @@ -14,10 +14,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); check(Queue); check(Queue); diff --git a/sycl/test-e2e/SubGroup/scan_fp16.cpp b/sycl/test-e2e/SubGroup/scan_fp16.cpp index d49b960f4148d..a9d26f4a396cd 100644 --- a/sycl/test-e2e/SubGroup/scan_fp16.cpp +++ b/sycl/test-e2e/SubGroup/scan_fp16.cpp @@ -11,10 +11,6 @@ #include int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/scan_fp64.cpp b/sycl/test-e2e/SubGroup/scan_fp64.cpp index 6e32d278386ab..abd29566e9d77 100644 --- a/sycl/test-e2e/SubGroup/scan_fp64.cpp +++ b/sycl/test-e2e/SubGroup/scan_fp64.cpp @@ -9,10 +9,6 @@ #include int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); std::cout << "Test passed." 
<< std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/scan_spirv13.cpp b/sycl/test-e2e/SubGroup/scan_spirv13.cpp index 0966161844d1f..774283d0d6e13 100644 --- a/sycl/test-e2e/SubGroup/scan_spirv13.cpp +++ b/sycl/test-e2e/SubGroup/scan_spirv13.cpp @@ -9,10 +9,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check_mul(Queue); check_mul(Queue); check_mul(Queue); diff --git a/sycl/test-e2e/SubGroup/scan_spirv13_fp16.cpp b/sycl/test-e2e/SubGroup/scan_spirv13_fp16.cpp index 9a213bd885b1b..136c3210d122a 100644 --- a/sycl/test-e2e/SubGroup/scan_spirv13_fp16.cpp +++ b/sycl/test-e2e/SubGroup/scan_spirv13_fp16.cpp @@ -12,10 +12,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check_mul(Queue); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/scan_spirv13_fp64.cpp b/sycl/test-e2e/SubGroup/scan_spirv13_fp64.cpp index f78a86c9a8e2d..8ea36613ed613 100644 --- a/sycl/test-e2e/SubGroup/scan_spirv13_fp64.cpp +++ b/sycl/test-e2e/SubGroup/scan_spirv13_fp64.cpp @@ -11,10 +11,6 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue); std::cout << "Test passed." << std::endl; return 0; diff --git a/sycl/test-e2e/SubGroup/shuffle.cpp b/sycl/test-e2e/SubGroup/shuffle.cpp deleted file mode 100644 index 2baf624ce804c..0000000000000 --- a/sycl/test-e2e/SubGroup/shuffle.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -//==------------ shuffle.cpp - SYCL sub_group shuffle test -----*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "shuffle.hpp" -#include - -int main() { - queue Queue; - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - - // Check long long and unsigned long long because they differ from - // long and unsigned long according to C++ rules even if they have the same - // size at some system. - check(Queue); - check(Queue); - check(Queue); - check(Queue); - std::cout << "Test passed." << std::endl; - return 0; -} diff --git a/sycl/test-e2e/SubGroup/shuffle_fp16.cpp b/sycl/test-e2e/SubGroup/shuffle_fp16.cpp deleted file mode 100644 index b2fea85d0126f..0000000000000 --- a/sycl/test-e2e/SubGroup/shuffle_fp16.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// REQUIRES: aspect-fp16 -// REQUIRES: gpu - -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "shuffle.hpp" -#include - -int main() { - queue Queue; - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - std::cout << "Test passed." 
<< std::endl; - return 0; -} diff --git a/sycl/test-e2e/SubGroup/shuffle_fp64.cpp b/sycl/test-e2e/SubGroup/shuffle_fp64.cpp deleted file mode 100644 index 1440de1d25a8a..0000000000000 --- a/sycl/test-e2e/SubGroup/shuffle_fp64.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// REQUIRES: aspect-fp64 -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out -// -//==------- shuffle_fp64.cpp - SYCL sub_group shuffle test -----*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "shuffle.hpp" -#include - -int main() { - queue Queue; - check(Queue); - check(Queue); - check(Queue); - check(Queue); - check(Queue); - std::cout << "Test passed." << std::endl; - return 0; -} diff --git a/sycl/test-e2e/SubGroup/vote.cpp b/sycl/test-e2e/SubGroup/vote.cpp index d835e2d2d9ec1..5fb9ac0066194 100644 --- a/sycl/test-e2e/SubGroup/vote.cpp +++ b/sycl/test-e2e/SubGroup/vote.cpp @@ -69,10 +69,6 @@ void check(queue Queue, const int G, const int L, const int D, const int R) { } int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } check(Queue, 240, 80, 3, 1); check(Queue, 24, 12, 3, 4); check(Queue, 1024, 256, 3, 1); diff --git a/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp b/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp new file mode 100644 index 0000000000000..cbbcf52e3ab25 --- /dev/null +++ b/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp @@ -0,0 +1,236 @@ +// REQUIRES: aspect-ext_oneapi_virtual_mem, usm_shared_allocations + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +#include +#include +#include + +namespace syclext = sycl::ext::oneapi::experimental; + +// Find the least common multiple of the context and device 
granularities. This +// value can be used for aligning both physical memory allocations and for +// reserving virtual memory ranges. +size_t GetLCMGranularity(const sycl::device &Dev, const sycl::context &Ctx) { + size_t CtxGranularity = syclext::get_mem_granularity(Ctx); + size_t DevGranularity = syclext::get_mem_granularity(Dev, Ctx); + + size_t GCD = CtxGranularity; + size_t Rem = DevGranularity % GCD; + while (Rem != 0) { + std::swap(GCD, Rem); + Rem %= GCD; + } + return (DevGranularity / GCD) * CtxGranularity; +} + +template class VirtualVector { +public: + VirtualVector(sycl::queue &Q) + : MDevice{Q.get_device()}, MContext{Q.get_context()}, + MGranularity{GetLCMGranularity(MDevice, MContext)} {}; + + ~VirtualVector() { + // Free all mapped ranges. + unmap_all(); + for (const VirtualAddressRange &VARange : MVARanges) + syclext::free_virtual_mem(VARange.Ptr, VARange.Size, MContext); + // Physical memory allocations will be freed when the physical_mem objects + // die with MPhysicalMemMappings. + } + + void reserve(size_t NewSize) { + // If we already have more memory than required, we can return. + size_t NewByteSize = sizeof(T) * NewSize; + if (NewByteSize <= MByteSize) { + MSize = NewSize; + return; + } + + // Align the size by the granularity. + size_t AlignedNewByteSize = AlignByteSize(NewByteSize); + size_t AlignedNewVARangeSize = AlignedNewByteSize - MByteSize; + + // Try to reserve virtual memory at the end of the existing one. + uintptr_t CurrentEnd = reinterpret_cast(MBasePtr) + MByteSize; + uintptr_t NewVAPtr = syclext::reserve_virtual_mem( + CurrentEnd, AlignedNewVARangeSize, MContext); + + // If we failed to get a ptr to the end of the current range, we need to + // recreate the whole range. + if (CurrentEnd && NewVAPtr != CurrentEnd) { + // First we need to free the virtual address range we just reserved. + syclext::free_virtual_mem(NewVAPtr, AlignedNewVARangeSize, MContext); + + // Recreate the full range and update the new VA ptr. 
CurrentEnd is no + // longer valid after this call. + NewVAPtr = RecreateAddressRange(AlignedNewByteSize); + } else { + // Otherwise we need to register the new range. + MVARanges.emplace_back(NewVAPtr, AlignedNewVARangeSize); + + // If there was no base pointer previously, this is now the new base. + if (!MBasePtr) + MBasePtr = reinterpret_cast(NewVAPtr); + } + + // Create new physical memory allocation and map the new range to it. + syclext::physical_mem NewPhysicalMem{MDevice, MContext, + AlignedNewVARangeSize}; + void *MappedPtr = + NewPhysicalMem.map(NewVAPtr, AlignedNewVARangeSize, + syclext::address_access_mode::read_write); + MPhysicalMemMappings.push_back( + std::make_pair(std::move(NewPhysicalMem), MappedPtr)); + + // Update the byte size of the vector. + MSize = NewSize; + MByteSize = AlignedNewByteSize; + } + + size_t size() const noexcept { return MSize; } + T *data() const noexcept { return MBasePtr; } + +private: + size_t AlignByteSize(size_t UnalignedByteSize) const { + return ((UnalignedByteSize + MGranularity - 1) / MGranularity) * + MGranularity; + } + + void unmap_all() { + for (std::pair &Mapping : + MPhysicalMemMappings) { + if (Mapping.second == 0) + continue; + syclext::unmap(Mapping.second, Mapping.first.size(), MContext); + Mapping.second = 0; + } + } + + uintptr_t RecreateAddressRange(size_t AlignedNewByteSize) { + // Reserve the full range. + uintptr_t NewFullVAPtr = + syclext::reserve_virtual_mem(AlignedNewByteSize, MContext); + + // Unmap the old virtual address ranges. + unmap_all(); + + // Remap all existing ranges. + uintptr_t NewEnd = NewFullVAPtr; + for (std::pair &Mapping : + MPhysicalMemMappings) { + Mapping.second = + Mapping.first.map(NewEnd, Mapping.first.size(), + syclext::address_access_mode::read_write); + NewEnd += Mapping.first.size(); + } + + // Free the old ranges. 
+ for (const VirtualAddressRange &VARange : MVARanges) + syclext::free_virtual_mem(VARange.Ptr, VARange.Size, MContext); + + // Insert the newly reserved range to the saved ranges. + MVARanges.clear(); + MVARanges.emplace_back(NewFullVAPtr, AlignedNewByteSize); + + // Update the base pointer to point to the new start. + MBasePtr = reinterpret_cast(NewFullVAPtr); + + // Return the new end of the mapped ranges. + return NewEnd; + } + + struct VirtualAddressRange { + VirtualAddressRange(uintptr_t Ptr, size_t Size) : Ptr{Ptr}, Size{Size} {} + + uintptr_t Ptr; + size_t Size; + }; + + sycl::device MDevice; + sycl::context MContext; + + std::vector MVARanges; + std::vector> MPhysicalMemMappings; + + T *MBasePtr = nullptr; + size_t MSize = 0; + size_t MByteSize = 0; + + const size_t MGranularity = 0; +}; + +static constexpr size_t NumIters = 10; +static constexpr size_t WriteValueOffset = 42; +static constexpr size_t NumWorkItems = 512; + +int main() { + sycl::queue Q; + + VirtualVector Vec(Q); + + // To better test the functionality, try to allocate below the granularity + // but enough to require more memory for some iterations. + size_t SizeIncrement = 11; + size_t MinSizeGran = + syclext::get_mem_granularity(Q.get_device(), Q.get_context()) / + sizeof(int); + SizeIncrement = std::max(MinSizeGran / 2 - 1, SizeIncrement); + + // Each work-item will work on multiple elements. + size_t NumElemsPerWI = 1 + (SizeIncrement - 1) / NumWorkItems; + + for (size_t I = 0; I < NumIters; ++I) { + // Increment the size of the vector. 
+ size_t NewVecSize = (I + 1) * SizeIncrement; + Vec.reserve(NewVecSize); + assert(Vec.size() == NewVecSize); + + // Populate to the new memory + int *VecDataPtr = Vec.data(); + size_t StartOffset = I * SizeIncrement; + size_t IterWriteValueOffset = WriteValueOffset * (I + 1); + Q.parallel_for(sycl::range<1>{NumWorkItems}, [=](sycl::item<1> Idx) { + for (size_t J = 0; J < NumElemsPerWI; ++J) { + size_t LoopIdx = J * Idx.get_range(0) + Idx; + size_t OffsetIdx = StartOffset + LoopIdx; + if (OffsetIdx < NewVecSize) + VecDataPtr[OffsetIdx] = LoopIdx + IterWriteValueOffset; + } + }).wait_and_throw(); + + // Copy back the values and verify. + int *CopyBack = sycl::malloc_shared(NewVecSize, Q); + + // TODO: Level-zero (excluding on PVC) does not currently allow copy across + // virtual memory ranges, even if they are consecutive. + syclext::architecture DevArch = + Q.get_device().get_info(); + if (Q.get_backend() == sycl::backend::ext_oneapi_level_zero && + DevArch != syclext::architecture::intel_gpu_pvc && + DevArch != syclext::architecture::intel_gpu_pvc_vg) { + Q.parallel_for(sycl::range<1>{NewVecSize}, [=](sycl::id<1> Idx) { + CopyBack[Idx] = VecDataPtr[Idx]; + }).wait_and_throw(); + } else { + Q.copy(VecDataPtr, CopyBack, NewVecSize).wait_and_throw(); + } + + for (size_t J = 0; J < NewVecSize; ++J) { + int ExpectedVal = + J % SizeIncrement + WriteValueOffset * (J / SizeIncrement + 1); + if (CopyBack[J] != ExpectedVal) { + std::cout << "Comparison failed at index " << J << ": " << CopyBack[J] + << " != " << ExpectedVal << std::endl; + return 1; + } + } + sycl::free(CopyBack, Q); + } + + return 0; +} diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump b/sycl/test/abi/pi_cuda_symbol_check.dump index d3047c6bb1cd0..4808f55c487cd 100644 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ b/sycl/test/abi/pi_cuda_symbol_check.dump @@ -114,6 +114,7 @@ piextDisablePeerAccess piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueCooperativeKernelLaunch 
+piextEnqueueNativeCommand piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle @@ -146,6 +147,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +175,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump index c83b4a4ba6122..2c421ad2fec55 100644 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ b/sycl/test/abi/pi_hip_symbol_check.dump @@ -114,6 +114,7 @@ piextDisablePeerAccess piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueCooperativeKernelLaunch +piextEnqueueNativeCommand piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle @@ -146,6 +147,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +175,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index d6cc82870c669..7e8fc7500f4a4 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ 
b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -113,6 +113,7 @@ piextDisablePeerAccess piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueCooperativeKernelLaunch +piextEnqueueNativeCommand piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle @@ -145,6 +146,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -170,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_nativecpu_symbol_check.dump b/sycl/test/abi/pi_nativecpu_symbol_check.dump index 850e6d22fdb72..10f19aac80652 100644 --- a/sycl/test/abi/pi_nativecpu_symbol_check.dump +++ b/sycl/test/abi/pi_nativecpu_symbol_check.dump @@ -114,6 +114,7 @@ piextDisablePeerAccess piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueCooperativeKernelLaunch +piextEnqueueNativeCommand piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle @@ -146,6 +147,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +175,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git 
a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index daaf7bbee5de5..c0e1b76c4d04f 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -113,6 +113,7 @@ piextDisablePeerAccess piextEnablePeerAccess piextEnqueueCommandBuffer piextEnqueueCooperativeKernelLaunch +piextEnqueueNativeCommand piextEnqueueReadHostPipe piextEnqueueWriteHostPipe piextEventCreateWithNativeHandle @@ -133,6 +134,9 @@ piextMemGetNativeHandle piextMemImageAllocate piextMemImageCopy piextMemImageCreateWithNativeHandle +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextMemImageFree piextMemImageGetInfo piextMemImportOpaqueFD @@ -170,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index dec17d9f11fe8..99fb95d92fa72 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2988,11 +2988,17 @@ _ZN4sycl3_V13ext5intel12experimental15online_compilerILNS3_15source_languageE0EE _ZN4sycl3_V13ext5intel12experimental15online_compilerILNS3_15source_languageE1EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISE_EEEEES8_IhSaIhEERKSE_DpRKT_ _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE -_ZN4sycl3_V13ext6oneapi10level_zero10make_eventERKNS0_7contextEmb -_ZN4sycl3_V13ext6oneapi10level_zero10make_queueERKNS0_7contextERKNS0_6deviceEmbbRKNS0_13property_listE -_ZN4sycl3_V13ext6oneapi10level_zero11make_deviceERKNS0_8platformEm 
-_ZN4sycl3_V13ext6oneapi10level_zero12make_contextERKSt6vectorINS0_6deviceESaIS5_EEmb -_ZN4sycl3_V13ext6oneapi10level_zero13make_platformEm +_ZN4sycl3_V13ext6oneapi12experimental10mem_adviseENS0_5queueEPvmiRKNS0_6detail13code_locationE +_ZN4sycl3_V13ext6oneapi10level_zero6detail11make_deviceERKNS0_8platformEm +_ZN4sycl3_V13ext6oneapi12experimental12physical_memC1ERKNS0_6deviceERKNS0_7contextEm +_ZN4sycl3_V13ext6oneapi12experimental12physical_memC2ERKNS0_6deviceERKNS0_7contextEm +_ZN4sycl3_V13ext6oneapi12experimental15get_access_modeEPKvmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental15set_access_modeEPKvmNS3_19address_access_modeERKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental16free_virtual_memEmmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental19get_mem_granularityERKNS0_6deviceERKNS0_7contextENS3_16granularity_modeE +_ZN4sycl3_V13ext6oneapi12experimental19get_mem_granularityERKNS0_7contextENS3_16granularity_modeE +_ZN4sycl3_V13ext6oneapi12experimental19reserve_virtual_memEmmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental5unmapEPKvmRKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_22bindless_image_samplerERKNS3_16image_descriptorERKNS0_5queueE @@ -3092,6 +3098,8 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC1ERKNS0_5 _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC1ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC2ERKNS0_5queueERKNS0_13property_listE _ZN4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graphC2ERKNS0_7contextERKNS0_6deviceERKNS0_13property_listE 
+_ZN4sycl3_V13ext6oneapi12experimental6memcpyENS0_5queueEPvPKvmRKNS0_6detail13code_locationE +_ZN4sycl3_V13ext6oneapi12experimental6memsetENS0_5queueEPvimRKNS0_6detail13code_locationE _ZN4sycl3_V13ext6oneapi12experimental9image_memC1ERKNS3_16image_descriptorERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental9image_memC1ERKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental9image_memC2ERKNS3_16image_descriptorERKNS0_5queueE @@ -3131,6 +3139,7 @@ _ZN4sycl3_V15queue20wait_and_throw_proxyERKNS0_6detail13code_locationE _ZN4sycl3_V15queue22memcpyFromDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_EE _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKSt6vectorINS0_5eventESaIS3_EERKNS0_6detail13code_locationE +_ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE _ZN4sycl3_V15queue29ext_oneapi_set_external_eventERKNS0_5eventE @@ -3158,37 +3167,10 @@ _ZN4sycl3_V16detail10build_implERKNS0_13kernel_bundleILNS0_12bundle_stateE0EEERK _ZN4sycl3_V16detail10frexp_implENS1_9half_impl4halfEPi _ZN4sycl3_V16detail10frexp_implEdPi _ZN4sycl3_V16detail10frexp_implEfPi -_ZN4sycl3_V16detail10image_impl10getDevicesESt10shared_ptrINS1_12context_implEE -_ZN4sycl3_V16detail10image_impl11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event -_ZN4sycl3_V16detail10image_impl14checkImageDescERK14_pi_image_descSt10shared_ptrINS1_12context_implEEPv -_ZN4sycl3_V16detail10image_impl16checkImageFormatERK16_pi_image_formatSt10shared_ptrINS1_12context_implEE -_ZN4sycl3_V16detail10image_impl34sampledImageDestructorNotificationEPv 
-_ZN4sycl3_V16detail10image_impl35sampledImageConstructorNotificationERKNS1_13code_locationEPvPKvjPmNS0_12image_formatERKNS0_13image_samplerE -_ZN4sycl3_V16detail10image_impl36unsampledImageDestructorNotificationEPv -_ZN4sycl3_V16detail10image_impl37unsampledImageConstructorNotificationERKNS1_13code_locationEPvPKvjPmNS0_12image_formatE -_ZN4sycl3_V16detail10image_implC1EP7_cl_memRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteISA_EEh -_ZN4sycl3_V16detail10image_implC1EmRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EEhNS0_19image_channel_orderENS0_18image_channel_typeEbNS0_5rangeILi3EEE -_ZN4sycl3_V16detail10image_implC2EP7_cl_memRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteISA_EEh -_ZN4sycl3_V16detail10image_implC2EmRKNS0_7contextENS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EEhNS0_19image_channel_orderENS0_18image_channel_typeEbNS0_5rangeILi3EEE _ZN4sycl3_V16detail10make_eventEmRKNS0_7contextENS0_7backendE _ZN4sycl3_V16detail10make_eventEmRKNS0_7contextEbNS0_7backendE _ZN4sycl3_V16detail10make_queueEmiRKNS0_7contextEPKNS0_6deviceEbRKNS0_13property_listERKSt8functionIFvNS0_14exception_listEEENS0_7backendE _ZN4sycl3_V16detail10waitEventsESt6vectorINS0_5eventESaIS3_EE -_ZN4sycl3_V16detail11SYCLMemObjT10releaseMemESt10shared_ptrINS1_12context_implEEPv -_ZN4sycl3_V16detail11SYCLMemObjT16determineHostPtrERKSt10shared_ptrINS1_12context_implEEbRPvRb -_ZN4sycl3_V16detail11SYCLMemObjT16updateHostMemoryEPv -_ZN4sycl3_V16detail11SYCLMemObjT16updateHostMemoryEv -_ZN4sycl3_V16detail11SYCLMemObjT20getBufSizeForContextERKSt10shared_ptrINS1_12context_implEEm -_ZN4sycl3_V16detail11SYCLMemObjT27handleWriteAccessorCreationEv -_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE 
-_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE23_pi_image_channel_order22_pi_image_channel_typeNS0_5rangeILi3EEEjm -_ZN4sycl3_V16detail11SYCLMemObjTC1EmRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE -_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE -_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEbNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE23_pi_image_channel_order22_pi_image_channel_typeNS0_5rangeILi3EEEjm -_ZN4sycl3_V16detail11SYCLMemObjTC2EmRKNS0_7contextEmNS0_5eventESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS8_EE -_ZN4sycl3_V16detail11buffer_impl11allocateMemESt10shared_ptrINS1_12context_implEEbPvRP9_pi_event -_ZN4sycl3_V16detail11buffer_impl22destructorNotificationEPv -_ZN4sycl3_V16detail11buffer_impl23constructorNotificationERKNS1_13code_locationEPvPKvS8_jjPm _ZN4sycl3_V16detail11image_plain14set_write_backEb _ZN4sycl3_V16detail11image_plain23set_final_data_internalERKSt8functionIFvRKS3_IFvPvEEEE _ZN4sycl3_V16detail11image_plain23set_final_data_internalEv @@ -3231,16 +3213,6 @@ _ZN4sycl3_V16detail11remquo_implEffPi _ZN4sycl3_V16detail11sincos_implENS1_9half_impl4halfEPS3_ _ZN4sycl3_V16detail11sincos_implEdPd _ZN4sycl3_V16detail11sincos_implEfPf -_ZN4sycl3_V16detail11stream_impl14initStreamHostESt10shared_ptrINS1_10queue_implEE -_ZN4sycl3_V16detail11stream_impl15accessGlobalBufERNS0_7handlerE -_ZN4sycl3_V16detail11stream_impl18accessGlobalOffsetERNS0_7handlerE -_ZN4sycl3_V16detail11stream_impl20accessGlobalFlushBufERNS0_7handlerE -_ZN4sycl3_V16detail11stream_impl5flushERKSt10shared_ptrINS1_10event_implEE -_ZN4sycl3_V16detail11stream_impl5flushEv -_ZN4sycl3_V16detail11stream_implC1EmmRKNS0_13property_listE -_ZN4sycl3_V16detail11stream_implC1EmmRNS0_7handlerE 
-_ZN4sycl3_V16detail11stream_implC2EmmRKNS0_13property_listE -_ZN4sycl3_V16detail11stream_implC2EmmRNS0_7handlerE _ZN4sycl3_V16detail12buffer_plain14deleteAccPropsERKNS1_16PropWithDataKindE _ZN4sycl3_V16detail12buffer_plain14set_write_backEb _ZN4sycl3_V16detail12buffer_plain23constructorNotificationERKNS1_13code_locationEPvPKvS8_jjPm @@ -3261,57 +3233,7 @@ _ZN4sycl3_V16detail12buffer_plainC2EmNS0_7contextESt10unique_ptrINS1_19SYCLMemOb _ZN4sycl3_V16detail12buffer_plainC2EmmRKNS0_13property_listESt10unique_ptrINS1_19SYCLMemObjAllocatorESt14default_deleteIS7_EE _ZN4sycl3_V16detail12compile_implERKNS0_13kernel_bundleILNS0_12bundle_stateE0EEERKSt6vectorINS0_6deviceESaIS8_EERKNS0_13property_listE _ZN4sycl3_V16detail12isOutOfRangeENS0_3vecIiLi4EEENS0_15addressing_modeENS0_5rangeILi3EEE -_ZN4sycl3_V16detail12make_contextEmRKSt8functionIFvNS0_14exception_listEEENS0_7backendE -_ZN4sycl3_V16detail12sampler_impl18getOrCreateSamplerERKNS0_7contextE -_ZN4sycl3_V16detail12sampler_implC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE -_ZN4sycl3_V16detail12sampler_implC1EP11_cl_samplerRKNS0_7contextE -_ZN4sycl3_V16detail12sampler_implC2ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE -_ZN4sycl3_V16detail12sampler_implC2EP11_cl_samplerRKNS0_7contextE -_ZN4sycl3_V16detail12sampler_implD1Ev -_ZN4sycl3_V16detail12sampler_implD2Ev -_ZN4sycl3_V16detail13MemoryManager10advise_usmEPKvSt10shared_ptrINS1_10queue_implEEm14_pi_mem_adviceSt6vectorIP9_pi_eventSaISB_EEPSB_ -_ZN4sycl3_V16detail13MemoryManager10advise_usmEPKvSt10shared_ptrINS1_10queue_implEEm14_pi_mem_adviceSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager11copy_2d_usmEPKvmSt10shared_ptrINS1_10queue_implEEPvmmmSt6vectorIP9_pi_eventSaISB_EEPSB_ 
-_ZN4sycl3_V16detail13MemoryManager11copy_2d_usmEPKvmSt10shared_ptrINS1_10queue_implEEPvmmmSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager11fill_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_ -_ZN4sycl3_V16detail13MemoryManager11fill_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmRKSt6vectorIcSaIcEES7_IP9_pi_eventSaISD_EEPSD_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EEPS9_ -_ZN4sycl3_V16detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager13memset_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmcSt6vectorIP9_pi_eventSaIS9_EEPS9_ -_ZN4sycl3_V16detail13MemoryManager13memset_2d_usmEPvSt10shared_ptrINS1_10queue_implEEmmmcSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager13releaseMemObjESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvS8_ -_ZN4sycl3_V16detail13MemoryManager16allocateMemImageESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRK14_pi_image_descRK16_pi_image_formatRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager17allocateMemBufferESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager18allocateHostMemoryEPNS1_11SYCLMemObjIEPvbmRKNS0_13property_listE -_ZN4sycl3_V16detail13MemoryManager19allocateImageObjectESt10shared_ptrINS1_12context_implEEPvbRK14_pi_image_descRK16_pi_image_formatRKNS0_13property_listE -_ZN4sycl3_V16detail13MemoryManager19copy_image_bindlessEPvSt10shared_ptrINS1_10queue_implEES3_RK14_pi_image_descRK16_pi_image_format20_pi_image_copy_flags22pi_image_offset_structSE_22pi_image_region_structSF_RKSt6vectorIP9_pi_eventSaISI_EEPSI_ 
-_ZN4sycl3_V16detail13MemoryManager20allocateBufferObjectESt10shared_ptrINS1_12context_implEEPvbmRKNS0_13property_listE -_ZN4sycl3_V16detail13MemoryManager20allocateMemSubBufferESt10shared_ptrINS1_12context_implEEPvmmNS0_5rangeILi3EEESt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager21copy_to_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmS4_RKSt6vectorIP9_pi_eventSaISA_EEPSA_ -_ZN4sycl3_V16detail13MemoryManager21copy_to_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmS4_RKSt6vectorIP9_pi_eventSaISA_EEPSA_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager23copy_from_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmPvRKSt6vectorIP9_pi_eventSaISB_EEPSB_ -_ZN4sycl3_V16detail13MemoryManager23copy_from_device_globalEPKvbSt10shared_ptrINS1_10queue_implEEmmPvRKSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager26ext_oneapi_fill_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvmPKcjNS0_5rangeILi3EEESE_NS0_2idILi3EEEjSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSA_jSC_SC_SE_jSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyD2H_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPvjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjPcjSC_SE_jSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager29ext_oneapi_copyH2D_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPNS1_11SYCLMemObjIEPcjNS0_5rangeILi3EEENS0_2idILi3EEEjPvjSC_SC_SE_jSt6vectorIjSaIjEEPj 
-_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_copy_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEPKvP22_pi_ext_command_buffermPvSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager30ext_oneapi_fill_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmiSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager32ext_oneapi_advise_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPKvm14_pi_mem_adviceSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager34ext_oneapi_prefetch_usm_cmd_bufferESt10shared_ptrINS1_12context_implEEP22_pi_ext_command_bufferPvmSt6vectorIjSaIjEEPj -_ZN4sycl3_V16detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ -_ZN4sycl3_V16detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_ -_ZN4sycl3_V16detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_RKS6_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager4fillEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEmPKcjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ -_ZN4sycl3_V16detail13MemoryManager4fillEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEmPKcjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_RKS6_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager5unmapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEES5_St6vectorIP9_pi_eventSaISB_EERSB_ -_ZN4sycl3_V16detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event 
-_ZN4sycl3_V16detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event -_ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_ -_ZN4sycl3_V16detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_RKS5_INS1_10event_implEE -_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_ -_ZN4sycl3_V16detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_RKS4_INS1_10event_implEE +_ZN4sycl3_V16detail12make_contextEmRKSt8functionIFvNS0_14exception_listEEENS0_7backendEbRKSt6vectorINS0_6deviceESaISA_EE _ZN4sycl3_V16detail13host_pipe_map3addEPKvPKc _ZN4sycl3_V16detail13lgamma_r_implENS1_9half_impl4halfEPi _ZN4sycl3_V16detail13lgamma_r_implEdPi @@ -3334,32 +3256,19 @@ _ZN4sycl3_V16detail16AccessorBaseHost14getAccessRangeEv _ZN4sycl3_V16detail16AccessorBaseHost14getMemoryRangeEv _ZN4sycl3_V16detail16AccessorBaseHost6getPtrEv _ZN4sycl3_V16detail16AccessorBaseHost9getOffsetEv -_ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibibRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibmbRKNS0_13property_listE -_ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviiibRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC1ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE -_ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibibRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviibmbRKNS0_13property_listE 
-_ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviiibRKNS0_13property_listE _ZN4sycl3_V16detail16AccessorBaseHostC2ENS0_2idILi3EEENS0_5rangeILi3EEES6_NS0_6access4modeEPviimbRKNS0_13property_listE -_ZN4sycl3_V16detail16AccessorImplHost6resizeEm -_ZN4sycl3_V16detail16AccessorImplHostD1Ev -_ZN4sycl3_V16detail16AccessorImplHostD2Ev _ZN4sycl3_V16detail16reduGetMaxWGSizeESt10shared_ptrINS1_10queue_implEEm _ZN4sycl3_V16detail17HostProfilingInfo3endEv _ZN4sycl3_V16detail17HostProfilingInfo5startEv _ZN4sycl3_V16detail17device_global_map3addEPKvPKc _ZN4sycl3_V16detail17reduComputeWGSizeEmmRm -_ZN4sycl3_V16detail18convertChannelTypeE22_pi_image_channel_type -_ZN4sycl3_V16detail18convertChannelTypeENS0_18image_channel_typeE _ZN4sycl3_V16detail18get_kernel_id_implENS1_11string_viewE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextENS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextEbNS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18stringifyErrorCodeEi -_ZN4sycl3_V16detail19convertChannelOrderE23_pi_image_channel_order -_ZN4sycl3_V16detail19convertChannelOrderENS0_19image_channel_orderE -_ZN4sycl3_V16detail19getImageElementSizeEhNS0_18image_channel_typeE -_ZN4sycl3_V16detail19getPluginOpaqueDataILNS0_7backendE5EEEPvS4_ _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16detail19kernel_bundle_plain32set_specialization_constant_implEPKcPvm @@ -3375,7 +3284,6 @@ _ZN4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv _ZN4sycl3_V16detail21LocalAccessorBaseHostC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail21LocalAccessorBaseHostC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE 
-_ZN4sycl3_V16detail22getImageNumberChannelsENS0_19image_channel_orderE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateERKSt8functionIFbRKSt10shared_ptrINS1_17device_image_implEEEE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE @@ -3384,7 +3292,6 @@ _ZN4sycl3_V16detail22has_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6devi _ZN4sycl3_V16detail22reduGetPreferredWGSizeERSt10shared_ptrINS1_10queue_implEEm _ZN4sycl3_V16detail22removeDuplicateDevicesERKSt6vectorINS0_6deviceESaIS3_EE _ZN4sycl3_V16detail23constructorNotificationEPvS2_NS0_6access6targetENS3_4modeERKNS1_13code_locationE -_ZN4sycl3_V16detail23getESIMDDeviceInterfaceEv _ZN4sycl3_V16detail24find_device_intersectionERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EE _ZN4sycl3_V16detail26isDeviceGlobalUsedInKernelEPKv _ZN4sycl3_V16detail27getPixelCoordLinearFiltModeENS0_3vecIfLi4EEENS0_15addressing_modeENS0_5rangeILi3EEERS3_ @@ -3400,7 +3307,6 @@ _ZN4sycl3_V16detail2pi9assertionEbPKc _ZN4sycl3_V16detail2pi9getPluginILNS0_7backendE1EEERKSt10shared_ptrINS1_6pluginEEv _ZN4sycl3_V16detail2pi9getPluginILNS0_7backendE2EEERKSt10shared_ptrINS1_6pluginEEv _ZN4sycl3_V16detail2pi9getPluginILNS0_7backendE3EEERKSt10shared_ptrINS1_6pluginEEv -_ZN4sycl3_V16detail2pi9getPluginILNS0_7backendE5EEERKSt10shared_ptrINS1_6pluginEEv _ZN4sycl3_V16detail2pi9getPluginILNS0_7backendE6EEERKSt10shared_ptrINS1_6pluginEEv _ZN4sycl3_V16detail30UnsampledImageAccessorBaseHost10getAccDataEv _ZN4sycl3_V16detail30UnsampledImageAccessorBaseHost6getPtrEv @@ -3544,12 +3450,8 @@ _ZN4sycl3_V16native13__divide_implENS0_3vecIfLi3EEES3_ _ZN4sycl3_V16native13__divide_implENS0_3vecIfLi4EEES3_ _ZN4sycl3_V16native13__divide_implENS0_3vecIfLi8EEES3_ 
_ZN4sycl3_V16native13__divide_implEff -_ZN4sycl3_V16opencl10make_queueERKNS0_7contextEm -_ZN4sycl3_V16opencl11make_deviceEm -_ZN4sycl3_V16opencl12make_contextEm _ZN4sycl3_V16opencl13has_extensionERKNS0_6deviceERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZN4sycl3_V16opencl13has_extensionERKNS0_8platformERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE -_ZN4sycl3_V16opencl13make_platformEm _ZN4sycl3_V16streamC1EmmRNS0_7handlerE _ZN4sycl3_V16streamC1EmmRNS0_7handlerERKNS0_13property_listE _ZN4sycl3_V16streamC2EmmRNS0_7handlerE @@ -3637,10 +3539,14 @@ _ZN4sycl3_V17handler8finalizeEv _ZN4sycl3_V17handler8prefetchEPKvm _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b +_ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEES5_S5_bb _ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEb +_ZN4sycl3_V17handlerC1ESt10shared_ptrINS0_6detail10queue_implEEbb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_3ext6oneapi12experimental6detail10graph_implEE _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_b +_ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEES5_S5_bb _ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEb +_ZN4sycl3_V17handlerC2ESt10shared_ptrINS0_6detail10queue_implEEbb _ZN4sycl3_V17samplerC1ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE _ZN4sycl3_V17samplerC1EP11_cl_samplerRKNS0_7contextE _ZN4sycl3_V17samplerC2ENS0_29coordinate_normalization_modeENS0_15addressing_modeENS0_14filtering_modeERKNS0_13property_listE @@ -3695,6 +3601,10 @@ _ZNK4sycl3_V114interop_handle16getNativeContextEv _ZNK4sycl3_V115device_selector13select_deviceEv _ZNK4sycl3_V116default_selectorclERKNS0_6deviceE _ZNK4sycl3_V120accelerator_selectorclERKNS0_6deviceE +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem10get_deviceEv 
+_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem11get_contextEv +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem3mapEmmNS3_19address_access_modeEm +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem4sizeEv _ZNK4sycl3_V13ext6oneapi12experimental4node14get_successorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node16get_predecessorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node8get_typeEv @@ -3771,11 +3681,6 @@ _ZNK4sycl3_V15queue9getNativeERi _ZNK4sycl3_V16ONEAPI15filter_selector13select_deviceEv _ZNK4sycl3_V16ONEAPI15filter_selector5resetEv _ZNK4sycl3_V16ONEAPI15filter_selectorclERKNS0_6deviceE -_ZNK4sycl3_V16detail11SYCLMemObjT18detachMemoryObjectERKSt10shared_ptrIS2_E -_ZNK4sycl3_V16detail11SYCLMemObjT9getPluginEv -_ZNK4sycl3_V16detail11SYCLMemObjT9isInteropEv -_ZNK4sycl3_V16detail11buffer_impl15getNativeVectorENS0_7backendE -_ZNK4sycl3_V16detail11buffer_impl16addInteropObjectERSt6vectorImSaImEE _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device10gpu_slicesEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11free_memoryEEENT_11return_typeEv _ZNK4sycl3_V16detail11device_impl8get_infoINS0_3ext5intel4info6device11pci_addressEEENT_11return_typeEv @@ -3856,10 +3761,6 @@ _ZNK4sycl3_V16detail11image_plain8get_sizeEv _ZNK4sycl3_V16detail11image_plain9get_countEv _ZNK4sycl3_V16detail11image_plain9get_pitchEv _ZNK4sycl3_V16detail11image_plain9get_rangeEv -_ZNK4sycl3_V16detail11stream_impl22get_max_statement_sizeEv -_ZNK4sycl3_V16detail11stream_impl25get_work_item_buffer_sizeEv -_ZNK4sycl3_V16detail11stream_impl4sizeEv -_ZNK4sycl3_V16detail11stream_impl8get_sizeEv _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi4cuda8property7context19use_primary_contextEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue12priority_lowEEET_v _ZNK4sycl3_V16detail12buffer_plain12get_propertyINS0_3ext6oneapi8property5queue13priority_highEEET_v @@ -3898,9 +3799,6 @@ 
_ZNK4sycl3_V16detail12buffer_plain13handleReleaseEv _ZNK4sycl3_V16detail12buffer_plain15getNativeVectorENS0_7backendE _ZNK4sycl3_V16detail12buffer_plain22get_allocator_internalEv _ZNK4sycl3_V16detail12buffer_plain7getSizeEv -_ZNK4sycl3_V16detail12sampler_impl18get_filtering_modeEv -_ZNK4sycl3_V16detail12sampler_impl19get_addressing_modeEv -_ZNK4sycl3_V16detail12sampler_impl33get_coordinate_normalization_modeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getElemSizeEv _ZNK4sycl3_V16detail16AccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail16AccessorBaseHost13isPlaceholderEv @@ -4231,6 +4129,7 @@ _ZNK4sycl3_V17context8get_infoINS0_4info7context32atomic_memory_scope_capabiliti _ZNK4sycl3_V17context8get_infoINS0_4info7context7devicesEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context8get_infoINS0_4info7context8platformEEENS0_6detail20is_context_info_descIT_E11return_typeEv _ZNK4sycl3_V17context9getNativeEv +_ZNK4sycl3_V17handler11eventNeededEv _ZNK4sycl3_V17handler15getCommandGraphEv _ZNK4sycl3_V17handler17getContextImplPtrEv _ZNK4sycl3_V17handler27isStateExplicitKernelBundleEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index ceba4356bee64..9b80d2eb69c8b 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -17,9 +17,7 @@ ??$getPlugin@$00@pi@detail@_V1@sycl@@YAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ ??$getPlugin@$01@pi@detail@_V1@sycl@@YAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ ??$getPlugin@$02@pi@detail@_V1@sycl@@YAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ -??$getPlugin@$04@pi@detail@_V1@sycl@@YAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ ??$getPlugin@$05@pi@detail@_V1@sycl@@YAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ -??$getPluginOpaqueData@$04@detail@_V1@sycl@@YAPEAXPEAX@Z 
??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@context@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ??$get_backend_info@Ubackend_version@device@info@_V1@sycl@@@event@_V1@sycl@@QEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ @@ -466,30 +464,14 @@ ??0AccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VAccessorImplHost@detail@_V1@sycl@@@std@@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z -??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHHH_NAEBVproperty_list@23@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_K_NAEBVproperty_list@23@@Z -??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_NH4AEBVproperty_list@23@@Z ??0AccessorBaseHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_N_K4AEBVproperty_list@23@@Z -??0AccessorImplHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z -??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHHH_NAEBVproperty_list@23@@Z -??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_K_NAEBVproperty_list@23@@Z -??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_NH4AEBVproperty_list@23@@Z -??0AccessorImplHost@detail@_V1@sycl@@QEAA@V?$id@$02@23@V?$range@$02@23@1W4mode@access@23@PEAXHH_N_K4AEBVproperty_list@23@@Z ??0HostProfilingInfo@detail@_V1@sycl@@QEAA@XZ ??0LocalAccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VLocalAccessorImplHost@detail@_V1@sycl@@@std@@@Z ??0LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z 
??0LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z ??0LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@V?$range@$02@23@HHAEBVproperty_list@23@@Z -??0LocalAccessorImplHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z -??0LocalAccessorImplHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z -??0LocalAccessorImplHost@detail@_V1@sycl@@QEAA@V?$range@$02@23@HHAEBVproperty_list@23@@Z ??0SYCLCategory@detail@_V1@sycl@@QEAA@XZ -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@PEAU_cl_mem@@AEBVcontext@23@Vevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@_KVevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@_NVevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@_NVevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@W4_pi_image_channel_order@@W4_pi_image_channel_type@@V?$range@$02@23@I0@Z -??0SYCLMemObjT@detail@_V1@sycl@@QEAA@_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z ??0SampledImageAccessorBaseHost@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@VSampledImageAccessorImplHost@detail@_V1@sycl@@@std@@@Z ??0SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z ??0SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@AEBV0123@@Z @@ -501,15 +483,6 @@ ??0accelerator_selector@_V1@sycl@@QEAA@$$QEAV012@@Z ??0accelerator_selector@_V1@sycl@@QEAA@AEBV012@@Z 
??0accelerator_selector@_V1@sycl@@QEAA@XZ -??0buffer_impl@detail@_V1@sycl@@QEAA@AEBV?$function@$$A6AXPEAX@Z@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@PEAU_cl_mem@@AEBVcontext@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@Vevent@23@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@PEAU_cl_mem@@AEBVcontext@23@_KV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@Vevent@23@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@PEAX_K1AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@PEBX_K1AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@_K0AEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@_NVevent@23@@Z -??0buffer_impl@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@_KV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@Vevent@23@@Z 
??0buffer_plain@detail@_V1@sycl@@IEAA@AEBV?$function@$$A6AXPEAX@Z@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z ??0buffer_plain@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@$$CBX@std@@_K_KAEBVproperty_list@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@_N@Z ??0buffer_plain@detail@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vbuffer_impl@detail@_V1@sycl@@@std@@@Z @@ -597,23 +570,12 @@ ??0half@host_half_impl@detail@_V1@sycl@@QEAA@G@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N@Z +??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N1@Z ??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N@Z +??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N1@Z ??0host_selector@_V1@sycl@@QEAA@$$QEAV012@@Z ??0host_selector@_V1@sycl@@QEAA@AEBV012@@Z ??0host_selector@_V1@sycl@@QEAA@XZ -??0image_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@W4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@AEBV?$range@$01@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@EAEBVproperty_list@23@_N@Z -??0image_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@W4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@EAEBVproperty_list@23@_N@Z 
-??0image_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@W4image_channel_order@23@W4image_channel_type@23@Uimage_sampler@23@AEBV?$range@$02@23@AEBV?$range@$01@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@$$CBX@std@@W4image_channel_order@23@W4image_channel_type@23@Uimage_sampler@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@5@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@PEAU_cl_mem@@AEBVcontext@23@Vevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@E@Z -??0image_impl@detail@_V1@sycl@@QEAA@PEAXW4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@AEBV?$range@$01@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@PEAXW4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@PEBXW4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@PEBXW4image_channel_order@23@W4image_channel_type@23@Uimage_sampler@23@AEBV?$range@$02@23@AEBV?$range@$01@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z 
-??0image_impl@detail@_V1@sycl@@QEAA@PEBXW4image_channel_order@23@W4image_channel_type@23@Uimage_sampler@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@W4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@AEBV?$range@$01@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@W4image_channel_order@23@W4image_channel_type@23@AEBV?$range@$02@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EAEBVproperty_list@23@@Z -??0image_impl@detail@_V1@sycl@@QEAA@_KAEBVcontext@23@Vevent@23@V?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@EW4image_channel_order@23@W4image_channel_type@23@_NV?$range@$02@23@@Z ??0image_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z ??0image_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ??0image_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBUimage_descriptor@12345@AEBVqueue@45@@Z @@ -647,6 +609,10 @@ ??0kernel_id@_V1@sycl@@AEAA@PEBD@Z ??0kernel_id@_V1@sycl@@QEAA@$$QEAV012@@Z ??0kernel_id@_V1@sycl@@QEAA@AEBV012@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV012345@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBVqueue@45@_K@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBVdevice@45@AEBVcontext@45@_K@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z 
??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z @@ -680,29 +646,19 @@ ??0sampler@_V1@sycl@@QEAA@AEBV012@@Z ??0sampler@_V1@sycl@@QEAA@PEAU_cl_sampler@@AEBVcontext@12@@Z ??0sampler@_V1@sycl@@QEAA@W4coordinate_normalization_mode@12@W4addressing_mode@12@W4filtering_mode@12@AEBVproperty_list@12@@Z -??0sampler_impl@detail@_V1@sycl@@QEAA@PEAU_cl_sampler@@AEBVcontext@23@@Z -??0sampler_impl@detail@_V1@sycl@@QEAA@W4coordinate_normalization_mode@23@W4addressing_mode@23@W4filtering_mode@23@AEBVproperty_list@23@@Z ??0stream@_V1@sycl@@AEAA@V?$shared_ptr@Vstream_impl@detail@_V1@sycl@@@std@@V?$accessor@D$00$0EAC@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@12@V?$accessor@I$00$0EAF@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@12@1@Z ??0stream@_V1@sycl@@QEAA@$$QEAV012@@Z ??0stream@_V1@sycl@@QEAA@AEBV012@@Z ??0stream@_V1@sycl@@QEAA@_K0AEAVhandler@12@@Z ??0stream@_V1@sycl@@QEAA@_K0AEAVhandler@12@AEBVproperty_list@12@@Z -??0stream_impl@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z -??0stream_impl@detail@_V1@sycl@@QEAA@AEBV0123@@Z -??0stream_impl@detail@_V1@sycl@@QEAA@_K0AEAVhandler@23@@Z -??0stream_impl@detail@_V1@sycl@@QEAA@_K0AEBVproperty_list@23@@Z ??0tls_code_loc_t@detail@_V1@sycl@@QEAA@AEBUcode_location@123@@Z ??0tls_code_loc_t@detail@_V1@sycl@@QEAA@XZ ??1AccessorBaseHost@detail@_V1@sycl@@QEAA@XZ -??1AccessorImplHost@detail@_V1@sycl@@QEAA@XZ ??1LocalAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ -??1LocalAccessorImplHost@detail@_V1@sycl@@QEAA@XZ ??1SYCLCategory@detail@_V1@sycl@@UEAA@XZ -??1SYCLMemObjT@detail@_V1@sycl@@UEAA@XZ ??1SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ ??1UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAA@XZ ??1accelerator_selector@_V1@sycl@@UEAA@XZ -??1buffer_impl@detail@_V1@sycl@@UEAA@XZ ??1buffer_plain@detail@_V1@sycl@@QEAA@XZ ??1context@_V1@sycl@@QEAA@XZ ??1cpu_selector@_V1@sycl@@UEAA@XZ @@ 
-721,21 +677,19 @@ ??1gpu_selector@_V1@sycl@@UEAA@XZ ??1handler@_V1@sycl@@AEAA@XZ ??1host_selector@_V1@sycl@@UEAA@XZ -??1image_impl@detail@_V1@sycl@@UEAA@XZ ??1image_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1image_mem_impl@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1image_plain@detail@_V1@sycl@@QEAA@XZ ??1kernel@_V1@sycl@@QEAA@XZ ??1kernel_bundle_plain@detail@_V1@sycl@@QEAA@XZ ??1kernel_id@_V1@sycl@@QEAA@XZ +??1physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1node@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1platform@_V1@sycl@@QEAA@XZ ??1queue@_V1@sycl@@QEAA@XZ ??1sampler@_V1@sycl@@QEAA@XZ -??1sampler_impl@detail@_V1@sycl@@QEAA@XZ ??1stream@_V1@sycl@@QEAA@XZ -??1stream_impl@detail@_V1@sycl@@QEAA@XZ ??1tls_code_loc_t@detail@_V1@sycl@@QEAA@XZ ??4?$OwnerLessBase@Vcontext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vcontext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z @@ -747,6 +701,8 @@ ??4?$OwnerLessBase@Vkernel@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z +??4?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z +??4?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z @@ -757,15 +713,10 @@ ??4?$hash@Vqueue@_V1@sycl@@@std@@QEAAAEAU01@AEBU01@@Z ??4AccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4AccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z 
-??4AccessorImplHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4HostProfilingInfo@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4HostProfilingInfo@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4LocalAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4LocalAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z -??4LocalAccessorImplHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z -??4LocalAccessorImplHost@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z -??4MemoryManager@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z -??4MemoryManager@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4OSUtil@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4OSUtil@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4SampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z @@ -817,6 +768,8 @@ ??4kernel_bundle_plain@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4kernel_id@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z ??4kernel_id@_V1@sycl@@QEAAAEAV012@AEBV012@@Z +??4physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z +??4physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@AEBV012345@@Z ??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z ??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z ??4node@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z @@ -829,8 +782,6 @@ ??4sampler@_V1@sycl@@QEAAAEAV012@AEBV012@@Z ??4stream@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z ??4stream@_V1@sycl@@QEAAAEAV012@AEBV012@@Z -??4stream_impl@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z -??4stream_impl@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4tls_code_loc_t@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??8context@_V1@sycl@@QEBA_NAEBV012@@Z ??8device@_V1@sycl@@QEBA_NAEBV012@@Z @@ -840,6 +791,7 @@ ??8kernel@_V1@sycl@@QEBA_NAEBV012@@Z ??8kernel_bundle_plain@detail@_V1@sycl@@QEBA_NAEBV0123@@Z ??8kernel_id@_V1@sycl@@QEBA_NAEBV012@@Z 
+??8physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_NAEBV012345@@Z ??8platform@_V1@sycl@@QEBA_NAEBV012@@Z ??8queue@_V1@sycl@@QEBA_NAEBV012@@Z ??8sampler@_V1@sycl@@QEBA_NAEBV012@@Z @@ -852,6 +804,7 @@ ??9kernel@_V1@sycl@@QEBA_NAEBV012@@Z ??9kernel_bundle_plain@detail@_V1@sycl@@QEBA_NAEBV0123@@Z ??9kernel_id@_V1@sycl@@QEBA_NAEBV012@@Z +??9physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_NAEBV012345@@Z ??9platform@_V1@sycl@@QEBA_NAEBV012@@Z ??9queue@_V1@sycl@@QEBA_NAEBV012@@Z ??9sampler@_V1@sycl@@QEBA_NAEBV012@@Z @@ -875,9 +828,7 @@ ??Zhalf@host_half_impl@detail@_V1@sycl@@QEAAAEAV01234@AEBV01234@@Z ??_0half@host_half_impl@detail@_V1@sycl@@QEAAAEAV01234@AEBV01234@@Z ??_7SYCLCategory@detail@_V1@sycl@@6B@ -??_7SYCLMemObjT@detail@_V1@sycl@@6B@ ??_7accelerator_selector@_V1@sycl@@6B@ -??_7buffer_impl@detail@_V1@sycl@@6B@ ??_7cpu_selector@_V1@sycl@@6B@ ??_7default_selector@_V1@sycl@@6B@ ??_7device_selector@_V1@sycl@@6B@ @@ -886,7 +837,6 @@ ??_7filter_selector@oneapi@ext@_V1@sycl@@6B@ ??_7gpu_selector@_V1@sycl@@6B@ ??_7host_selector@_V1@sycl@@6B@ -??_7image_impl@detail@_V1@sycl@@6B@ ??_8exception@_V1@sycl@@7B@ ??_Dexception@_V1@sycl@@QEAAXXZ ??_Fcontext@_V1@sycl@@QEAAXXZ @@ -898,7 +848,6 @@ ?GDBMethodsAnchor@SampledImageAccessorBaseHost@detail@_V1@sycl@@IEAAXXZ ?GDBMethodsAnchor@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@IEAAXXZ ?GetRangeRoundingSettings@handler@_V1@sycl@@AEAAXAEA_K00@Z -?OffsetSize@stream_impl@detail@_V1@sycl@@0_KB ?PushBack@exception_list@_V1@sycl@@AEAAX$$QEAVexception_ptr@std@@@Z ?PushBack@exception_list@_V1@sycl@@AEAAXAEBVexception_ptr@std@@@Z ?RangeRoundingTrace@handler@_V1@sycl@@AEAA_NXZ @@ -3936,9 +3885,6 @@ ?__trunc_impl@_V1@sycl@@YAMM@Z ?__trunc_impl@_V1@sycl@@YANN@Z ?accelerator_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z -?accessGlobalBuf@stream_impl@detail@_V1@sycl@@QEAA?AV?$accessor@D$00$0EAC@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@34@AEAVhandler@34@@Z 
-?accessGlobalFlushBuf@stream_impl@detail@_V1@sycl@@QEAA?AV?$accessor@D$00$0EAC@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@34@AEAVhandler@34@@Z -?accessGlobalOffset@stream_impl@detail@_V1@sycl@@QEAA?AV?$accessor@I$00$0EAF@$0HNO@$0A@V?$accessor_property_list@$$V@oneapi@ext@_V1@sycl@@@34@AEAVhandler@34@@Z ?add@device_global_map@detail@_V1@sycl@@YAXPEBXPEBD@Z ?add@host_pipe_map@detail@_V1@sycl@@YAXPEBXPEBD@Z ?add@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA?AVnode@34567@AEBVproperty_list@67@@Z @@ -3949,13 +3895,9 @@ ?addHostUnsampledImageAccessorAndWait@detail@_V1@sycl@@YAXPEAVUnsampledImageAccessorImplHost@123@@Z ?addImpl@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA?AVnode@34567@AEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z ?addImpl@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA?AVnode@34567@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z -?addInteropObject@buffer_impl@detail@_V1@sycl@@QEBAXAEAV?$vector@_KV?$allocator@_K@std@@@std@@@Z -?addOrReplaceAccessorProperties@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBVproperty_list@34@@Z ?addOrReplaceAccessorProperties@buffer_plain@detail@_V1@sycl@@IEAAXAEBVproperty_list@34@@Z ?addReduction@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@$$CBX@std@@@Z ?addStream@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vstream_impl@detail@_V1@sycl@@@std@@@Z -?advise_usm@MemoryManager@detail@_V1@sycl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KW4_pi_mem_advice@@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z 
-?advise_usm@MemoryManager@detail@_V1@sycl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KW4_pi_mem_advice@@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?alignedAlloc@OSUtil@detail@_V1@sycl@@SAPEAX_K0@Z ?alignedFree@OSUtil@detail@_V1@sycl@@SAXPEAX@Z ?aligned_alloc@_V1@sycl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBUcode_location@detail@12@@Z @@ -3978,18 +3920,6 @@ ?alloc_image_mem@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?alloc_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?alloc_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z -?allocate@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAVSYCLMemObjI@234@_NPEAXV?$vector@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@V?$allocator@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@2@@6@AEAPEAU_pi_event@@@Z -?allocateBufferObject@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAX_N_KAEBVproperty_list@34@@Z -?allocateHostMem@SYCLMemObjT@detail@_V1@sycl@@UEAAPEAXXZ -?allocateHostMemory@MemoryManager@detail@_V1@sycl@@SAPEAXPEAVSYCLMemObjI@234@PEAX_N_KAEBVproperty_list@34@@Z -?allocateImageObject@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAX_NAEBU_pi_image_desc@@AEBU_pi_image_format@@AEBVproperty_list@34@@Z -?allocateInteropMemObject@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAXAEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@AEBV56@AEBVproperty_list@34@AEAPEAU_pi_event@@@Z -?allocateMem@SYCLMemObjT@detail@_V1@sycl@@UEAAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@_NPEAXAEAPEAU_pi_event@@@Z 
-?allocateMem@buffer_impl@detail@_V1@sycl@@UEAAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@_NPEAXAEAPEAU_pi_event@@@Z -?allocateMem@image_impl@detail@_V1@sycl@@UEAAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@_NPEAXAEAPEAU_pi_event@@@Z -?allocateMemBuffer@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAVSYCLMemObjI@234@PEAX_N_KAEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@AEBV56@AEBVproperty_list@34@AEAPEAU_pi_event@@@Z -?allocateMemImage@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAVSYCLMemObjI@234@PEAX_N_KAEBU_pi_image_desc@@AEBU_pi_image_format@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@AEBV56@AEBVproperty_list@34@AEAPEAU_pi_event@@@Z -?allocateMemSubBuffer@MemoryManager@detail@_V1@sycl@@SAPEAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAX_K2V?$range@$02@34@V?$vector@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@V?$allocator@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@2@@6@AEAPEAU_pi_event@@@Z ?aspect_selector@_V1@sycl@@YA?AV?$function@$$A6AHAEBVdevice@_V1@sycl@@@Z@std@@AEBV?$vector@W4aspect@_V1@sycl@@V?$allocator@W4aspect@_V1@sycl@@@std@@@4@0@Z ?assertion@pi@detail@_V1@sycl@@YAX_NPEBD@Z ?associateWithHandler@detail@_V1@sycl@@YAXAEAVhandler@23@PEAVAccessorBaseHost@123@W4target@access@23@@Z @@ -4005,36 +3935,16 @@ ?begin_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$vector@Vqueue@_V1@sycl@@V?$allocator@Vqueue@_V1@sycl@@@std@@@std@@AEBVproperty_list@67@@Z ?build_from_source@detail@experimental@oneapi@ext@_V1@sycl@@YA?AV?$kernel_bundle@$01@56@AEAV?$kernel_bundle@$02@56@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@V?$allocator@V?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@2@@std@@PEAV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z 
?build_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$kernel_bundle@$0A@@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z -?canReadHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NPEAX_K@Z -?canReuseHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NPEAX_K@Z ?cancel_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ ?category@exception@_V1@sycl@@QEBAAEBVerror_category@std@@XZ -?checkImageDesc@image_impl@detail@_V1@sycl@@AEAA_NAEBU_pi_image_desc@@V?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAX@Z -?checkImageFormat@image_impl@detail@_V1@sycl@@AEAA_NAEBU_pi_image_format@@V?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@@Z ?code@exception@_V1@sycl@@QEBAAEBVerror_code@std@@XZ ?compile_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$kernel_bundle@$0A@@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z ?complete_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAA?AVevent@56@AEBVproperty_list@56@@Z ?computeFallbackKernelBounds@handler@_V1@sycl@@AEAA?AV?$id@$01@23@_K0@Z -?constructorNotification@buffer_impl@detail@_V1@sycl@@QEAAXAEBUcode_location@234@PEAXPEBX2IIQEA_K@Z ?constructorNotification@buffer_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBX2IIQEA_K@Z ?constructorNotification@detail@_V1@sycl@@YAXPEAX0W4target@access@23@W4mode@523@AEBUcode_location@123@@Z ?contains_specialization_constants@kernel_bundle_plain@detail@_V1@sycl@@QEBA_NXZ ?contextSetExtendedDeleter@pi@detail@_V1@sycl@@YAXAEBVcontext@34@P6AXPEAX@Z1@Z -?convertChannelOrder@detail@_V1@sycl@@YA?AW4_pi_image_channel_order@@W4image_channel_order@23@@Z -?convertChannelOrder@detail@_V1@sycl@@YA?AW4image_channel_order@23@W4_pi_image_channel_order@@@Z -?convertChannelType@detail@_V1@sycl@@YA?AW4_pi_image_channel_type@@W4image_channel_type@23@@Z 
-?convertChannelType@detail@_V1@sycl@@YA?AW4image_channel_type@23@W4_pi_image_channel_type@@@Z -?copy@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@IV?$range@$02@34@3V?$id@$02@34@I12I334IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z -?copy@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@IV?$range@$02@34@3V?$id@$02@34@I12I334IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z -?copy_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEBX_KV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@PEAX111V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?copy_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEBX_KV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@PEAX111V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?copy_from_device_global@MemoryManager@detail@_V1@sycl@@SAXPEBX_NV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K3PEAXAEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?copy_from_device_global@MemoryManager@detail@_V1@sycl@@SAXPEBX_NV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K3PEAXAEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?copy_image_bindless@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@0AEBU_pi_image_desc@@AEBU_pi_image_format@@W4_pi_image_copy_flags@@Upi_image_offset_struct@@5Upi_image_region_struct@@6AEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z 
-?copy_to_device_global@MemoryManager@detail@_V1@sycl@@SAXPEBX_NV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K30AEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?copy_to_device_global@MemoryManager@detail@_V1@sycl@@SAXPEBX_NV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K30AEBV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?copy_usm@MemoryManager@detail@_V1@sycl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEAXV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?copy_usm@MemoryManager@detail@_V1@sycl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEAXV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?cpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z @@ -4050,7 +3960,6 @@ ?create_kernel_bundle_from_source@experimental@oneapi@ext@_V1@sycl@@YA?AV?$kernel_bundle@$02@45@AEBVcontext@45@W4source_language@12345@AEBV?$vector@W4byte@std@@V?$allocator@W4byte@std@@@2@@std@@@Z ?default_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z ?deleteAccProps@buffer_plain@detail@_V1@sycl@@IEAAXAEBW4PropWithDataKind@234@@Z -?deleteAccessorProperty@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBW4PropWithDataKind@234@@Z ?depends_on@handler@_V1@sycl@@IEAAXAEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@Z ?depends_on@handler@_V1@sycl@@IEAAXAEBV?$vector@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@V?$allocator@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@2@@std@@@Z 
?depends_on@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z @@ -4061,9 +3970,6 @@ ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUsampled_image_handle@12345@AEBVqueue@45@@Z ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUunsampled_image_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?destroy_image_handle@experimental@oneapi@ext@_V1@sycl@@YAXAEAUunsampled_image_handle@12345@AEBVqueue@45@@Z -?destructorNotification@buffer_impl@detail@_V1@sycl@@QEAAXPEAX@Z -?detachMemoryObject@SYCLMemObjT@detail@_V1@sycl@@QEBAXAEBV?$shared_ptr@VSYCLMemObjT@detail@_V1@sycl@@@std@@@Z -?determineHostPtr@SYCLMemObjT@detail@_V1@sycl@@IEAAXAEBV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@_NAEAPEAXAEA_N@Z ?device_has@queue@_V1@sycl@@AEBA_NW4aspect@23@@Z ?die@pi@detail@_V1@sycl@@YAXPEBD@Z ?discard_or_return@queue@_V1@sycl@@AEAA?AVevent@23@AEBV423@@Z @@ -4075,12 +3981,12 @@ ?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEAVqueue@67@@Z ?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$vector@Vqueue@_V1@sycl@@V?$allocator@Vqueue@_V1@sycl@@@std@@@std@@@Z ?end_recording@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXXZ +?eventNeeded@handler@_V1@sycl@@AEBA_NXZ ?ext_codeplay_supports_fusion@queue@_V1@sycl@@QEBA_NXZ ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_intel_read_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEAX_K_N@Z ?ext_intel_write_host_pipe@handler@_V1@sycl@@AEAAXVstring_view@detail@23@PEAX_K_N@Z 
-?ext_oneapi_advise_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEBX_KW4_pi_mem_advice@@V?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4arch_category@experimental@oneapi@ext@23@@Z ?ext_oneapi_architecture_is@device@_V1@sycl@@QEAA_NW4architecture@experimental@oneapi@ext@23@@Z ?ext_oneapi_barrier@handler@_V1@sycl@@QEAAXAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z @@ -4116,16 +4022,10 @@ ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?ext_oneapi_copy@queue@_V1@sycl@@QEAA?AVevent@23@Uimage_mem_handle@experimental@oneapi@ext@23@V?$range@$02@23@AEBUimage_descriptor@67823@PEAX111V423@AEBUcode_location@detail@23@@Z -?ext_oneapi_copyD2D_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAXIV?$range@$02@34@4V?$id@$02@34@I3I445IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z -?ext_oneapi_copyD2H_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAXIV?$range@$02@34@4V?$id@$02@34@IPEADI45IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z -?ext_oneapi_copyH2D_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEADIV?$range@$02@34@V?$id@$02@34@IPEAXI445IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z 
-?ext_oneapi_copy_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEBXPEAU_pi_ext_command_buffer@@_KPEAXV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_disable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z ?ext_oneapi_empty@queue@_V1@sycl@@QEBA_NXZ ?ext_oneapi_enable_peer_access@device@_V1@sycl@@QEAAXAEBV123@@Z ?ext_oneapi_fill2d_impl@handler@_V1@sycl@@AEAAXPEAX_KPEBX111@Z -?ext_oneapi_fill_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAVSYCLMemObjI@234@PEAX_KPEBDIV?$range@$02@34@6V?$id@$02@34@IV?$vector@IV?$allocator@I@std@@@6@PEAI@Z -?ext_oneapi_fill_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KHV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_get_composite_devices@platform@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ ?ext_oneapi_get_default_context@platform@_V1@sycl@@QEBA?AVcontext@23@XZ ?ext_oneapi_get_graph@queue@_V1@sycl@@QEBA?AV?$command_graph@$0A@@experimental@oneapi@ext@23@XZ @@ -4149,13 +4049,14 @@ ?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVkernel@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vkernel_id@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVkernel_id@34@@Z +?ext_oneapi_owner_before@?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@2oneapi@ext@34@@Z +?ext_oneapi_owner_before@?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVphysical_mem@experimental@oneapi@ext@34@@Z 
?ext_oneapi_owner_before@?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vplatform@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVplatform@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vqueue@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVqueue@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vstream@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vstream@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVstream@34@@Z -?ext_oneapi_prefetch_usm_cmd_buffer@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAU_pi_ext_command_buffer@@PEAX_KV?$vector@IV?$allocator@I@std@@@6@PEAI@Z ?ext_oneapi_prod@queue@_V1@sycl@@QEAAXXZ ?ext_oneapi_set_external_event@queue@_V1@sycl@@QEAAXAEBVevent@23@@Z ?ext_oneapi_signal_external_semaphore@handler@_V1@sycl@@QEAAXUinterop_semaphore_handle@experimental@oneapi@ext@23@@Z @@ -4181,26 +4082,20 @@ ?ext_oneapi_wait_external_semaphore@queue@_V1@sycl@@QEAA?AVevent@23@Uinterop_semaphore_handle@experimental@oneapi@ext@23@_KV423@AEBUcode_location@detail@23@@Z ?extractArgsAndReqs@handler@_V1@sycl@@AEAAXXZ ?extractArgsAndReqsFromLambda@handler@_V1@sycl@@AEAAXPEAD_KPEBUkernel_param_desc_t@detail@23@_N@Z -?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z 
-?fill@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@7@@Z -?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?fill_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22AEBV?$vector@DV?$allocator@D@std@@@6@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z -?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?fill_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?finalize@handler@_V1@sycl@@AEAA?AVevent@23@XZ ?finalize@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$command_graph@$00@34567@AEBVproperty_list@67@@Z ?finalizeImpl@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXXZ ?find_device_intersection@detail@_V1@sycl@@YA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@V?$kernel_bundle@$00@_V1@sycl@@V?$allocator@V?$kernel_bundle@$00@_V1@sycl@@@std@@@5@@Z -?flush@stream_impl@detail@_V1@sycl@@QEAAXAEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@Z -?flush@stream_impl@detail@_V1@sycl@@QEAAXXZ ?free@_V1@sycl@@YAXPEAXAEBVcontext@12@AEBUcode_location@detail@12@@Z ?free@_V1@sycl@@YAXPEAXAEBVqueue@12@AEBUcode_location@detail@12@@Z 
-?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z +?free_virtual_mem@experimental@oneapi@ext@_V1@sycl@@YAX_K0AEBVcontext@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVqueue@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@W4image_type@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@W4image_type@12345@AEBVqueue@45@@Z +?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVqueue@45@@Z +?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?frexp_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAH@Z ?frexp_impl@detail@_V1@sycl@@YAMMPEAH@Z ?frexp_impl@detail@_V1@sycl@@YANNPEAH@Z @@ -4215,35 +4110,23 @@ ?getAccessRange@AccessorBaseHost@detail@_V1@sycl@@QEAAAEAV?$range@$02@34@XZ ?getAccessRange@AccessorBaseHost@detail@_V1@sycl@@QEBAAEBV?$range@$02@34@XZ ?getBorderColor@detail@_V1@sycl@@YA?AV?$vec@M$03@23@W4image_channel_order@23@@Z -?getBufSizeForContext@SYCLMemObjT@detail@_V1@sycl@@SA_KAEBV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@_K@Z ?getChannelOrder@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AW4image_channel_order@34@XZ ?getChannelOrder@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AW4image_channel_order@34@XZ -?getChannelOrder@image_impl@detail@_V1@sycl@@QEBA?AW4image_channel_order@34@XZ ?getChannelOrder@image_plain@detail@_V1@sycl@@IEBA?AW4image_channel_order@34@XZ ?getChannelType@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AW4image_channel_type@34@XZ 
?getChannelType@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AW4image_channel_type@34@XZ -?getChannelType@image_impl@detail@_V1@sycl@@QEBA?AW4image_channel_type@34@XZ ?getChannelType@image_plain@detail@_V1@sycl@@IEBA?AW4image_channel_type@34@XZ ?getCommandGraph@handler@_V1@sycl@@AEBA?AV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@XZ ?getContextImplPtr@handler@_V1@sycl@@AEBAAEBV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@XZ ?getCurrentDSODir@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ?getDeviceFromHandler@detail@_V1@sycl@@YA?AVdevice@23@AEAVhandler@23@@Z -?getDevices@image_impl@detail@_V1@sycl@@AEAA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@V?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@6@@Z ?getDirName@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBD@Z -?getESIMDDeviceInterface@detail@_V1@sycl@@YAPEAUESIMDDeviceInterface@123@XZ ?getElemSize@AccessorBaseHost@detail@_V1@sycl@@QEBAIXZ ?getElementSize@LocalAccessorBaseHost@detail@_V1@sycl@@QEAAHXZ ?getElementSize@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAHXZ ?getElementSize@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAHXZ -?getElementSize@image_impl@detail@_V1@sycl@@QEBA_KXZ ?getElementSize@image_plain@detail@_V1@sycl@@IEBA_KXZ ?getEndTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ -?getImageDesc@image_impl@detail@_V1@sycl@@AEAA?AU_pi_image_desc@@_N@Z -?getImageElementSize@detail@_V1@sycl@@YAEEW4image_channel_type@23@@Z -?getImageFormat@image_impl@detail@_V1@sycl@@AEAA?AU_pi_image_format@@XZ -?getImageNumberChannels@detail@_V1@sycl@@YAEW4image_channel_order@23@@Z -?getImageType@image_impl@detail@_V1@sycl@@AEAA?AW4_pi_mem_type@@XZ -?getInteropContext@SYCLMemObjT@detail@_V1@sycl@@UEBA?AV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@XZ ?getKernelName@handler@_V1@sycl@@AEAA?AVstring@detail@23@XZ 
?getMaxWorkGroups@handler@_V1@sycl@@AEAA?AV?$optional@V?$array@_K$02@std@@@std@@XZ ?getMaxWorkGroups_v2@handler@_V1@sycl@@AEAA?AV?$tuple@V?$array@_K$02@std@@_N@std@@XZ @@ -4264,7 +4147,6 @@ ?getNativeImpl@kernel@_V1@sycl@@AEBA_KXZ ?getNativeMem@interop_handle@_V1@sycl@@AEBA_KPEAVAccessorImplHost@detail@23@@Z ?getNativeQueue@interop_handle@_V1@sycl@@AEBA_KAEAH@Z -?getNativeVector@buffer_impl@detail@_V1@sycl@@QEBA?AV?$vector@_KV?$allocator@_K@std@@@std@@W4backend@34@@Z ?getNativeVector@buffer_plain@detail@_V1@sycl@@IEBA?AV?$vector@_KV?$allocator@_K@std@@@std@@W4backend@34@@Z ?getNativeVector@event@_V1@sycl@@AEBA?AV?$vector@_KV?$allocator@_K@std@@@std@@XZ ?getNumOfDims@LocalAccessorBaseHost@detail@_V1@sycl@@QEAAHXZ @@ -4273,14 +4155,12 @@ ?getOSMemSize@OSUtil@detail@_V1@sycl@@SA_KXZ ?getOffset@AccessorBaseHost@detail@_V1@sycl@@QEAAAEAV?$id@$02@34@XZ ?getOffset@AccessorBaseHost@detail@_V1@sycl@@QEBAAEBV?$id@$02@34@XZ -?getOrCreateSampler@sampler_impl@detail@_V1@sycl@@QEAAPEAU_pi_sampler@@AEBVcontext@34@@Z ?getOrInsertHandlerKernelBundle@handler@_V1@sycl@@AEBA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@_N@Z ?getOrWaitEvents@detail@_V1@sycl@@YA?AV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@std@@V?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@5@V?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@5@@Z ?getPitch@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AV?$id@$02@34@XZ ?getPitch@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AV?$id@$02@34@XZ ?getPixelCoordLinearFiltMode@detail@_V1@sycl@@YA?AV?$vec@H$07@23@V?$vec@M$03@23@W4addressing_mode@23@V?$range@$02@23@AEAV523@@Z ?getPixelCoordNearestFiltMode@detail@_V1@sycl@@YA?AV?$vec@H$03@23@V?$vec@M$03@23@W4addressing_mode@23@V?$range@$02@23@@Z -?getPlugin@SYCLMemObjT@detail@_V1@sycl@@QEBAAEBV?$shared_ptr@Vplugin@detail@_V1@sycl@@@std@@XZ ?getPropList@AccessorBaseHost@detail@_V1@sycl@@QEBAAEBVproperty_list@34@XZ 
?getPropList@LocalAccessorBaseHost@detail@_V1@sycl@@QEBAAEBVproperty_list@34@XZ ?getPropList@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAAEBVproperty_list@34@XZ @@ -4293,29 +4173,20 @@ ?getPtr@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAPEAXXZ ?getPtr@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEAAPEAXXZ ?getPtr@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAPEAXXZ -?getRowPitch@image_impl@detail@_V1@sycl@@QEBA_KXZ ?getRowPitch@image_plain@detail@_V1@sycl@@IEBA_KXZ ?getSampler@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBA?AUimage_sampler@34@XZ -?getSampler@image_impl@detail@_V1@sycl@@QEBA?AUimage_sampler@34@XZ ?getSampler@image_plain@detail@_V1@sycl@@IEBA?AUimage_sampler@34@XZ ?getSize@LocalAccessorBaseHost@detail@_V1@sycl@@QEAAAEAV?$range@$02@34@XZ ?getSize@LocalAccessorBaseHost@detail@_V1@sycl@@QEBAAEBV?$range@$02@34@XZ ?getSize@SampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAAEBV?$range@$02@34@XZ ?getSize@UnsampledImageAccessorBaseHost@detail@_V1@sycl@@QEBAAEBV?$range@$02@34@XZ ?getSize@buffer_plain@detail@_V1@sycl@@IEBA_KXZ -?getSizeInBytes@SYCLMemObjT@detail@_V1@sycl@@UEBA_KXZ -?getSlicePitch@image_impl@detail@_V1@sycl@@QEBA_KXZ ?getSlicePitch@image_plain@detail@_V1@sycl@@IEBA_KXZ ?getStartTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ -?getType@SYCLMemObjT@detail@_V1@sycl@@UEBA?AW4MemObjType@SYCLMemObjI@234@XZ -?getType@buffer_impl@detail@_V1@sycl@@UEBA?AW4MemObjType@SYCLMemObjI@234@XZ ?getType@handler@_V1@sycl@@AEAA?AW4CGTYPE@CG@detail@23@XZ -?getType@image_impl@detail@_V1@sycl@@UEBA?AW4MemObjType@SYCLMemObjI@234@XZ -?getUserPtr@SYCLMemObjT@detail@_V1@sycl@@QEBAPEAXXZ ?getValueFromDynamicParameter@detail@_V1@sycl@@YAPEAXAEAVdynamic_parameter_base@1experimental@oneapi@ext@23@@Z +?get_access_mode@experimental@oneapi@ext@_V1@sycl@@YA?AW4address_access_mode@12345@PEBX_KAEBVcontext@45@@Z ?get_addressing_mode@sampler@_V1@sycl@@QEBA?AW4addressing_mode@23@XZ 
-?get_addressing_mode@sampler_impl@detail@_V1@sycl@@QEBA?AW4addressing_mode@34@XZ -?get_allocator_internal@SYCLMemObjT@detail@_V1@sycl@@QEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ ?get_allocator_internal@buffer_plain@detail@_V1@sycl@@IEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ ?get_allocator_internal@image_plain@detail@_V1@sycl@@IEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ ?get_backend@context@_V1@sycl@@QEBA?AW4backend@23@XZ @@ -4334,13 +4205,12 @@ ?get_context@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVcontext@56@XZ ?get_context@kernel@_V1@sycl@@QEBA?AVcontext@23@XZ ?get_context@kernel_bundle_plain@detail@_V1@sycl@@QEBA?AVcontext@34@XZ +?get_context@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVcontext@56@XZ ?get_context@queue@_V1@sycl@@QEBA?AVcontext@23@XZ ?get_coordinate_normalization_mode@sampler@_V1@sycl@@QEBA?AW4coordinate_normalization_mode@23@XZ -?get_coordinate_normalization_mode@sampler_impl@detail@_V1@sycl@@QEBA?AW4coordinate_normalization_mode@34@XZ -?get_count@SYCLMemObjT@detail@_V1@sycl@@QEBA_KXZ -?get_count@image_impl@detail@_V1@sycl@@QEBA_KXZ ?get_count@image_plain@detail@_V1@sycl@@IEBA_KXZ ?get_descriptor@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAAEBUimage_descriptor@23456@XZ +?get_device@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVdevice@56@XZ ?get_device@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVdevice@56@XZ ?get_device@queue@_V1@sycl@@QEBA?AVdevice@23@XZ ?get_devices@context@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ @@ -4349,7 +4219,6 @@ ?get_devices@platform@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4device_type@info@23@@Z 
?get_empty_interop_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@@Z ?get_filtering_mode@sampler@_V1@sycl@@QEBA?AW4filtering_mode@23@XZ -?get_filtering_mode@sampler_impl@detail@_V1@sycl@@QEBA?AW4filtering_mode@34@XZ ?get_flags@stream@_V1@sycl@@AEBAIXZ ?get_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@XZ ?get_image_channel_type@experimental@oneapi@ext@_V1@sycl@@YA?AW4image_channel_type@45@Uimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4367,7 +4236,8 @@ ?get_kernel_ids@_V1@sycl@@YA?AV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@std@@XZ ?get_kernel_ids@kernel_bundle_plain@detail@_V1@sycl@@QEBA?AV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@std@@XZ ?get_max_statement_size@stream@_V1@sycl@@QEBA_KXZ -?get_max_statement_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ +?get_mem_granularity@experimental@oneapi@ext@_V1@sycl@@YA_KAEBVcontext@45@W4granularity_mode@12345@@Z +?get_mem_granularity@experimental@oneapi@ext@_V1@sycl@@YA_KAEBVdevice@45@AEBVcontext@45@W4granularity_mode@12345@@Z ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVdevice@45@AEBVcontext@45@@Z ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVqueue@45@@Z ?get_mip_level_mem_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@I@Z @@ -4376,7 +4246,6 @@ ?get_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ?get_num_channels@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAIXZ ?get_pipe_name@pipe_base@experimental@intel@ext@_V1@sycl@@KA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBX@Z 
-?get_pitch@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$01@34@XZ ?get_pitch@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$01@34@XZ ?get_platform@context@_V1@sycl@@QEBA?AVplatform@23@XZ ?get_platform@device@_V1@sycl@@QEBA?AVplatform@23@XZ @@ -4386,13 +4255,11 @@ ?get_precision@stream@_V1@sycl@@QEBA_KXZ ?get_predecessors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ?get_queue@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEBA?AVqueue@56@XZ -?get_range@image_impl@detail@_V1@sycl@@QEBA?AV?$range@$02@34@XZ ?get_range@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$range@$02@56@XZ ?get_range@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$02@34@XZ ?get_root_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ ?get_size@image_plain@detail@_V1@sycl@@IEBA_KXZ ?get_size@stream@_V1@sycl@@QEBA_KXZ -?get_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ ?get_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEBAXPEBDPEAX@Z ?get_stream_mode@stream@_V1@sycl@@QEBA?AW4stream_manipulator@23@XZ ?get_successors@node@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ @@ -4401,17 +4268,10 @@ ?get_wait_list@event@_V1@sycl@@QEAA?AV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@XZ ?get_width@stream@_V1@sycl@@QEBA_KXZ ?get_work_item_buffer_size@stream@_V1@sycl@@QEBA_KXZ -?get_work_item_buffer_size@stream_impl@detail@_V1@sycl@@QEBA_KXZ ?gpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z -?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXPEAX@Z@std@@_K_N@Z -?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$shared_ptr@X@std@@_K_N@Z 
-?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXPEAX_K@Z -?handleHostData@SYCLMemObjT@detail@_V1@sycl@@QEAAXPEBX_K@Z ?handleRelease@buffer_plain@detail@_V1@sycl@@IEBAXXZ -?handleWriteAccessorCreation@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ ?has@device@_V1@sycl@@QEBA_NW4aspect@23@@Z ?has@platform@_V1@sycl@@QEBA_NW4aspect@23@@Z -?hasUserDataPtr@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ ?has_context@exception@_V1@sycl@@QEBA_NXZ ?has_extension@device@_V1@sycl@@QEBA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?has_extension@opencl@_V1@sycl@@YA_NAEBVdevice@23@AEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z @@ -4424,20 +4284,16 @@ ?has_kernel_bundle_impl@detail@_V1@sycl@@YA_NAEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@6@W4bundle_state@23@@Z ?has_kernel_bundle_impl@detail@_V1@sycl@@YA_NAEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@W4bundle_state@23@@Z ?has_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEBA_NPEBD@Z -?initStreamHost@stream_impl@detail@_V1@sycl@@QEAAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@@Z ?internalProfilingTagImpl@handler@_V1@sycl@@AEAAXXZ ?isBackendSupportedFillSize@handler@_V1@sycl@@CA_N_K@Z ?isConstOrGlobal@handler@_V1@sycl@@CA_NW4target@access@23@@Z ?isDeviceGlobalUsedInKernel@detail@_V1@sycl@@YA_NPEBX@Z -?isHostPointerReadOnly@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ ?isImageOrImageArray@handler@_V1@sycl@@CA_NW4target@access@23@@Z -?isInterop@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ ?isMemoryObjectUsedByGraph@AccessorBaseHost@detail@_V1@sycl@@QEBA_NXZ ?isOutOfRange@detail@_V1@sycl@@YA_NV?$vec@H$03@23@W4addressing_mode@23@V?$range@$02@23@@Z ?isPathPresent@OSUtil@detail@_V1@sycl@@SA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?isPlaceholder@AccessorBaseHost@detail@_V1@sycl@@QEBA_NXZ 
?isStateExplicitKernelBundle@handler@_V1@sycl@@AEBA_NXZ -?isUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEBA_NXZ ?isValidModeForDestinationAccessor@handler@_V1@sycl@@CA_NW4mode@access@23@@Z ?isValidModeForSourceAccessor@handler@_V1@sycl@@CA_NW4mode@access@23@@Z ?isValidTargetForExplicitOp@handler@_V1@sycl@@CA_NW4target@access@23@@Z @@ -4462,27 +4318,19 @@ ?lgamma_r_impl@detail@_V1@sycl@@YANNPEAH@Z ?link_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$vector@V?$kernel_bundle@$00@_V1@sycl@@V?$allocator@V?$kernel_bundle@$00@_V1@sycl@@@std@@@5@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z ?makeDir@OSUtil@detail@_V1@sycl@@SAHPEBD@Z -?make_context@detail@_V1@sycl@@YA?AVcontext@23@_KAEBV?$function@$$A6AXVexception_list@_V1@sycl@@@Z@std@@W4backend@23@@Z -?make_context@level_zero@oneapi@ext@_V1@sycl@@YA?AVcontext@45@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@_K_N@Z -?make_context@opencl@_V1@sycl@@YA?AVcontext@23@_K@Z +?make_context@detail@_V1@sycl@@YA?AVcontext@23@_KAEBV?$function@$$A6AXVexception_list@_V1@sycl@@@Z@std@@W4backend@23@_NAEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@6@@Z ?make_device@detail@_V1@sycl@@YA?AVdevice@23@_KW4backend@23@@Z -?make_device@level_zero@oneapi@ext@_V1@sycl@@YA?AVdevice@45@AEBVplatform@45@_K@Z -?make_device@opencl@_V1@sycl@@YA?AVdevice@23@_K@Z +?make_device@detail@level_zero@oneapi@ext@_V1@sycl@@YA?AVdevice@56@AEBVplatform@56@_K@Z ?make_edge@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEAVnode@34567@0@Z ?make_error_code@_V1@sycl@@YA?AVerror_code@std@@W4errc@12@@Z ?make_event@detail@_V1@sycl@@YA?AVevent@23@_KAEBVcontext@23@W4backend@23@@Z ?make_event@detail@_V1@sycl@@YA?AVevent@23@_KAEBVcontext@23@_NW4backend@23@@Z -?make_event@level_zero@oneapi@ext@_V1@sycl@@YA?AVevent@45@AEBVcontext@45@_K_N@Z 
?make_kernel@detail@_V1@sycl@@YA?AVkernel@23@AEBVcontext@23@AEBV?$kernel_bundle@$01@23@_K_NW4backend@23@@Z ?make_kernel@detail@_V1@sycl@@YA?AVkernel@23@_KAEBVcontext@23@W4backend@23@@Z ?make_kernel_bundle@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@_KAEBVcontext@23@W4bundle_state@23@W4backend@23@@Z ?make_kernel_bundle@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@_KAEBVcontext@23@_NW4bundle_state@23@W4backend@23@@Z ?make_platform@detail@_V1@sycl@@YA?AVplatform@23@_KW4backend@23@@Z -?make_platform@level_zero@oneapi@ext@_V1@sycl@@YA?AVplatform@45@_K@Z -?make_platform@opencl@_V1@sycl@@YA?AVplatform@23@_K@Z ?make_queue@detail@_V1@sycl@@YA?AVqueue@23@_KHAEBVcontext@23@PEBVdevice@23@_NAEBVproperty_list@23@AEBV?$function@$$A6AXVexception_list@_V1@sycl@@@Z@std@@W4backend@23@@Z -?make_queue@level_zero@oneapi@ext@_V1@sycl@@YA?AVqueue@45@AEBVcontext@45@AEBVdevice@45@_K_N3AEBVproperty_list@45@@Z -?make_queue@opencl@_V1@sycl@@YA?AVqueue@23@AEBVcontext@23@_K@Z ?malloc@_V1@sycl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBUcode_location@detail@12@@Z ?malloc@_V1@sycl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z ?malloc@_V1@sycl@@YAPEAX_KAEBVqueue@12@W4alloc@usm@12@AEBUcode_location@detail@12@@Z @@ -4499,20 +4347,19 @@ ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBUcode_location@detail@12@@Z ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z -?map@MemoryManager@detail@_V1@sycl@@SAPEAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@W4mode@access@34@IV?$range@$02@34@4V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z +?map@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBAPEAX_K0W4address_access_mode@23456@0@Z 
?map_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?map_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z -?markAsInternal@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ -?markBeingUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ ?markBufferAsInternal@detail@_V1@sycl@@YAXAEBV?$shared_ptr@Vbuffer_impl@detail@_V1@sycl@@@std@@@Z -?markNoLongerBeingUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ +?mem_advise@experimental@oneapi@ext@_V1@sycl@@YAXVqueue@45@PEAX_KHAEBUcode_location@detail@45@@Z ?mem_advise@handler@_V1@sycl@@QEAAXPEBX_KH@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHV423@AEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KW4_pi_mem_advice@@AEBUcode_location@detail@23@@Z +?memcpy@experimental@oneapi@ext@_V1@sycl@@YAXVqueue@45@PEAXPEBX_KAEBUcode_location@detail@45@@Z ?memcpy@handler@_V1@sycl@@QEAAXPEAXPEBX_K@Z ?memcpy@queue@_V1@sycl@@QEAA?AVevent@23@PEAXPEBX_KAEBUcode_location@detail@23@@Z ?memcpy@queue@_V1@sycl@@QEAA?AVevent@23@PEAXPEBX_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z @@ -4523,19 +4370,17 @@ ?memcpyToDeviceGlobal@handler@_V1@sycl@@AEAAXPEBX0_N_K2@Z 
?memcpyToDeviceGlobal@queue@_V1@sycl@@AEAA?AVevent@23@PEAXPEBX_N_K3AEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@@Z ?memcpyToHostOnlyDeviceGlobal@handler@_V1@sycl@@AEAAXPEBX0_K_N11@Z +?memset@experimental@oneapi@ext@_V1@sycl@@YAXVqueue@45@PEAXH_KAEBUcode_location@detail@45@@Z ?memset@handler@_V1@sycl@@QEAAXPEAXH_K@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KAEBUcode_location@detail@23@@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z ?memset@queue@_V1@sycl@@QEAA?AVevent@23@PEAXH_KV423@AEBUcode_location@detail@23@@Z -?memset_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22DV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?memset_2d_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K22DV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?message@SYCLCategory@detail@_V1@sycl@@UEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@Z ?modf_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAV45123@@Z ?modf_impl@detail@_V1@sycl@@YAMMPEAM@Z ?modf_impl@detail@_V1@sycl@@YANNPEAN@Z ?name@SYCLCategory@detail@_V1@sycl@@UEBAPEBDXZ ?native_specialization_constant@kernel_bundle_plain@detail@_V1@sycl@@QEBA_NXZ -?needsWriteBack@SYCLMemObjT@detail@_V1@sycl@@QEBA_NXZ ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$00@23@Vkernel@23@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$01@23@Vkernel@23@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$02@23@Vkernel@23@@Z @@ -4547,8 +4392,6 @@ ?prefetch@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KAEBUcode_location@detail@23@@Z ?prefetch@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z 
?prefetch@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KV423@AEBUcode_location@detail@23@@Z -?prefetch_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z -?prefetch_usm@MemoryManager@detail@_V1@sycl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_KV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@AEBV?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@6@@Z ?prepare_for_device_copy@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVcontext@45@@Z ?prepare_for_device_copy@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVqueue@45@@Z ?print_graph@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBAXV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_N@Z @@ -4559,10 +4402,6 @@ ?reduGetMaxWGSize@detail@_V1@sycl@@YA_KV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K@Z ?reduGetPreferredWGSize@detail@_V1@sycl@@YA_KAEAV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_K@Z ?registerDynamicParameter@handler@_V1@sycl@@AEAAXAEAVdynamic_parameter_base@detail@experimental@oneapi@ext@23@H@Z -?release@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAVSYCLMemObjI@234@PEAXV?$vector@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@V?$allocator@V?$shared_ptr@Vevent_impl@detail@_V1@sycl@@@std@@@2@@6@AEAPEAU_pi_event@@@Z -?releaseHostMem@SYCLMemObjT@detail@_V1@sycl@@UEAAXPEAX@Z -?releaseMem@SYCLMemObjT@detail@_V1@sycl@@UEAAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAX@Z -?releaseMemObj@MemoryManager@detail@_V1@sycl@@SAXV?$shared_ptr@Vcontext_impl@detail@_V1@sycl@@@std@@PEAVSYCLMemObjI@234@PEAX2@Z ?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUinterop_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?release_external_memory@experimental@oneapi@ext@_V1@sycl@@YAXUinterop_mem_handle@12345@AEBVqueue@45@@Z 
?release_from_device_copy@experimental@oneapi@ext@_V1@sycl@@YAXPEBXAEBVcontext@45@@Z @@ -4571,14 +4410,11 @@ ?remquo_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@0PEAH@Z ?remquo_impl@detail@_V1@sycl@@YAMMMPEAH@Z ?remquo_impl@detail@_V1@sycl@@YANNNPEAH@Z +?reserve_virtual_mem@experimental@oneapi@ext@_V1@sycl@@YA_K_K0AEBVcontext@45@@Z ?reset@filter_selector@ONEAPI@_V1@sycl@@QEBAXXZ ?reset@filter_selector@oneapi@ext@_V1@sycl@@QEBAXXZ -?resize@AccessorImplHost@detail@_V1@sycl@@QEAAX_K@Z -?resize@buffer_impl@detail@_V1@sycl@@QEAAX_K@Z ?sampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@PEBXIAEBUcode_location@123@@Z -?sampledImageConstructorNotification@image_impl@detail@_V1@sycl@@QEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@AEBUimage_sampler@34@@Z ?sampledImageConstructorNotification@image_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@AEBUimage_sampler@34@@Z -?sampledImageDestructorNotification@image_impl@detail@_V1@sycl@@QEAAXPEAX@Z ?sampledImageDestructorNotification@image_plain@detail@_V1@sycl@@IEAAXPEAX@Z ?saveCodeLoc@handler@_V1@sycl@@AEAAXUcode_location@detail@23@@Z ?select_device@detail@_V1@sycl@@YA?AVdevice@23@AEBV?$function@$$A6AHAEBVdevice@_V1@sycl@@@Z@std@@@Z @@ -4586,7 +4422,6 @@ ?select_device@device_selector@_V1@sycl@@UEBA?AVdevice@23@XZ ?select_device@filter_selector@ONEAPI@_V1@sycl@@UEBA?AVdevice@34@XZ ?select_device@filter_selector@oneapi@ext@_V1@sycl@@UEBA?AVdevice@45@XZ -?setAlign@SYCLMemObjT@detail@_V1@sycl@@QEAAX_K@Z ?setArgHelper@handler@_V1@sycl@@AEAAXH$$QEAVsampler@23@@Z ?setArgsHelper@handler@_V1@sycl@@AEAAXH@Z ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@@Z @@ -4595,15 +4430,11 @@ ?setKernelIsCooperative@handler@_V1@sycl@@AEAAX_N@Z ?setLocalAccessorArgHelper@handler@_V1@sycl@@AEAAXHAEAVLocalAccessorBaseHost@detail@23@@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z 
-?setPitches@image_impl@detail@_V1@sycl@@AEAAXAEBV?$range@$01@34@@Z -?setPitches@image_impl@detail@_V1@sycl@@AEAAXXZ ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ ?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ ?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z ?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z -?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAX$$T@Z -?set_final_data@SYCLMemObjT@detail@_V1@sycl@@QEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z -?set_final_data_from_storage@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ +?set_access_mode@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KW4address_access_mode@12345@AEBVcontext@45@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXXZ ?set_final_data_internal@image_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z @@ -4612,18 +4443,15 @@ ?set_flag@stream@_V1@sycl@@AEBAXII@Z ?set_manipulator@stream@_V1@sycl@@AEBAXW4stream_manipulator@23@@Z ?set_specialization_constant_impl@kernel_bundle_plain@detail@_V1@sycl@@IEAAXPEBDPEAX_K@Z -?set_write_back@SYCLMemObjT@detail@_V1@sycl@@QEAAX_N@Z ?set_write_back@buffer_plain@detail@_V1@sycl@@IEAAX_N@Z ?set_write_back@image_plain@detail@_V1@sycl@@IEAAX_N@Z ?sincos_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAV45123@@Z ?sincos_impl@detail@_V1@sycl@@YAMMPEAM@Z ?sincos_impl@detail@_V1@sycl@@YANNPEAN@Z ?single_task@handler@_V1@sycl@@QEAAXVkernel@23@@Z -?size@SYCLMemObjT@detail@_V1@sycl@@QEBA_KXZ ?size@exception_list@_V1@sycl@@QEBA_KXZ -?size@image_impl@detail@_V1@sycl@@QEBA_KXZ +?size@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_KXZ ?size@stream@_V1@sycl@@QEBA_KXZ -?size@stream_impl@detail@_V1@sycl@@QEBA_KXZ ?start@HostProfilingInfo@detail@_V1@sycl@@QEAAXXZ 
?start_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ ?stringifyErrorCode@detail@_V1@sycl@@YAPEBDH@Z @@ -4631,29 +4459,24 @@ ?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z +?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@@Z ?supportsUSMFill2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemcpy2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemset2D@handler@_V1@sycl@@AEAA_NXZ ?sycl_category@_V1@sycl@@YAAEBVerror_category@std@@XZ ?throwIfActionIsCreated@handler@_V1@sycl@@AEAAXXZ ?throw_asynchronous@queue@_V1@sycl@@QEAAXXZ -?unmap@MemoryManager@detail@_V1@sycl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@1V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z +?unmap@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVcontext@45@@Z ?unsampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@W4mode@access@23@PEBXIAEBUcode_location@123@@Z -?unsampledImageConstructorNotification@image_impl@detail@_V1@sycl@@QEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@@Z ?unsampledImageConstructorNotification@image_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@@Z -?unsampledImageDestructorNotification@image_impl@detail@_V1@sycl@@QEAAXPEAX@Z ?unsampledImageDestructorNotification@image_plain@detail@_V1@sycl@@IEAAXPEAX@Z ?unset_flag@stream@_V1@sycl@@AEBAXI@Z 
?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$command_graph@$0A@@34567@@Z ?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@@Z ?update@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAXAEBVnode@34567@@Z ?updateAccessor@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBVAccessorBaseHost@267@@Z -?updateHostMemory@SYCLMemObjT@detail@_V1@sycl@@IEAAXQEAX@Z -?updateHostMemory@SYCLMemObjT@detail@_V1@sycl@@IEAAXXZ ?updateValue@dynamic_parameter_base@detail@experimental@oneapi@ext@_V1@sycl@@IEAAXPEBX_K@Z -?useHostPtr@SYCLMemObjT@detail@_V1@sycl@@QEAA_NXZ ?use_kernel_bundle@handler@_V1@sycl@@QEAAXAEBV?$kernel_bundle@$01@23@@Z -?usesPinnedHostMemory@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ ?verifyDeviceHasProgressGuarantee@handler@_V1@sycl@@AEAAXW4forward_progress_guarantee@experimental@oneapi@ext@23@W4execution_scope@56723@1@Z ?verifyKernelInvoc@handler@_V1@sycl@@AEAAXAEBVkernel@23@@Z ?verifyUsedKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z diff --git a/sycl/test/basic_tests/interop-level-zero-2020.cpp b/sycl/test/basic_tests/interop-level-zero-2020.cpp index 1f7c2a83d578b..1a39b8acb16b1 100644 --- a/sycl/test/basic_tests/interop-level-zero-2020.cpp +++ b/sycl/test/basic_tests/interop-level-zero-2020.cpp @@ -1,12 +1,13 @@ // RUN: %clangxx %fsycl-host-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note %s // RUN: %clangxx %fsycl-host-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note -D__SYCL_INTERNAL_API %s +// expected-no-diagnostics + // Test for SYCL-2020 Level Zero interop API -// clang-format off #include + #include -// clang-format on #include @@ -122,18 +123,5 @@ int main() { {KernelBundle, ZeKernel, ext::oneapi::level_zero::ownership::keep}, 
Context); - // Check deprecated - // expected-warning@+1 {{'make' is deprecated: Use SYCL 2020 sycl::make_platform free function}} - auto P = ext::oneapi::level_zero::make(ZeDriver); - // expected-warning@+1 {{'make' is deprecated: Use SYCL 2020 sycl::make_device free function}} - auto D = ext::oneapi::level_zero::make(P, ZeDevice); - // expected-warning@+1 {{'make' is deprecated: Use SYCL 2020 sycl::make_context free function}} - auto C = ext::oneapi::level_zero::make( - std::vector(1, D), ZeContext, - ext::oneapi::level_zero::ownership::keep); - // expected-warning@+1 {{'make' is deprecated: Use SYCL 2020 sycl::make_event free function}} - auto E = ext::oneapi::level_zero::make( - Context, ZeEvent, ext::oneapi::level_zero::ownership::keep); - return 0; } diff --git a/sycl/test/check_device_code/cuda/matrix/matrix-nvptx-compile-query-test.cpp b/sycl/test/matrix/cuda/matrix-nvptx-compile-query-test.cpp similarity index 100% rename from sycl/test/check_device_code/cuda/matrix/matrix-nvptx-compile-query-test.cpp rename to sycl/test/matrix/cuda/matrix-nvptx-compile-query-test.cpp diff --git a/sycl/test/check_device_code/hip/matrix/compile-query-hip-gfx90a.cpp b/sycl/test/matrix/hip/compile-query-hip-gfx90a.cpp similarity index 100% rename from sycl/test/check_device_code/hip/matrix/compile-query-hip-gfx90a.cpp rename to sycl/test/matrix/hip/compile-query-hip-gfx90a.cpp diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index 491fa49225a81..ee39a80625d85 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -10,6 +10,8 @@ add_sycl_unittest(ExtensionsTests OBJECT USMP2P.cpp CompositeDevice.cpp OneAPIProd.cpp + EnqueueFunctionsEvents.cpp + DiscardEvent.cpp ) add_subdirectory(CommandGraph) diff --git a/sycl/unittests/Extensions/DiscardEvent.cpp b/sycl/unittests/Extensions/DiscardEvent.cpp new file mode 100644 index 0000000000000..dc729c74084e0 --- /dev/null +++ 
b/sycl/unittests/Extensions/DiscardEvent.cpp @@ -0,0 +1,83 @@ +//==------------------------- DiscardEvent.cpp -----------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include + +#include +#include +#include + +using namespace sycl; + +namespace oneapiext = ext::oneapi::experimental; + +namespace { + +thread_local size_t counter_piEnqueueKernelLaunch = 0; +inline pi_result redefined_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piEnqueueKernelLaunch; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piEnqueueEventsWaitWithBarrier = 0; +thread_local std::chrono::time_point + timestamp_piEnqueueEventsWaitWithBarrier; +inline pi_result after_piEnqueueEventsWaitWithBarrier(pi_queue, pi_uint32, + const pi_event *, + pi_event *) { + ++counter_piEnqueueEventsWaitWithBarrier; + timestamp_piEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); + return PI_SUCCESS; +} + +class DiscardEventTests : public ::testing::Test { +public: + DiscardEventTests() + : Mock{}, Q{context(Mock.getPlatform()), default_selector_v, + property::queue::in_order{}} {} + +protected: + void SetUp() override { + counter_piEnqueueKernelLaunch = 0; + counter_piEnqueueEventsWaitWithBarrier = 0; + } + + unittest::PiMock Mock; + queue Q; +}; + +TEST_F(DiscardEventTests, BarrierBeforeHostTask) { + // Special test for case where host_task need an event after, so a barrier is + // enqueued to create a usable event. 
+ Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter( + after_piEnqueueEventsWaitWithBarrier); + + oneapiext::single_task>(Q, []() {}); + + std::chrono::time_point HostTaskTimestamp; + Q.submit([&](handler &CGH) { + CGH.host_task( + [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); + }).wait(); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, size_t{1}); + ASSERT_TRUE(HostTaskTimestamp > timestamp_piEnqueueEventsWaitWithBarrier); +} + +} // namespace diff --git a/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp new file mode 100644 index 0000000000000..842e3cf271216 --- /dev/null +++ b/sycl/unittests/Extensions/EnqueueFunctionsEvents.cpp @@ -0,0 +1,474 @@ +//==-------------------- EnqueueFunctionsEvents.cpp ------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Tests the behavior of enqueue free functions when events can be discarded. 
+ +#include +#include + +#include + +#include +#include +#include +#include + +using namespace sycl; + +namespace oneapiext = ext::oneapi::experimental; + +namespace { + +inline pi_result after_piKernelGetInfo(pi_kernel kernel, + pi_kernel_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + constexpr char MockKernel[] = "TestKernel"; + if (param_name == PI_KERNEL_INFO_FUNCTION_NAME) { + if (param_value) { + assert(param_value_size == sizeof(MockKernel)); + std::memcpy(param_value, MockKernel, sizeof(MockKernel)); + } + if (param_value_size_ret) + *param_value_size_ret = sizeof(MockKernel); + } + return PI_SUCCESS; +} + +thread_local size_t counter_piEnqueueKernelLaunch = 0; +inline pi_result redefined_piEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piEnqueueKernelLaunch; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueueMemcpy = 0; +inline pi_result redefined_piextUSMEnqueueMemcpy(pi_queue, pi_bool, void *, + const void *, size_t, + pi_uint32, const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueueMemcpy; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueueMemset = 0; +inline pi_result redefined_piextUSMEnqueueMemset(pi_queue, void *, pi_int32, + size_t, pi_uint32, + const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueueMemset; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueuePrefetch = 0; +inline pi_result redefined_piextUSMEnqueuePrefetch(pi_queue, const void *, + size_t, + pi_usm_migration_flags, + pi_uint32, const pi_event *, + pi_event *event) { + ++counter_piextUSMEnqueuePrefetch; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piextUSMEnqueueMemAdvise = 0; +inline pi_result 
redefined_piextUSMEnqueueMemAdvise(pi_queue, const void *, + size_t, pi_mem_advice, + pi_event *event) { + ++counter_piextUSMEnqueueMemAdvise; + EXPECT_EQ(event, nullptr); + return PI_SUCCESS; +} + +thread_local size_t counter_piEnqueueEventsWaitWithBarrier = 0; +thread_local std::chrono::time_point + timestamp_piEnqueueEventsWaitWithBarrier; +inline pi_result after_piEnqueueEventsWaitWithBarrier(pi_queue, pi_uint32, + const pi_event *, + pi_event *) { + ++counter_piEnqueueEventsWaitWithBarrier; + timestamp_piEnqueueEventsWaitWithBarrier = std::chrono::steady_clock::now(); + return PI_SUCCESS; +} + +class EnqueueFunctionsEventsTests : public ::testing::Test { +public: + EnqueueFunctionsEventsTests() + : Mock{}, Q{context(Mock.getPlatform()), default_selector_v, + property::queue::in_order{}} {} + +protected: + void SetUp() override { + counter_piEnqueueKernelLaunch = 0; + counter_piextUSMEnqueueMemcpy = 0; + counter_piextUSMEnqueueMemset = 0; + counter_piextUSMEnqueuePrefetch = 0; + counter_piextUSMEnqueueMemAdvise = 0; + counter_piEnqueueEventsWaitWithBarrier = 0; + } + + unittest::PiMock Mock; + queue Q; +}; + +TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::single_task>(CGH, []() {}); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::single_task>(Q, []() {}); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitSingleTaskKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + oneapiext::submit(Q, + 
[&](handler &CGH) { oneapiext::single_task(CGH, Kernel); }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SingleTaskShortcutKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + + oneapiext::single_task(Q, Kernel); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::parallel_for>(CGH, range<1>{32}, [](item<1>) {}); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::parallel_for>(Q, range<1>{32}, [](item<1>) {}); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitRangeParallelForKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::parallel_for(CGH, range<1>{32}, Kernel); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, RangeParallelForShortcutKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + + 
oneapiext::parallel_for(Q, range<1>{32}, Kernel); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::nd_launch>( + CGH, nd_range<1>{range<1>{32}, range<1>{32}}, [](nd_item<1>) {}); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + + oneapiext::nd_launch>(Q, nd_range<1>{range<1>{32}, range<1>{32}}, + [](nd_item<1>) {}); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitNDLaunchKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::nd_launch(CGH, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); + }); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, NDLaunchShortcutKernelNoEvent) { + Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter(after_piKernelGetInfo); + + auto KID = get_kernel_id>(); + auto KB = get_kernel_bundle( + Q.get_context(), std::vector{KID}); + + ASSERT_TRUE(KB.has_kernel(KID)); + + auto Kernel = KB.get_kernel(KID); + + oneapiext::nd_launch(Q, nd_range<1>{range<1>{32}, range<1>{32}}, Kernel); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitMemcpyNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, [&](handler &CGH) { + 
oneapiext::memcpy(CGH, Src, Dst, sizeof(int) * N); + }); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, MemcpyShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::memcpy(Q, Src, Dst, sizeof(int) * N); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitCopyNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, + [&](handler &CGH) { oneapiext::copy(CGH, Dst, Src, N); }); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, CopyShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemcpy); + + constexpr size_t N = 1024; + int *Src = malloc_shared(N, Q); + int *Dst = malloc_shared(N, Q); + + oneapiext::memcpy(Q, Dst, Src, N); + + ASSERT_EQ(counter_piextUSMEnqueueMemcpy, size_t{1}); + + free(Src, Q); + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitMemsetNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemset); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::memset(CGH, Dst, int{1}, sizeof(int) * N); + }); + + ASSERT_EQ(counter_piextUSMEnqueueMemset, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, MemsetShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemset); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::memset(Q, Dst, 1, sizeof(int) * N); + + ASSERT_EQ(counter_piextUSMEnqueueMemset, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitPrefetchNoEvent) { + 
Mock.redefine( + redefined_piextUSMEnqueuePrefetch); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::submit( + Q, [&](handler &CGH) { oneapiext::prefetch(CGH, Dst, sizeof(int) * N); }); + + ASSERT_EQ(counter_piextUSMEnqueuePrefetch, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, PrefetchShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueuePrefetch); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::prefetch(Q, Dst, sizeof(int) * N); + + ASSERT_EQ(counter_piextUSMEnqueuePrefetch, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, SubmitMemAdviseNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemAdvise); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::submit(Q, [&](handler &CGH) { + oneapiext::mem_advise(CGH, Dst, sizeof(int) * N, 1); + }); + + ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, MemAdviseShortcutNoEvent) { + Mock.redefine( + redefined_piextUSMEnqueueMemAdvise); + + constexpr size_t N = 1024; + int *Dst = malloc_shared(N, Q); + + oneapiext::mem_advise(Q, Dst, sizeof(int) * N, 1); + + ASSERT_EQ(counter_piextUSMEnqueueMemAdvise, size_t{1}); + + free(Dst, Q); +} + +TEST_F(EnqueueFunctionsEventsTests, BarrierBeforeHostTask) { + // Special test for case where host_task need an event after, so a barrier is + // enqueued to create a usable event. 
+ Mock.redefine( + redefined_piEnqueueKernelLaunch); + Mock.redefineAfter( + after_piEnqueueEventsWaitWithBarrier); + + oneapiext::single_task>(Q, []() {}); + + std::chrono::time_point HostTaskTimestamp; + Q.submit([&](handler &CGH) { + CGH.host_task( + [&]() { HostTaskTimestamp = std::chrono::steady_clock::now(); }); + }).wait(); + + ASSERT_EQ(counter_piEnqueueKernelLaunch, size_t{1}); + ASSERT_EQ(counter_piEnqueueEventsWaitWithBarrier, size_t{1}); + ASSERT_TRUE(HostTaskTimestamp > timestamp_piEnqueueEventsWaitWithBarrier); +} + +} // namespace diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index 543436a50f96e..f38dbe4495b81 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -157,8 +157,6 @@ TEST(ParseAllowListTests, CheckMissingClosedDoubleCurlyBracesAreHandled) { } } -// TODO: Remove esimd_emulator from ExpectedValue in the next ABI -// breaking window. TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { std::string AllowList; for (const auto &SyclBe : sycl::detail::getSyclBeMap()) { @@ -171,8 +169,8 @@ TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { sycl::detail::AllowListParsedT ExpectedValue{ {{"BackendName", "host"}}, {{"BackendName", "opencl"}}, {{"BackendName", "level_zero"}}, {{"BackendName", "cuda"}}, - {{"BackendName", "hip"}}, {{"BackendName", "esimd_emulator"}}, - {{"BackendName", "native_cpu"}}, {{"BackendName", "*"}}}; + {{"BackendName", "hip"}}, {{"BackendName", "native_cpu"}}, + {{"BackendName", "*"}}}; EXPECT_EQ(ExpectedValue, ActualValue); } diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 56803e7eab5bb..4e41a88c14544 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1023,7 +1023,8 @@ inline pi_result mock_piEnqueueKernelLaunch( const size_t *global_work_offset, const size_t 
*global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1032,7 +1033,8 @@ inline pi_result mock_piextEnqueueCooperativeKernelLaunch( const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1040,14 +1042,16 @@ inline pi_result mock_piEnqueueEventsWait(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } inline pi_result mock_piEnqueueEventsWaitWithBarrier( pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1056,7 +1060,8 @@ mock_piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, pi_bool blocking_read, size_t offset, size_t size, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1067,7 +1072,8 @@ inline pi_result mock_piEnqueueMemBufferReadRect( size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1076,7 +1082,8 @@ mock_piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, size_t offset, size_t size, const void *ptr, pi_uint32 
num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1087,7 +1094,8 @@ inline pi_result mock_piEnqueueMemBufferWriteRect( size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1097,7 +1105,8 @@ mock_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, size_t dst_offset, size_t size, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1108,7 +1117,8 @@ inline pi_result mock_piEnqueueMemBufferCopyRect( size_t dst_row_pitch, size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1119,7 +1129,8 @@ inline pi_result mock_piEnqueueMemBufferFill(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1128,7 +1139,8 @@ inline pi_result mock_piEnqueueMemImageRead( pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1139,7 +1151,8 @@ mock_piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = 
createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1149,7 +1162,8 @@ mock_piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, pi_image_offset dst_origin, pi_image_region region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1159,7 +1173,8 @@ mock_piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, const size_t *region, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1170,7 +1185,8 @@ inline pi_result mock_piEnqueueMemBufferMap(pi_queue command_queue, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event, void **ret_map) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); auto parentDummyHandle = reinterpret_cast(buffer); *ret_map = (void *)(parentDummyHandle->MData); @@ -1182,7 +1198,8 @@ inline pi_result mock_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1247,7 +1264,8 @@ inline pi_result mock_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1257,7 +1275,8 @@ inline pi_result mock_piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1267,7 +1286,8 @@ inline pi_result 
mock_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1275,7 +1295,8 @@ inline pi_result mock_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, size_t length, pi_mem_advice advice, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1318,7 +1339,8 @@ inline pi_result mock_piextEnqueueDeviceGlobalVariableWrite( pi_bool blocking_write, size_t count, size_t offset, const void *src, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1326,7 +1348,63 @@ inline pi_result mock_piextEnqueueDeviceGlobalVariableRead( pi_queue queue, pi_program program, const char *name, pi_bool blocking_read, size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); + return PI_SUCCESS; +} + +inline pi_result +mock_piextVirtualMemGranularityGetInfo(pi_context, pi_device, + pi_virtual_mem_granularity_info, size_t, + void *, size_t *) { + return PI_SUCCESS; +} + +inline pi_result +mock_piextPhysicalMemCreate(pi_context, pi_device, size_t, + pi_physical_mem *ret_physical_mem) { + *ret_physical_mem = createDummyHandle(); + return PI_SUCCESS; +} + +inline pi_result mock_piextPhysicalMemRetain(pi_physical_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextPhysicalMemRelease(pi_physical_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemReserve(pi_context, const void *start, + size_t range_size, + void **ret_ptr) { + *ret_ptr = + start ? 
const_cast(start) : createDummyHandle(range_size); + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemFree(pi_context, const void *, size_t) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemMap(pi_context, const void *, size_t, + pi_physical_mem, size_t, + pi_virtual_access_flags) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemUnmap(pi_context, const void *, size_t) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemSetAccess(pi_context, const void *, size_t, + pi_virtual_access_flags) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemGetInfo(pi_context, const void *, size_t, + pi_virtual_mem_info, size_t, + void *, size_t *) { return PI_SUCCESS; } @@ -1491,6 +1569,14 @@ inline pi_result mock_piextCommandBufferAdviseUSM( return PI_SUCCESS; } +inline pi_result mock_piextEnqueueNativeCommand(pi_queue, + void (*)(pi_queue, void *), + void *, uint32_t, + const pi_mem *, pi_uint32, + const pi_event *, pi_event *) { + return PI_SUCCESS; +} + inline pi_result mock_piTearDown(void *PluginParameter) { return PI_SUCCESS; } inline pi_result mock_piPluginGetLastError(char **message) { @@ -1526,7 +1612,8 @@ inline pi_result mock_piextEnqueueReadHostPipe( pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } @@ -1534,7 +1621,8 @@ inline pi_result mock_piextEnqueueWriteHostPipe( pi_queue queue, pi_program program, const char *pipe_symbol, pi_bool blocking, void *ptr, size_t size, pi_uint32 num_events_in_waitlist, const pi_event *events_waitlist, pi_event *event) { - *event = createDummyHandle(); + if (event) + *event = createDummyHandle(); return PI_SUCCESS; } diff --git a/sycl/unittests/pi/BackendString.hpp b/sycl/unittests/pi/BackendString.hpp index 
ea90e3ff3eb54..ff50480163208 100644 --- a/sycl/unittests/pi/BackendString.hpp +++ b/sycl/unittests/pi/BackendString.hpp @@ -12,8 +12,7 @@ inline std::string GetBackendString(const sycl::detail::PluginPtr &Plugin) { std::stringstream Str; for (sycl::backend Backend : {sycl::backend::opencl, sycl::backend::ext_oneapi_level_zero, - sycl::backend::ext_oneapi_cuda, sycl::backend::ext_intel_esimd_emulator, - sycl::backend::ext_oneapi_hip}) { + sycl::backend::ext_oneapi_cuda, sycl::backend::ext_oneapi_hip}) { if (Plugin->hasBackend(Backend)) { Str << Backend; } diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 35e353780d450..b4d4e7cdb7535 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -111,7 +111,7 @@ class MockHandler : public sycl::handler { public: MockHandler(std::shared_ptr Queue) - : sycl::handler(Queue, /* IsHost */ false) {} + : sycl::handler(Queue, /* IsHost */ false, /*CallerNeedsEvent*/ true) {} std::unique_ptr finalize() { auto CGH = static_cast(this); diff --git a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp index 473ddcefe75fa..bf200ec660fb9 100644 --- a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp +++ b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp @@ -21,7 +21,8 @@ TEST_F(SchedulerTest, AccDefaultCtorDoesntAffectDepGraph) { std::vector ToEnqueue; - MockHandlerCustomFinalize MockCGH(QueueDevImpl, false); + MockHandlerCustomFinalize MockCGH(QueueDevImpl, false, + /*CallerNeedsEvent=*/true); sycl::accessor B; @@ -33,8 +34,8 @@ TEST_F(SchedulerTest, AccDefaultCtorDoesntAffectDepGraph) { std::unique_ptr CmdGroup = MockCGH.finalize(); - detail::Command *NewCmd = - MS.addCG(std::move(CmdGroup), QueueDevImpl, ToEnqueue); + detail::Command *NewCmd = MS.addCG(std::move(CmdGroup), QueueDevImpl, + ToEnqueue, 
/*EventNeeded=*/true); // if MDeps is empty, accessor built from default ctor does not affect // dependency graph in accordance with SYCL 2020 diff --git a/sycl/unittests/scheduler/Commands.cpp b/sycl/unittests/scheduler/Commands.cpp index a995800643421..bd0df10d1309a 100644 --- a/sycl/unittests/scheduler/Commands.cpp +++ b/sycl/unittests/scheduler/Commands.cpp @@ -81,6 +81,7 @@ TEST_F(SchedulerTest, WaitEmptyEventWithBarrier) { std::unique_ptr CommandGroup(new detail::CGBarrier( std::move(Arg), detail::CG::StorageInitHelper({}, {}, {}, {}, {}), detail::CG::CGTYPE::BarrierWaitlist, {})); - MS.Scheduler::addCG(std::move(CommandGroup), QueueImpl); + MS.Scheduler::addCG(std::move(CommandGroup), QueueImpl, + /*EventNeeded=*/true); } } diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 414f58c6f177c..2e54057e434d6 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -60,7 +60,8 @@ class DependsOnTests : public ::testing::Test { std::vector ToEnqueue; // Emulating processing of command group function - MockHandlerCustomFinalize MockCGH(QueueDevImpl, false); + MockHandlerCustomFinalize MockCGH(QueueDevImpl, false, + /*CallerNeedsEvent=*/true); for (auto EventImpl : Events) MockCGH.depends_on(detail::createSyclObjFromImpl(EventImpl)); @@ -84,7 +85,7 @@ class DependsOnTests : public ::testing::Test { detail::Command *NewCmd = MS.addCG( std::move(CmdGroup), Type == TestCGType::HOST_TASK ? 
MS.getDefaultHostQueue() : QueueDevImpl, - ToEnqueue); + ToEnqueue, /*EventNeeded=*/true); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; } diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp index 9bf4e37eea0db..8206728b2b221 100644 --- a/sycl/unittests/scheduler/GraphCleanup.cpp +++ b/sycl/unittests/scheduler/GraphCleanup.cpp @@ -106,7 +106,8 @@ static void checkCleanupOnEnqueue(MockScheduler &MS, /*SharedPtrStorage*/ {}, /*Requirements*/ {&MockReq}, /*Events*/ {}))}; - detail::EventImplPtr Event = MS.addCG(std::move(CG), QueueImpl); + detail::EventImplPtr Event = + MS.addCG(std::move(CG), QueueImpl, /*EventNeeded=*/true); auto *Cmd = static_cast(Event->getCommand()); verifyCleanup(Record, AllocaCmd, MockCmd, CommandDeleted); @@ -330,7 +331,8 @@ TEST_F(SchedulerTest, StreamBufferDeallocation) { AttachSchedulerWrapper AttachScheduler{MSPtr}; detail::EventImplPtr EventImplPtr; { - MockHandlerCustomFinalize MockCGH(QueueImplPtr, false); + MockHandlerCustomFinalize MockCGH(QueueImplPtr, false, + /*CallerNeedsEvent=*/true); kernel_bundle KernelBundle = sycl::get_kernel_bundle( QueueImplPtr->get_context()); @@ -341,7 +343,8 @@ TEST_F(SchedulerTest, StreamBufferDeallocation) { MockCGH.single_task>([] {}); std::unique_ptr CG = MockCGH.finalize(); - EventImplPtr = MSPtr->addCG(std::move(CG), QueueImplPtr); + EventImplPtr = + MSPtr->addCG(std::move(CG), QueueImplPtr, /*EventNeeded=*/true); } // The buffers should have been released with graph cleanup once the work is @@ -391,7 +394,8 @@ TEST_F(SchedulerTest, AuxiliaryResourcesDeallocation) { detail::EventImplPtr EventImplPtr; bool MockAuxResourceDeleted = false; { - MockHandlerCustomFinalize MockCGH(QueueImplPtr, false); + MockHandlerCustomFinalize MockCGH(QueueImplPtr, false, + /*CallerNeedsEvent=*/true); kernel_bundle KernelBundle = sycl::get_kernel_bundle( QueueImplPtr->get_context()); @@ -410,7 +414,8 @@ TEST_F(SchedulerTest, AuxiliaryResourcesDeallocation) { 
MockCGH.single_task>([] {}); std::unique_ptr CG = MockCGH.finalize(); - EventImplPtr = MSPtr->addCG(std::move(CG), QueueImplPtr); + EventImplPtr = + MSPtr->addCG(std::move(CG), QueueImplPtr, /*EventNeeded=*/true); } EventCompleted = false; diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index ab37a667fc90a..3f97ffb003adc 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -19,10 +19,21 @@ using namespace sycl; using ::testing::An; +class MockQueueImpl : public sycl::detail::queue_impl { +public: + MockQueueImpl(const sycl::detail::DeviceImplPtr &Device, + const sycl::async_handler &AsyncHandler, + const sycl::property_list &PropList) + : sycl::detail::queue_impl(Device, AsyncHandler, PropList) {} + using sycl::detail::queue_impl::finalizeHandler; +}; + // Define type with the only methods called by finalizeHandler class LimitedHandler { public: - LimitedHandler(sycl::detail::CG::CGTYPE CGType) : MCGType(CGType) {} + LimitedHandler(sycl::detail::CG::CGTYPE CGType, + std::shared_ptr Queue) + : MCGType(CGType), MQueue(Queue) {} virtual ~LimitedHandler() {} virtual void depends_on(const sycl::detail::EventImplPtr &) {} @@ -38,14 +49,16 @@ class LimitedHandler { sycl::detail::CG::CGTYPE getType() { return MCGType; } sycl::detail::CG::CGTYPE MCGType; + std::shared_ptr MQueue; }; // Needed to use EXPECT_CALL to verify depends_on that originally appends lst // event as dependency to the new CG class LimitedHandlerSimulation : public LimitedHandler { public: - LimitedHandlerSimulation(sycl::detail::CG::CGTYPE CGType) - : LimitedHandler(CGType) {} + LimitedHandlerSimulation(sycl::detail::CG::CGTYPE CGType, + std::shared_ptr Queue) + : LimitedHandler(CGType, Queue) {} MOCK_METHOD1(depends_on, void(const sycl::detail::EventImplPtr &)); MOCK_METHOD1(depends_on, void(event Event)); @@ -53,15 +66,6 @@ class LimitedHandlerSimulation : public 
LimitedHandler { void(const std::vector &Events)); }; -class MockQueueImpl : public sycl::detail::queue_impl { -public: - MockQueueImpl(const sycl::detail::DeviceImplPtr &Device, - const sycl::async_handler &AsyncHandler, - const sycl::property_list &PropList) - : sycl::detail::queue_impl(Device, AsyncHandler, PropList) {} - using sycl::detail::queue_impl::finalizeHandler; -}; - // Only check events dependency in queue_impl::finalizeHandler TEST_F(SchedulerTest, InOrderQueueSyncCheck) { sycl::unittest::PiMock Mock; @@ -76,13 +80,15 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) { // previous task, this is needed to properly sync blocking & blocked tasks. sycl::event Event; { - LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask}; + LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask, + Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(0); Queue->finalizeHandler(MockCGH, Event); } { - LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask}; + LimitedHandlerSimulation MockCGH{detail::CG::CGTYPE::CodeplayHostTask, + Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(1); Queue->finalizeHandler(MockCGH, Event); diff --git a/sycl/unittests/scheduler/KernelFusion.cpp b/sycl/unittests/scheduler/KernelFusion.cpp index 8b45c03e37f1f..1db16cbda1493 100644 --- a/sycl/unittests/scheduler/KernelFusion.cpp +++ b/sycl/unittests/scheduler/KernelFusion.cpp @@ -22,7 +22,8 @@ template detail::Command *CreateTaskCommand(MockScheduler &MS, detail::QueueImplPtr DevQueue, buffer &buf) { - MockHandlerCustomFinalize MockCGH(DevQueue, false); + MockHandlerCustomFinalize MockCGH(DevQueue, false, + /*CallerNeedsEvent=*/true); auto acc = buf.get_access(static_cast(MockCGH)); @@ -36,7 +37,8 @@ detail::Command *CreateTaskCommand(MockScheduler &MS, auto CmdGrp = MockCGH.finalize(); std::vector ToEnqueue; - detail::Command *NewCmd = MS.addCG(std::move(CmdGrp), DevQueue, ToEnqueue); + detail::Command *NewCmd = + MS.addCG(std::move(CmdGrp), 
DevQueue, ToEnqueue, /*EventNeeded=*/true); EXPECT_EQ(ToEnqueue.size(), 0u); return NewCmd; } diff --git a/sycl/unittests/scheduler/QueueFlushing.cpp b/sycl/unittests/scheduler/QueueFlushing.cpp index c97428b9d55c6..c27e4d672e0fa 100644 --- a/sycl/unittests/scheduler/QueueFlushing.cpp +++ b/sycl/unittests/scheduler/QueueFlushing.cpp @@ -147,7 +147,8 @@ TEST_F(SchedulerTest, QueueFlushing) { /*SharedPtrStorage*/ {}, /*Requirements*/ {}, /*Events*/ {}))}; - detail::ExecCGCommand ExecCGCmd{std::move(CG), QueueImplA}; + detail::ExecCGCommand ExecCGCmd{std::move(CG), QueueImplA, + /*EventNeeded=*/true}; MockReq.MDims = 1; (void)ExecCGCmd.addDep(detail::DepDesc(&AllocaCmd, &MockReq, &AllocaCmd), ToCleanUp); diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 1d7fa2075d0da..b1c667c5c40ca 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -194,11 +194,12 @@ class MockScheduler : public sycl::detail::Scheduler { return MGraphBuilder.addEmptyCmd(Cmd, Reqs, Queue, Reason, ToEnqueue); } - sycl::detail::Command * - addCG(std::unique_ptr CommandGroup, - sycl::detail::QueueImplPtr Queue, - std::vector &ToEnqueue) { - return MGraphBuilder.addCG(std::move(CommandGroup), Queue, ToEnqueue) + sycl::detail::Command *addCG(std::unique_ptr CommandGroup, + sycl::detail::QueueImplPtr Queue, + std::vector &ToEnqueue, + bool EventNeeded) { + return MGraphBuilder + .addCG(std::move(CommandGroup), Queue, ToEnqueue, EventNeeded) .NewCmd; } @@ -225,8 +226,9 @@ sycl::detail::Requirement getMockRequirement(const MemObjT &MemObj) { class MockHandler : public sycl::handler { public: - MockHandler(std::shared_ptr Queue, bool IsHost) - : sycl::handler(Queue, IsHost) {} + MockHandler(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent) + : sycl::handler(Queue, IsHost, CallerNeedsEvent) {} // Methods using sycl::handler::addReduction; using sycl::handler::getType; 
@@ -292,8 +294,8 @@ class MockHandler : public sycl::handler { class MockHandlerCustomFinalize : public MockHandler { public: MockHandlerCustomFinalize(std::shared_ptr Queue, - bool IsHost) - : MockHandler(Queue, IsHost) {} + bool IsHost, bool CallerNeedsEvent) + : MockHandler(Queue, IsHost, CallerNeedsEvent) {} std::unique_ptr finalize() { std::unique_ptr CommandGroup; diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 18c0b3e1a8070..7e76027c05431 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -20,8 +20,9 @@ inline constexpr auto DisableCleanupName = class MockHandlerStreamInit : public MockHandler { public: - MockHandlerStreamInit(std::shared_ptr Queue, bool IsHost) - : MockHandler(Queue, IsHost) {} + MockHandlerStreamInit(std::shared_ptr Queue, bool IsHost, + bool CallerNeedsEvent) + : MockHandler(Queue, IsHost, CallerNeedsEvent) {} std::unique_ptr finalize() { std::unique_ptr CommandGroup; switch (getType()) { @@ -85,7 +86,7 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { /*PropList=*/{})); // Emulating processing of command group function - MockHandlerStreamInit MockCGH(HQueueImpl, true); + MockHandlerStreamInit MockCGH(HQueueImpl, true, /*CallerNeedsEvent=*/true); MockCGH.setType(detail::CG::Kernel); auto EmptyKernel = [](sycl::nd_item<1>) {}; @@ -97,7 +98,7 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { // Emulating construction of stream object inside command group detail::StreamImplPtr StreamImpl = - std::make_shared(1024, 200, MockCGH); + std::make_shared(1024, 200, property_list{}); detail::GlobalBufAccessorT FlushBufAcc = StreamImpl->accessGlobalFlushBuf(MockCGH); MockCGH.addStream(StreamImpl); @@ -118,7 +119,8 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { MockScheduler MS; std::vector AuxCmds; - detail::Command *NewCmd = MS.addCG(std::move(MainCG), HQueueImpl, 
AuxCmds); + detail::Command *NewCmd = + MS.addCG(std::move(MainCG), HQueueImpl, AuxCmds, /*EventNeeded=*/true); ASSERT_TRUE(!!NewCmd) << "Failed to add command group into scheduler"; ASSERT_GT(NewCmd->MDeps.size(), 0u) << "No deps appeared in the new exec kernel command"; diff --git a/sycl/unittests/thread_safety/ThreadUtils.h b/sycl/unittests/thread_safety/ThreadUtils.h index ccbca98d44e3f..4b40123ba1bb7 100644 --- a/sycl/unittests/thread_safety/ThreadUtils.h +++ b/sycl/unittests/thread_safety/ThreadUtils.h @@ -48,7 +48,13 @@ class ThreadPool { enqueueHelper(std::forward(funcs)...); } - ~ThreadPool() { wait(); } + ~ThreadPool() { + try { + wait(); + } catch (std::exception &e) { + std::cerr << "exception in ~ThreadPool" << e.what() << std::endl; + } + } private: template