diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md index b97ef36ad11a3..bb09fc58ad532 100644 --- a/sycl/doc/design/CommandGraph.md +++ b/sycl/doc/design/CommandGraph.md @@ -114,6 +114,24 @@ the scheduler for adding to the UR command-buffer, otherwise the node can be appended directly as a command in the UR command-buffer. This is in-keeping with the existing behaviour of the handler with normal queue submissions. +Scheduler commands for adding graph nodes differ from typical command-group +submission in the scheduler, in that they do not launch any asynchronous work +which relies on their dependencies, and are considered complete immediately +after adding the command-group node to the graph. + +This presents problems with device allocations which create both an allocation +command and a separate initial copy command of data to the new allocation. +Since future command-graph execution submissions will only receive +dependencies on the allocation command (since this is all the information +available), this could lead to situations where the device execution of the +initial copy command is delayed due to device occupancy, and the command-graph +and initial copy could execute on the device in an incorrect order. + +To solve this issue, when the scheduler enqueues command-groups to add as nodes +in a command-graph, it will perform a blocking wait on the dependencies of the +command-group first. The user will experience this wait as part of graph +finalization. + ## Memory handling: Buffer and Accessor There is no extra support for graph-specific USM allocations in the current diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 4dd384c4db050..7830ee3cb07af 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2657,14 +2657,19 @@ enqueueReadWriteHostPipe(const QueueImplPtr &Queue, const std::string &PipeName, } pi_int32 ExecCGCommand::enqueueImpCommandBuffer() { - std::vector EventImpls = MPreparedDepsEvents; - auto RawEvents = getPiEvents(EventImpls); - flushCrossQueueDeps(EventImpls, getWorkerQueue()); + // Wait on host command dependencies + waitForPreparedHostEvents(); - // Any non-allocation dependencies need to be waited on here since subsequent + // Any device dependencies need to be waited on here since subsequent // submissions of the command buffer itself will not receive dependencies on // them, e.g. initial copies from host to device - waitForEvents(MQueue, MPreparedDepsEvents, MEvent->getHandleRef()); + std::vector EventImpls = MPreparedDepsEvents; + flushCrossQueueDeps(EventImpls, getWorkerQueue()); + std::vector RawEvents = getPiEvents(EventImpls); + if (!RawEvents.empty()) { + const PluginPtr &Plugin = MQueue->getPlugin(); + Plugin->call(RawEvents.size(), &RawEvents[0]); + } sycl::detail::pi::PiEvent *Event = (MQueue->has_discard_events_support() && diff --git a/sycl/test-e2e/Graph/Explicit/basic_buffer.cpp b/sycl/test-e2e/Graph/Explicit/basic_buffer.cpp index f39025696ef75..6191a875bbe41 100644 --- a/sycl/test-e2e/Graph/Explicit/basic_buffer.cpp +++ b/sycl/test-e2e/Graph/Explicit/basic_buffer.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_EXPLICIT #include "../Inputs/basic_buffer.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/buffer_copy.cpp b/sycl/test-e2e/Graph/Explicit/buffer_copy.cpp index 91ea5421c38bc..3c291d4d44393 100644 --- a/sycl/test-e2e/Graph/Explicit/buffer_copy.cpp +++ b/sycl/test-e2e/Graph/Explicit/buffer_copy.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// XFAIL: gpu-intel-dg2 - #define GRAPH_E2E_EXPLICIT #include "../Inputs/buffer_copy.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/buffer_copy_2d.cpp b/sycl/test-e2e/Graph/Explicit/buffer_copy_2d.cpp index 9a9c87086518a..446d75316e6e2 100644 --- a/sycl/test-e2e/Graph/Explicit/buffer_copy_2d.cpp +++ b/sycl/test-e2e/Graph/Explicit/buffer_copy_2d.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// XFAIL: gpu-intel-dg2 - #define GRAPH_E2E_EXPLICIT #include "../Inputs/buffer_copy_2d.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target.cpp b/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target.cpp index 7cd0351d6f6f6..8c233ec8de66e 100644 --- a/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target.cpp +++ b/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_EXPLICIT #include "../Inputs/buffer_copy_host2target.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target_2d.cpp b/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target_2d.cpp index 83c98db0790e9..9c33e885ce8a5 100644 --- a/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target_2d.cpp +++ b/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target_2d.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_EXPLICIT #include "../Inputs/buffer_copy_host2target_2d.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target_offset.cpp b/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target_offset.cpp index 8f2838bec74d1..2c26c24744f0e 100644 --- a/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target_offset.cpp +++ b/sycl/test-e2e/Graph/Explicit/buffer_copy_host2target_offset.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_EXPLICIT #include "../Inputs/buffer_copy_host2target_offset.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/buffer_copy_offsets.cpp b/sycl/test-e2e/Graph/Explicit/buffer_copy_offsets.cpp index cb16636a5f2df..746b41f4e0a76 100644 --- a/sycl/test-e2e/Graph/Explicit/buffer_copy_offsets.cpp +++ b/sycl/test-e2e/Graph/Explicit/buffer_copy_offsets.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// XFAIL: gpu-intel-dg2 - #define GRAPH_E2E_EXPLICIT #include "../Inputs/buffer_copy_offsets.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host.cpp b/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host.cpp index dc6db58859a7a..e3a9ceb3160a2 100644 --- a/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host.cpp +++ b/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host.cpp @@ -1,6 +1,3 @@ -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - // REQUIRES: level_zero, gpu // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host_2d.cpp b/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host_2d.cpp index 29f359303951b..f9945ebf3ee58 100644 --- a/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host_2d.cpp +++ b/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host_2d.cpp @@ -1,6 +1,3 @@ -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - // REQUIRES: level_zero, gpu // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host_offset.cpp b/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host_offset.cpp index 15bb37701036e..c51b9e445137c 100644 --- a/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host_offset.cpp +++ b/sycl/test-e2e/Graph/Explicit/buffer_copy_target2host_offset.cpp @@ -1,6 +1,3 @@ -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - // REQUIRES: level_zero, gpu // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Graph/Explicit/event_status_querying.cpp b/sycl/test-e2e/Graph/Explicit/event_status_querying.cpp index d7eb472f67ba4..4d5831d494aa1 100644 --- a/sycl/test-e2e/Graph/Explicit/event_status_querying.cpp +++ b/sycl/test-e2e/Graph/Explicit/event_status_querying.cpp @@ -4,9 +4,6 @@ // // CHECK: complete -// https://github.com/intel/llvm/issues/11434 -// XFAIL: gpu-intel-dg2 - #define GRAPH_E2E_EXPLICIT #include "../Inputs/event_status_querying.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/temp_buffer_reinterpret.cpp b/sycl/test-e2e/Graph/Explicit/temp_buffer_reinterpret.cpp index 6f47ba0d4ad70..d5b3ff7412b61 100644 --- a/sycl/test-e2e/Graph/Explicit/temp_buffer_reinterpret.cpp +++ b/sycl/test-e2e/Graph/Explicit/temp_buffer_reinterpret.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_EXPLICIT #include "../Inputs/temp_buffer_reinterpret.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/usm_copy.cpp b/sycl/test-e2e/Graph/Explicit/usm_copy.cpp index b12cfa71dbc23..e0771a3e6d082 100644 --- a/sycl/test-e2e/Graph/Explicit/usm_copy.cpp +++ b/sycl/test-e2e/Graph/Explicit/usm_copy.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_EXPLICIT #include "../Inputs/usm_copy.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/basic_buffer.cpp b/sycl/test-e2e/Graph/RecordReplay/basic_buffer.cpp index f9d8f0a029b99..7d0c7c81d780f 100644 --- a/sycl/test-e2e/Graph/RecordReplay/basic_buffer.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/basic_buffer.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/basic_buffer.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_copy.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_copy.cpp index 235c63827d9a6..77270b1e9bebe 100644 --- a/sycl/test-e2e/Graph/RecordReplay/buffer_copy.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_copy.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// XFAIL: gpu-intel-dg2 - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/buffer_copy.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_2d.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_2d.cpp index 11626664c403d..d00aa10368368 100644 --- a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_2d.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_2d.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// XFAIL: gpu-intel-dg2 - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/buffer_copy_2d.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target.cpp index bdffcd89f8253..7364dea5c7779 100644 --- a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/buffer_copy_host2target.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target_2d.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target_2d.cpp index 38a3c31cd3600..b650bc67faeb7 100644 --- a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target_2d.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target_2d.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/buffer_copy_host2target_2d.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target_offset.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target_offset.cpp index 4d615b70b2551..9f2cb1b787902 100644 --- a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target_offset.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_host2target_offset.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/buffer_copy_host2target_offset.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_offsets.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_offsets.cpp index 81a7c88577582..05922690d99f4 100644 --- a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_offsets.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_offsets.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// XFAIL: gpu-intel-dg2 - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/buffer_copy_offsets.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host.cpp index a396cf97d8491..1954e2c5bfef8 100644 --- a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host.cpp @@ -1,6 +1,3 @@ -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - // REQUIRES: level_zero, gpu // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host_2d.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host_2d.cpp index 5f3c20657f0e4..2c3eaa28e7ad2 100644 --- a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host_2d.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host_2d.cpp @@ -1,6 +1,3 @@ -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - // REQUIRES: level_zero, gpu // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host_offset.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host_offset.cpp index 36c64a7bced95..22f8934482d5e 100644 --- a/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host_offset.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_copy_target2host_offset.cpp @@ -1,6 +1,3 @@ -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - // REQUIRES: level_zero, gpu // RUN: %{build} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/Graph/RecordReplay/event_status_querying.cpp b/sycl/test-e2e/Graph/RecordReplay/event_status_querying.cpp index 0c5a27a30cf80..f1a9ae3e49d1a 100644 --- a/sycl/test-e2e/Graph/RecordReplay/event_status_querying.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/event_status_querying.cpp @@ -4,9 +4,6 @@ // // CHECK: complete -// https://github.com/intel/llvm/issues/11434 -// XFAIL: gpu-intel-dg2 - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/event_status_querying.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/temp_buffer_reinterpret.cpp b/sycl/test-e2e/Graph/RecordReplay/temp_buffer_reinterpret.cpp index d6f8ccb4d807f..a51bcc967b2ee 100644 --- a/sycl/test-e2e/Graph/RecordReplay/temp_buffer_reinterpret.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/temp_buffer_reinterpret.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/temp_buffer_reinterpret.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_copy.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_copy.cpp index 66871cb91dcc1..b24dc65614f1e 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_copy.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_copy.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/usm_copy.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/usm_copy_in_order.cpp b/sycl/test-e2e/Graph/RecordReplay/usm_copy_in_order.cpp index 18d76fb22c9af..be9bcf3d64f10 100644 --- a/sycl/test-e2e/Graph/RecordReplay/usm_copy_in_order.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/usm_copy_in_order.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// https://github.com/intel/llvm/issues/11434 -// UNSUPPORTED: gpu-intel-dg2 - // Tests memcpy operation using device USM and an in-order queue. #include "../graph_common.hpp" diff --git a/sycl/test-e2e/Graph/graph_exception_global_device_extension.cpp b/sycl/test-e2e/Graph/graph_exception_global_device_extension.cpp index 0250217ddf6af..56e974b6b96e1 100644 --- a/sycl/test-e2e/Graph/graph_exception_global_device_extension.cpp +++ b/sycl/test-e2e/Graph/graph_exception_global_device_extension.cpp @@ -2,7 +2,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out // -// UNSUPPORTED: gpu-intel-pvc // The test checks that invalid exception is thrown // when trying to use sycl_ext_oneapi_device_global // along with Graph. @@ -19,7 +18,7 @@ sycl::ext::oneapi::experimental::device_global enum OperationPath { Explicit, RecordReplay, Shortcut }; template void test() { - queue Q; + queue Q{{sycl::ext::intel::property::queue::no_immediate_command_list{}}}; int MemcpyWrite = 42, CopyWrite = 24, MemcpyRead = 1, CopyRead = 2; exp_ext::command_graph Graph{Q.get_context(), Q.get_device()};