diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc index a9690ab73764c..b9bff2d226c09 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc @@ -248,9 +248,9 @@ to define an edge between existing nodes, or using a Edges can also be created when explicitly adding nodes to the graph through existing SYCL mechanisms for expressing dependencies. Data dependencies from accessors to existing nodes in the graph are captured as an edge. Using -`handler::depends_on()` will also create a graph edge when passed an event -returned from a queue submission captured by a queue recording to the same -graph. +`handler::depends_on()` will also create a graph edge when passed certain kinds +of events, for more information see <>. |=== ==== Queue Recording API @@ -279,12 +279,12 @@ dependencies in one of three ways. Firstly, through buffer accessors that represent data dependencies between two command groups captured as nodes. Secondly, by using the `handler::depends_on()` mechanism inside a command group captured as a node. However, for an event passed to `handler::depends_on()` to -create an edge, it must be an event returned from a queue -submission captured by the same graph. Otherwise, a synchronous error will be -thrown with error code `invalid`. `handler::depends_on()` can be -used to express edges when a user is working with USM memory rather than SYCL -buffers. Thirdly, for a graph recorded with an in-order queue, an edge is added -automatically between two sequential command groups submitted to the in-order queue. +create an edge, it must be of a specific type (see <> for more information). +`handler::depends_on()` can be used to express edges when a user is working with +USM memory rather than SYCL buffers. For a graph recorded with an in-order +queue, an edge is added automatically between two sequential command groups +submitted to the in-order queue. |=== ==== Sub-Graph @@ -336,6 +336,8 @@ enum class node_type { memadvise, ext_oneapi_barrier, host_task, + external_event, + wait_external_event }; class node { @@ -350,6 +352,8 @@ public: static node get_node_from_event(event nodeEvent); + event get_external_event() const; + template void update_nd_range(nd_range executionRange); @@ -418,6 +422,24 @@ Exceptions: * Throws with error code `invalid` if `nodeEvent` is not associated with a graph node. +| +[source,c++] +---- +event get_external_event(); +---- +|Gets the external event associated with this node. Can only be called for nodes +with type `node_type::external_event`. + +For more information on external events see <>. + +Returns: The external event associated with this node. + +Exceptions: + +* Throws with error code `invalid` if the type of this node is not +`node_type::external_event`. + | [source,c++] ---- @@ -632,6 +654,10 @@ public: void make_edge(node& src, node& dest); + node add_wait_external_event(const std::vector eventList = {}, const property_list& propList = {}); + + node add_external_event(const property_list& propList = {}); + void print_graph(std::string path, bool verbose = false) const; std::vector get_nodes() const; @@ -996,6 +1022,65 @@ Exceptions: * Throws synchronously with error code `invalid` if the resulting dependency would lead to a cycle. This error is omitted when `property::graph::no_cycle_check` is set. +| +[source,c++] +---- +node add_wait_external_event(const std::vector eventList, const property_list& propList = {}); +---- + +|Adds a node to the graph which waits on a number of external events. + +For more information on external events see <>. + +Constraints: + +* This member function is only available when the `command_graph` state is + `graph_state::modifiable`. + +Parameters: + +* `eventList` - The events to wait for. + These events can only be external events from other graph's external event + nodes. + +* `propList` - Zero or more properties can be provided to the constructed node + via an instance of `property_list`. The `property::node::depends_on` property + can be passed here with a list of nodes to create dependency edges on. + +Returns: The external event wait node which has been added to the +graph. + +Exceptions: + +* Throws synchronously with error code `invalid` if any event in `eventList` is + not an external event. + +| +[source,c++] +---- +node add_external_event(const property_list& propList = {}); +---- + +|Adds an external event node to the graph, this node contains an external graph +event which represents the execution at this point in the graph. + +For more information on external events see <>. + +Constraints: + +* This member function is only available when the `command_graph` state is + `graph_state::modifiable`. + +Parameters: + +* `propList` - Zero or more properties can be provided to the constructed node + via an instance of `property_list`. The `property::node::depends_on` property + can be passed here with a list of nodes to create dependency edges on. + +Returns: The external event node which has been added to the graph. + | [source,c++] ---- @@ -1297,6 +1382,10 @@ public: event depEvent); event ext_oneapi_graph(command_graph& graph, const std::vector& depEvents); + + /* -- External events management -- */ + event ext_oneapi_external_event(const event& depEvent); + event ext_oneapi_external_event(const std::vector& depEvents); }; } // namespace sycl ---- @@ -1456,6 +1545,59 @@ all the nodes have finished execution. The queue should be associated with a device and context that are the same as the device and context used on creation of the graph. + +| +[source,c++] +---- +event queue::ext_oneapi_external_event(const event& depEvent); +---- + +|Create and return a graph external event representing the execution of +`depEvent`. This function should only be called on a queue which is in the +recording state. + +For more information on external events see <>. + +Parameters: + +* `depEvent` - The event which this external event depends on. The +external event will be considered complete when all of its dependencies are +complete. + +Returns: The external event representing this point of execution in the graph. + +Exceptions: + +* Throws synchronously with error code `invalid` if the queue is not + in recording mode. + +| +[source,c++] +---- +event queue::ext_oneapi_external_event(const std::vector& depEvents); +---- + +|Create and return a graph external event representing the execution of +`depEvents`. This function should only be called on a queue which is in the +recording state. + +For more information on external events see <>. + +Parameters: + +* `depEvents` - List of events which this external event depends on. The +external event will be considered complete when all of its dependencies are +complete. + +Returns: The external event representing this point of execution in the graph. + +Exceptions: + +* Throws synchronously with error code `invalid` if the queue is not + in recording mode. + |=== ==== New Handler Member Functions @@ -1549,6 +1691,202 @@ different from the one with which the dynamic_parameter was created. |=== +=== Event Class Modifications [[event-class-modifications]] + +[source, c++] +---- +namespace sycl::info { + enum class ext_oneapi_event_t{ + normal, + graph_limited, + graph_external + }; +} // namespace sycl::info +---- + +:event-class: https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:interface.event + +==== Event Type Query + +This extension adds the following new information descriptor to the event class +for use with `event::get_info<>()` which can be used to query the type of a SYCL +event. + +Table {counter: tableNumber}. Additions to the `sycl::event` class information descriptors. +[%header,cols="2a,a,a"] +|=== +|Event Descriptor +|Return Type +|Description + +|`info::event::ext_oneapi_event_type` +|`info::ext_oneapi_event_t` +|Returns the type of this SYCL event. +|=== + +Events of type `ext_oneapi_event_t::normal` are normal SYCL events as described +in the SYCL specification. `ext_oneapi_event_t::graph_limited` and +`ext_oneapi_event_t::graph_external` events are new as part of this extension, +and are differentiated from normal SYCL events in how they can be used. + +==== Graph-Limited Events + +Events associated with a graph node (those returned from recorded queue +submissions) cannot be used as normal SYCL events outside of their associated +graph, these are known as "graph-limited" events. They can only be used as +parameters to `handler::depends_on()`, or as dependent events for queue +shortcuts like `queue::parallel_for()` for other submissions recorded to +the same modifiable `command_graph`. + +The following limitations apply to graph-limited events: + +- Calling `event::get_info()` or +`event::get_profiling_info()` will throw synchronously with error code +`invalid`. + +- A host-side wait on the event will throw synchronously with error +code `invalid`. + +- Using the event outside of the recording scope will throw synchronously with +error code `invalid`. + +==== External Events + +To overcome the limitations of "graph-limited" events, users can add external +event nodes to a graph. These represent a point of execution within the graph +and provide a SYCL event object which can be used to both manage synchronization +between different graphs as well as between the graph and regular operations +submitted to a SYCL queue. External graph events are associated with a single +external event node inside a graph. External events are functionally normal SYCL +events with some restrictions (see below). + +External events can be obtained by either: + +* Calling `command_graph::add_external_event()` on a modifiable `command_graph` +to add an external event node, then obtain the SYCL event object by calling +`node::get_external_event()`. + +* Calling `queue::ext_oneapi_external_event()` on a queue in the recording +state, which will return an external SYCL event. + +External events may be used as parameters to `handler::depends_on()`, as +dependent events for queue submissions outside of the graph they are associated +with or as parameters to `command_graph::add_wait_external_event()`. For +example, this allows enqueing work outside of the graph part-way through the +graph execution, rather than having to wait for the entire graph to finish. + +External events are considered complete when all of their dependent graph nodes +have finished execution, and their execution status is automatically reset when +the executable graph which contains them is submitted to a queue for execution. + +External events have the following restrictions: + +* Profiling is not currently supported for external graph events and calling +`event::get_profiling_info()` will throw synchronously with error code +`invalid`. + +* External events cannot be passed as dependent events for submissions within +the same graph, and doing so will throw synchronously with error code `invalid`. +Graph-limited events should be used for this purpose. + +[source,c++] +---- +/* Basic usage example using the Record&Replay API. */ + +// Create the graph `A`. +sycl::queue queueA; +sycl_ext::command_graph graphA(queueA.get_context(), queueA.get_device()); + +// Start recording the queue. +graphA.begin_recording(queueA); + +// Submit kernel on the recorded queue. +// EventA is a graph-limited event that can only be used for managing +// graph internal dependencies. +EventA = queueA.submit([&](handler& cgh) {...}); + +event externalEventA = queueA.ext_oneapi_external_event({EventA}); + +// Stop recording the queue. +graphA.end_recording(); + +// Create another graph `B` +sycl::queue queueB; +sycl_ext::command_graph graphB(queueB.get_context(), queueB.get_device()); + +// Start recording the queue. +graphB.begin_recording(queueB); + +// Submit kernel on the recorded queue. +queueB.submit([&](handler& cgh) {...}); + +queueB.ext_oneapi_submit_barrier({externalEventA}); + +// Stop recording the queue. +graphB.end_recording(); + +// Finalize the modifiable graphs to create an executable graphs that can be +// submitted for execution. +auto execGraphA = graphA.finalize(); +auto execGraphB = graphB.finalize(); + +// Execute graph +for(int i = 0 ; i < Iterations, i++) { + queueA.submit([&](handler& cgh) { + cgh.ext_oneapi_graph(execGraphA); + }) + + queueB.submit([&](handler& cgh) { + cgh.ext_oneapi_graph(execGraphB); + }) +} +---- + +[source,c++] +---- +// Explicit API usage snippet + +// Add an external event node to Graph A +auto ExtEventNode = GraphA.add_external_event(); + +... + +// Add a wait external event node in GraphB +auto ExtWaitNode = GraphB.add_wait_external_event({ExtEventNode.get_external_event()}); +---- + +==== Normal SYCL Events + +Normal SYCL events from queue submissions outside of a `command_graph` may be +used as dependent events for queue submissions recorded to a `command_graph` or +passed to `handler::depends_on()` for nodes added explicitly to a +`command_graph`. These types of dependencies may be useful for one-off +operations that do not need to run on every graph execution but must be executed +before the main graph commands, such as warm-ups or initialization. + +[source,c++] +---- + +// Normal submission to a queue +auto NormalEvent = Queue.submit(...); + +Graph.begin_recording(Queue); + +auto GraphEvent = Queue.submit([&](handler& CGH){ + // OK! This node will depend on NormalEvent + CGH.depends_on(NormalEvent); + CGH.parallel_for(...); +}); + +Graph.end_recording(Queue); + +auto ExecGraph = Graph.finalize(); + +// The node associated with GraphEvent will be correctly ordered with respect to +// NormalEvent +Queue.ext_oneapi_graph(ExecGraph); +---- + === Thread Safety The new functions in this extension are thread-safe, the same as member @@ -1609,25 +1947,6 @@ of failure. The following list describes the behavior that changes during recording mode. Features not listed below behave the same in recording mode as they do in non-recording mode. -==== Event Limitations - -For queue submissions that are being recorded to a modifiable `command_graph`, -the only events that can be used as parameters to `handler::depends_on()`, or -as dependent events for queue shortcuts like `queue::parallel_for()`, are events -that have been returned from queue submissions recorded to the same modifiable -`command_graph`. - -Other limitations on the events returned from a submission to a queue in the -recording state are: - -- Calling `event::get_info()` or -`event::get_profiling_info()` will throw synchronously with error code `invalid`. - -- A host-side wait on the event will throw synchronously with error -code `invalid`. - -- Using the event outside of the recording scope will throw synchronously with error code -`invalid`. ==== Queue Limitations