diff --git a/src/psyclone/f2pygen.py b/src/psyclone/f2pygen.py index d772b88e2f..2ab917b6f7 100644 --- a/src/psyclone/f2pygen.py +++ b/src/psyclone/f2pygen.py @@ -161,7 +161,10 @@ class ACCDirective(Directive): 'loop'). ''' def __init__(self, root, line, position, dir_type): - self._types = ["parallel", "kernels", "enter data", "loop", "routine"] + self._types = [ + "parallel", "kernels", "enter data", "loop", "routine", + "wait" + ] self._positions = ["begin", "end"] super(ACCDirective, self).__init__(root, line, position, dir_type) diff --git a/src/psyclone/psyGen.py b/src/psyclone/psyGen.py index 8730288e4a..b0da418496 100644 --- a/src/psyclone/psyGen.py +++ b/src/psyclone/psyGen.py @@ -35,6 +35,8 @@ # Modified by I. Kavcic and L. Turner, Met Office # Modified by C.M. Maynard, Met Office / University of Reading # Modified by J. Henrichs, Bureau of Meteorology +# Modified S. Valat, Inria / Laboratoire Jean Kuntzmann +# Modified M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann # ----------------------------------------------------------------------------- ''' This module provides generic support for PSyclone's PSy code optimisation @@ -2582,11 +2584,11 @@ class KernelArgument(Argument): arguments as specified by the kernel argument metadata and the kernel invocation in the Algorithm layer. - :param arg: information obtained from the metadata for this kernel \ - argument. + :param arg: information obtained from the metadata for this kernel + argument. :type arg: :py:class:`psyclone.parse.kernel.Descriptor` - :param arg_info: information on how this argument is specified in \ - the Algorithm layer. + :param arg_info: information on how this argument is specified in + the Algorithm layer. :type arg_info: :py:class:`psyclone.parse.algorithm.Arg` :param call: the PSyIR kernel node to which this argument pertains. :type call: :py:class:`psyclone.psyGen.Kern` diff --git a/src/psyclone/psyir/backend/fortran.py b/src/psyclone/psyir/backend/fortran.py index 3617d4fe73..789c631b9d 100644 --- a/src/psyclone/psyir/backend/fortran.py +++ b/src/psyclone/psyir/backend/fortran.py @@ -1674,14 +1674,11 @@ def standalonedirective_node(self, node): result = f"{self._nindent}!${node.begin_string()}" clause_list = [] - # Currently no standalone directives have clauses associated - # so this code is left commented out. If a standalone directive - # is added with clauses, this should be added in. - # for clause in node.clauses: - # clause_list.append(self._visit(clause)) + for clause in node.clauses: + clause_list.append(self._visit(clause)) # Add a space only if there are clauses - # if len(clause_list) > 0: - # result = result + " " + if len(clause_list) > 0: + result = result + " " result = result + ", ".join(clause_list) result = result + "\n" diff --git a/src/psyclone/psyir/nodes/__init__.py b/src/psyclone/psyir/nodes/__init__.py index ddfd927e91..32af0c3728 100644 --- a/src/psyclone/psyir/nodes/__init__.py +++ b/src/psyclone/psyir/nodes/__init__.py @@ -84,7 +84,7 @@ ACCDirective, ACCLoopDirective, ACCEnterDataDirective, ACCParallelDirective, ACCKernelsDirective, ACCDataDirective, ACCUpdateDirective, ACCStandaloneDirective, ACCRegionDirective, - ACCRoutineDirective, ACCAtomicDirective) + ACCRoutineDirective, ACCAtomicDirective, ACCWaitDirective) from psyclone.psyir.nodes.omp_directives import ( OMPDirective, OMPDoDirective, OMPParallelDirective, OMPParallelDoDirective, OMPSingleDirective, OMPMasterDirective, diff --git a/src/psyclone/psyir/nodes/acc_clauses.py b/src/psyclone/psyir/nodes/acc_clauses.py index 79a8fb6f88..4075bbf1ab 100644 --- a/src/psyclone/psyir/nodes/acc_clauses.py +++ b/src/psyclone/psyir/nodes/acc_clauses.py @@ -37,10 +37,47 @@ ''' This module contains the implementations of the various OpenACC Directive Clause nodes.''' +from typing import Union + from psyclone.psyir.nodes.clause import Clause +from psyclone.psyir.nodes.datanode import DataNode +from psyclone.psyir.nodes.node import Node from psyclone.psyir.nodes.reference import Reference +class ACCAsyncQueueClause(Clause): + ''' + OpenACC async clause. Has one child which specifies which queue, if any, + this node is associated with. + + ''' + _children_valid_format = "DataNode" + _clause_string = "async" + + @staticmethod + def _validate_child(position: int, child: Node) -> bool: + ''' + Decides whether a given child and position are valid for this node. + Only zero or one child of type DataNode is permitted. + + :param position: the position to be validated. + :param child: a child to be validated. + + ''' + if position != 0: + return False + return isinstance(child, DataNode) + + @property + def queue(self) -> Union[DataNode, None]: + ''' + :returns: the queue specified by this clause (if any) + ''' + if self.children: + return self.children[0] + return None + + class ACCCopyClause(Clause): ''' OpenACC copy clause. Specifies a list of variables that are to be copied @@ -120,4 +157,5 @@ def _validate_child(position, child): return isinstance(child, Reference) -__all__ = ["ACCCopyClause", "ACCCopyInClause", "ACCCopyOutClause"] +__all__ = ["ACCAsyncQueueClause", "ACCCopyClause", + "ACCCopyInClause", "ACCCopyOutClause"] diff --git a/src/psyclone/psyir/nodes/acc_directives.py b/src/psyclone/psyir/nodes/acc_directives.py index 868e66a584..d9e4e959e0 100644 --- a/src/psyclone/psyir/nodes/acc_directives.py +++ b/src/psyclone/psyir/nodes/acc_directives.py @@ -36,26 +36,35 @@ # C.M. Maynard, Met Office / University of Reading # J. Henrichs, Bureau of Meteorology # Modified A. B. G. Chalk, STFC Daresbury Lab -# Modified J. G. Wallwork, Met Office / University of Cambridge +# S. Valat, Inria / Laboratoire Jean Kuntzmann +# J. G. Wallwork, Met Office / University of Cambridge +# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann # ----------------------------------------------------------------------------- ''' This module contains the implementation of the various OpenACC Directive nodes.''' import abc +from typing import Dict, List, Optional, Set, Tuple, Union from psyclone.core import Signature -from psyclone.f2pygen import DirectiveGen, CommentGen +from psyclone.f2pygen import BaseGen, CommentGen, DirectiveGen from psyclone.errors import GenerationError, InternalError -from psyclone.psyir.nodes.acc_clauses import (ACCCopyClause, ACCCopyInClause, - ACCCopyOutClause) +from psyclone.psyir.nodes.acc_clauses import ( + ACCAsyncQueueClause, ACCCopyClause, ACCCopyInClause, + ACCCopyOutClause) +from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes.assignment import Assignment +from psyclone.psyir.nodes.clause import Clause from psyclone.psyir.nodes.codeblock import CodeBlock +from psyclone.psyir.nodes.datanode import DataNode from psyclone.psyir.nodes.directive import (StandaloneDirective, RegionDirective) from psyclone.psyir.nodes.intrinsic_call import IntrinsicCall +from psyclone.psyir.nodes.node import Node from psyclone.psyir.nodes.psy_data_node import PSyDataNode from psyclone.psyir.nodes.routine import Routine +from psyclone.psyir.nodes.reference import Reference from psyclone.psyir.nodes.schedule import Schedule from psyclone.psyir.nodes.operation import BinaryOperation from psyclone.psyir.symbols import ScalarType @@ -73,6 +82,7 @@ class ACCDirective(metaclass=abc.ABCMeta): Note that classes inheriting from it must place the ACCDirective in front of the other Directive node sub-class, so that the Python MRO gives preference to this class's attributes. + ''' _PREFIX = "ACC" @@ -81,13 +91,16 @@ class ACCRegionDirective(ACCDirective, RegionDirective, metaclass=abc.ABCMeta): ''' Base class for all OpenACC region directive statements. ''' + # Textual description of the node. + _children_valid_format = "Schedule, Clause*" + def validate_global_constraints(self): ''' Perform validation checks for any global constraints. This can only be done at code-generation time. - :raises GenerationError: if this ACCRegionDirective encloses any form \ - of PSyData node since calls to PSyData routines within OpenACC \ + :raises GenerationError: if this ACCRegionDirective encloses any form + of PSyData node since calls to PSyData routines within OpenACC regions are not supported. ''' @@ -103,8 +116,26 @@ def validate_global_constraints(self): f"{[type(node).__name__ for node in data_nodes]} within a " f"region enclosed by an '{type(self).__name__}'") + @staticmethod + def _validate_child(position, child): + ''' + :param int position: the position to be validated. + :param child: a child to be validated. + :type child: :py:class:`psyclone.psyir.nodes.Node` + + :return: whether the given child and position are valid for this node. + :rtype: bool + + ''' + if position == 0: + return isinstance(child, Schedule) + return isinstance(child, Clause) + @property - def signatures(self): + def signatures(self) -> Union[ + Tuple[Set[Signature]], + Tuple[Set[Signature], Set[Signature]] + ]: ''' Returns a 1-tuple or a 2-tuple of sets depending on the working API. If a 1-tuple, the set includes both input and output signatures @@ -115,11 +146,8 @@ def signatures(self): device (probably a GPU) before the parallel region can be begun. :returns: 1-tuple or 2-tuple of input and output sets of variable names - :rtype: Union[Tuple[Set[:py:class:`psyclone.core.Signature`]], \ - Tuple[Set[:py:class:`psyclone.core.Signature`], \ - Set[:py:class:`psyclone.core.Signature`]]] - ''' + ''' # pylint: disable=import-outside-toplevel from psyclone.domain.lfric import LFRicInvokeSchedule from psyclone.gocean1p0 import GOInvokeSchedule @@ -142,20 +170,31 @@ class ACCStandaloneDirective(ACCDirective, StandaloneDirective, metaclass=abc.ABCMeta): ''' Base class for all standalone OpenACC directive statements. ''' + @staticmethod + def _validate_child(position: int, child: Node) -> bool: + ''' + :param position: the position to be validated. + :param child: a child to be validated. + + :return: whether the given child and position are valid for this node. + + ''' + # Ensure we call the _validate_child() in the correct parent class.. + return StandaloneDirective._validate_child(position, child) + class ACCRoutineDirective(ACCStandaloneDirective): ''' Class representing an "ACC routine" OpenACC directive in PSyIR. - :param str parallelism: the level of parallelism in the routine, one of - "gang", "worker", "vector", "seq". + :param parallelism: the level of parallelism in the routine, one of + "gang", "seq", "vector", "worker". ''' - SUPPORTED_PARALLELISM = ["seq", "vector", "worker", "gang"] + SUPPORTED_PARALLELISM = ["gang", "seq", "vector", "worker"] - def __init__(self, parallelism="seq", **kwargs): + def __init__(self, parallelism: str = "seq", **kwargs): self.parallelism = parallelism - super().__init__(self, **kwargs) @property @@ -169,14 +208,15 @@ def parallelism(self): return self._parallelism @parallelism.setter - def parallelism(self, value): + def parallelism(self, value: str): ''' - :param str value: the new value for the level-of-parallelism within - this routine (or a called one). + :param value: the new value for the level-of-parallelism within + this routine (or a called one). :raises TypeError: if `value` is not a str. :raises ValueError: if `value` is not a recognised level of - parallelism. + parallelism. + ''' if not isinstance(value, str): raise TypeError( @@ -188,12 +228,11 @@ def parallelism(self, value): f"of parallelism but got '{value}'") self._parallelism = value.lower() - def gen_code(self, parent): + def gen_code(self, parent: BaseGen): '''Generate the Fortran ACC Routine Directive and any associated code. :param parent: the parent Node in the Schedule to which to add our content. - :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` ''' # Check the constraints are correct self.validate_global_constraints() @@ -202,70 +241,83 @@ def gen_code(self, parent): parent.add(DirectiveGen(parent, "acc", "begin", "routine", f"{self.parallelism}")) - def begin_string(self): + def begin_string(self) -> str: '''Returns the beginning statement of this directive, i.e. "acc routine". The visitor is responsible for adding the correct directive beginning (e.g. "!$"). :returns: the opening statement of this directive. - :rtype: str ''' return f"acc routine {self.parallelism}" -class ACCEnterDataDirective(ACCStandaloneDirective): +class ACCEnterDataDirective(ACCStandaloneDirective, ACCAsyncMixin): ''' Class representing a "!$ACC enter data" OpenACC directive in an InvokeSchedule. Must be sub-classed for a particular API because the way in which fields are marked as being on the remote device is API-dependent. :param children: list of nodes which the directive should have as children. - :type children: List[:py:class:`psyclone.psyir.nodes.Node`] - :param parent: the node in the InvokeSchedule to which to add this \ - directive as a child. - :type parent: :py:class:`psyclone.psyir.nodes.Node` + :param parent: the node in the InvokeSchedule to which to add this + directive as a child. + :param async_queue: Enable async support and attach it to the given queue. + Can use False to disable, True to enable on default + stream. Int to attach to the given stream ID or use a PSyIR + expression to say at runtime what stream to be used. + + :raises TypeError: if async_queue is of the wrong type. + ''' - def __init__(self, children=None, parent=None): + def __init__( + self, + children: List[Node] = None, + parent: Node = None, + async_queue: Union[bool, int, DataNode] = False + ): super().__init__(children=children, parent=parent) + + ACCAsyncMixin.__init__(self, async_queue) self._acc_dirs = None # List of parallel directives self._sig_set = set() - def gen_code(self, parent): + def gen_code(self, parent: BaseGen): '''Generate the elements of the f2pygen AST for this Node in the Schedule. :param parent: node in the f2pygen AST to which to add node(s). - :type parent: :py:class:`psyclone.f2pygen.BaseGen` :raises GenerationError: if no data is found to copy in. ''' self.validate_global_constraints() + self.lower_to_language_level() # Leverage begin_string() to raise an exception if there are no # variables to copyin but discard the generated string since it is # incompatible with class DirectiveGen() we are using below. self.begin_string() + # async + async_option = self._build_async_string() + # Add the enter data directive. sym_list = _sig_set_to_string(self._sig_set) copy_in_str = f"copyin({sym_list})" parent.add(DirectiveGen(parent, "acc", "begin", "enter data", - copy_in_str)) + copy_in_str + async_option)) # Call an API-specific subclass of this class in case # additional declarations are required. self.data_on_device(parent) parent.add(CommentGen(parent, "")) - def lower_to_language_level(self): + def lower_to_language_level(self) -> Node: ''' In-place replacement of this directive concept into language level PSyIR constructs. :returns: the lowered version of this node. - :rtype: :py:class:`psyclone.psyir.node.Node` ''' # We must generate a list of all of the fields accessed within OpenACC @@ -304,13 +356,19 @@ def begin_string(self): "Perhaps there are no ACCParallel or ACCKernels directives " "within the region?") - sym_list = _sig_set_to_string(self._sig_set) - # Variables need lexicographic sorting since sets guarantee no ordering # and members of composite variables must appear later in deep copies. - return f"acc enter data copyin({sym_list})" + sym_list = _sig_set_to_string(self._sig_set) - def data_on_device(self, parent): + # options + options = f" copyin({sym_list})" + + # async + options += self._build_async_string() + + return f"acc enter data{options}" + + def data_on_device(self, parent: Node): ''' Adds nodes into an InvokeSchedule to flag that the data required by the kernels in the data region is now on the device. The generic @@ -318,37 +376,67 @@ def data_on_device(self, parent): APIs if any infrastructure call is needed. :param parent: the node in the InvokeSchedule to which to add nodes - :type parent: :py:class:`psyclone.psyir.nodes.Node` ''' -class ACCParallelDirective(ACCRegionDirective): +class ACCParallelDirective(ACCRegionDirective, ACCAsyncMixin): ''' Class representing the !$ACC PARALLEL directive of OpenACC - in the PSyIR. By default it includes the 'DEFAULT(PRESENT)' clause which + in the PSyIR. By default it includes the `DEFAULT(PRESENT)` clause which means this node must either come after an EnterDataDirective or within a DataDirective. - :param bool default_present: whether this directive includes the - 'DEFAULT(PRESENT)' clause. + :param default_present: whether this directive includes the + `DEFAULT(PRESENT)` clause or not. + :param async_queue: Enable async support and attach it to the given queue. + Can use False to disable, True to enable on default + stream. Int to attach to the given stream ID or use a PSyIR + expression to say at runtime what stream to be used. ''' - def __init__(self, default_present=True, **kwargs): + def __init__( + self, + async_queue: Union[bool, int, DataNode] = False, + default_present: bool = True, + **kwargs + ): super().__init__(**kwargs) + ACCAsyncMixin.__init__(self, async_queue) self.default_present = default_present - def gen_code(self, parent): + def __eq__(self, other) -> bool: + ''' + Checks whether two nodes are equal. Two ACCParallelDirective nodes are + equal if their default_present members are equal and they use the + same async_queue. + + :param object other: the object to check equality to. + + :returns: whether other is equal to self. + + ''' + is_eq = super().__eq__(other) + is_eq = is_eq and self.default_present == other.default_present + is_eq = is_eq and ACCAsyncMixin.__eq__(self, other) + + return is_eq + + def gen_code(self, parent: BaseGen): ''' Generate the elements of the f2pygen AST for this Node in the Schedule. :param parent: node in the f2pygen AST to which to add node(s). - :type parent: :py:class:`psyclone.f2pygen.BaseGen` ''' self.validate_global_constraints() - parent.add(DirectiveGen(parent, "acc", "begin", - *self.begin_string().split()[1:])) + # remove the "acc parallel" added by begin_string() and keep only the + # parameters + begin_args = ' '.join(self.begin_string().split()[2:]) + + # add the directive + parent.add(DirectiveGen(parent, "acc", "begin", "parallel", + begin_args)) for child in self.children: child.gen_code(parent) @@ -357,36 +445,37 @@ def gen_code(self, parent): self.gen_post_region_code(parent) - def begin_string(self): + def begin_string(self) -> str: ''' Returns the beginning statement of this directive, i.e. "acc parallel" plus any qualifiers. The backend is responsible for adding the correct characters to mark this as a directive (e.g. "!$"). :returns: the opening statement of this directive. - :rtype: str ''' + options = "" if self._default_present: # "default(present)" means that the compiler is to assume that # all data required by the parallel region is already present # on the device. If we've made a mistake and it isn't present # then we'll get a run-time error. - return "acc parallel default(present)" - return "acc parallel" + options = " default(present)" + options += self._build_async_string() + return f"acc parallel{options}" - def end_string(self): + def end_string(self) -> str: ''' :returns: the closing statement for this directive. - :rtype: str + ''' return "acc end parallel" @property - def default_present(self): + def default_present(self) -> bool: ''' :returns: whether the directive includes the 'default(present)' clause. - :rtype: bool + ''' return self._default_present @@ -406,13 +495,13 @@ def default_present(self, value): self._default_present = value @property - def fields(self): + def fields(self) -> List[str]: ''' Returns a list of the names of field objects required by the Kernel call(s) that are children of this directive. :returns: list of names of field arguments. - :rtype: List[str] + ''' # Look-up the kernels that are children of this node fld_list = [] @@ -427,21 +516,28 @@ class ACCLoopDirective(ACCRegionDirective): ''' Class managing the creation of a '!$acc loop' OpenACC directive. - :param int collapse: Number of nested loops to collapse into a single - iteration space or None. - :param bool independent: Whether or not to add the `independent` clause - to the loop directive. - :param bool sequential: whether or not to add the `seq` clause to the - loop directive. - :param bool gang: whether or not to add the `gang` clause to the - loop directive. - :param bool vector: whether or not to add the `vector` clause to the - loop directive. + :param collapse: Number of nested loops to collapse into a single + iteration space or None. + :param independent: Whether or not to add the `independent` clause + to the loop directive. + :param sequential: whether or not to add the `seq` clause to the + loop directive. + :param gang: whether or not to add the `gang` clause to the + loop directive. + :param vector: whether or not to add the `vector` clause to the + loop directive. :param kwargs: additional keyword arguments provided to the super class. - :type kwargs: unwrapped dict. + ''' - def __init__(self, collapse=None, independent=True, sequential=False, - gang=False, vector=False, **kwargs): + def __init__( + self, + collapse: int = None, + independent: bool = True, + sequential: bool = False, + gang: bool = False, + vector: bool = False, + **kwargs: Dict + ): self.collapse = collapse self._independent = independent self._sequential = sequential @@ -450,7 +546,7 @@ def __init__(self, collapse=None, independent=True, sequential=False, self._check_clauses_consistent() super().__init__(**kwargs) - def __eq__(self, other): + def __eq__(self, other) -> bool: ''' Checks whether two nodes are equal. Two ACCLoopDirective nodes are equal if their collapse, independent, sequential, gang, and vector @@ -459,7 +555,7 @@ def __eq__(self, other): :param object other: the object to check equality to. :returns: whether other is equal to self. - :rtype: bool + ''' is_eq = super().__eq__(other) is_eq = is_eq and self.collapse == other.collapse @@ -484,20 +580,19 @@ def _check_clauses_consistent(self): ) @property - def collapse(self): + def collapse(self) -> Union[int, None]: ''' :returns: the number of nested loops to collapse into a single \ iteration space for this node. - :rtype: int or None + ''' return self._collapse @collapse.setter - def collapse(self, value): + def collapse(self, value: Optional[int]): ''' :param value: optional number of nested loop to collapse into a \ single iteration space to parallelise. Defaults to None. - :type value: Optional[int] :raises TypeError: if the collapse value given is not an integer \ or NoneType. @@ -517,52 +612,51 @@ def collapse(self, value): self._collapse = value @property - def independent(self): + def independent(self) -> bool: ''' Returns whether the independent clause will be added to this loop directive. :returns: whether the independent clause will be added to this loop \ directive. - :rtype: bool + ''' return self._independent @property - def sequential(self): + def sequential(self) -> bool: ''' :returns: whether or not the `seq` clause is added to this loop \ directive. - :rtype: bool + ''' return self._sequential @property - def gang(self): + def gang(self) -> bool: ''' :returns: whether or not the `gang` clause is added to this loop directive. - :rtype: bool + ''' return self._gang @property - def vector(self): + def vector(self) -> bool: ''' :returns: whether or not the `vector` clause is added to this loop directive. - :rtype: bool + ''' return self._vector - def node_str(self, colour=True): - ''' - Returns the name of this node with (optional) control codes + def node_str(self, colour: bool = True) -> str: + '''Returns the name of this node with (optional) control codes to generate coloured output in a terminal that supports it. - :param bool colour: whether or not to include colour control codes. + :param colour: whether or not to include colour control codes. :returns: description of this node, possibly coloured. - :rtype: str + ''' self._check_clauses_consistent() text = self.coloured_name(colour) @@ -574,8 +668,7 @@ def node_str(self, colour=True): return text def validate_global_constraints(self): - ''' - Perform validation of those global constraints that can only be done + '''Perform validation of those global constraints that can only be done at code-generation time. :raises GenerationError: if this ACCLoopDirective is not enclosed @@ -596,16 +689,16 @@ def validate_global_constraints(self): super().validate_global_constraints() - def gen_code(self, parent): - ''' - Generate the f2pygen AST entries in the Schedule for this OpenACC + def gen_code(self, parent: BaseGen): + '''Generate the f2pygen AST entries in the Schedule for this OpenACC loop directive. :param parent: the parent Node in the Schedule to which to add our - content. - :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` - :raises GenerationError: if this "!$acc loop" is not enclosed within \ - an ACC Parallel region. + content. + + :raises GenerationError: if this "!$acc loop" is not enclosed within + an ACC Parallel region. + ''' self.validate_global_constraints() @@ -618,16 +711,15 @@ def gen_code(self, parent): for child in self.children: child.gen_code(parent) - def begin_string(self, leading_acc=True): + def begin_string(self, leading_acc: bool = True) -> str: ''' Returns the opening statement of this directive, i.e. "acc loop" plus any qualifiers. If `leading_acc` is False then the leading "acc loop" text is not included. - :param bool leading_acc: whether or not to include the leading \ - "acc loop" in the text that is returned. + :param leading_acc: whether or not to include the leading + "acc loop" in the text that is returned. :returns: the opening statement of this directive. - :rtype: str ''' clauses = [] @@ -648,33 +740,42 @@ def begin_string(self, leading_acc=True): clauses += [f"collapse({self.collapse})"] return " ".join(clauses) - def end_string(self): + def end_string(self) -> str: ''' Would return the end string for this directive but "acc loop" doesn't have a closing directive. :returns: empty string. - :rtype: str ''' return "" -class ACCKernelsDirective(ACCRegionDirective): - ''' - Class representing the !$ACC KERNELS directive in the PSyIR. +class ACCKernelsDirective(ACCRegionDirective, ACCAsyncMixin): + '''Class representing the `!$ACC KERNELS` directive in the PSyIR. - :param children: the PSyIR nodes to be enclosed in the Kernels region \ - and which are therefore children of this node. - :type children: List[:py:class:`psyclone.psyir.nodes.Node`] + :param children: the PSyIR nodes to be enclosed in the Kernels region + and which are therefore children of this node. :param parent: the parent of this node in the PSyIR. - :type parent: sub-class of :py:class:`psyclone.psyir.nodes.Node` - :param bool default_present: whether or not to add the "default(present)" \ - clause to the kernels directive. + :param bool default_present: whether or not to add the + "default(present)" clause to the kernels directive. + :param async_queue: Enable async support and attach it to the given queue. + Can use False to disable, True to enable on default + stream. Int to attach to the given stream ID or use a PSyIR + expression to say at runtime what stream to be used. ''' - def __init__(self, children=None, parent=None, default_present=True): + _children_valid_format = "Schedule, Clause*" + + def __init__( + self, + children: List[Node] = None, + parent: Node = None, + default_present: bool = True, + async_queue: Union[bool, int, DataNode] = False + ): super().__init__(children=children, parent=parent) + ACCAsyncMixin.__init__(self, async_queue) self._default_present = default_present def __eq__(self, other): @@ -686,29 +787,45 @@ def __eq__(self, other): :returns: whether other is equal to self. :rtype: bool + ''' is_eq = super().__eq__(other) is_eq = is_eq and self.default_present == other.default_present + is_eq = is_eq and ACCAsyncMixin.__eq__(self, other) return is_eq - @property - def default_present(self): + @staticmethod + def _validate_child(position: int, child: Node) -> bool: ''' - :returns: whether the "default(present)" clause is added to the \ - kernels directive. + :param int position: the position to be validated. + :param child: a child to be validated. + :type child: :py:class:`psyclone.psyir.nodes.Node` + + :return: whether the given child and position are valid for this node. :rtype: bool + + ''' + if position == 0: + return isinstance(child, Schedule) + return isinstance(child, ACCAsyncQueueClause) + + @property + def default_present(self) -> bool: + ''' + :returns: whether the "default(present)" clause is added to the + kernels directive. + ''' return self._default_present - def gen_code(self, parent): + def gen_code(self, parent: BaseGen): ''' Generate the f2pygen AST entries in the Schedule for this OpenACC Kernels directive. - :param parent: the parent Node in the Schedule to which to add this \ - content. - :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` + :param parent: the parent Node in the Schedule to which to add this + content. ''' self.validate_global_constraints() @@ -718,34 +835,38 @@ def gen_code(self, parent): parent.add(DirectiveGen(parent, "acc", "begin", *self.begin_string().split()[1:])) for child in self.children: - child.gen_code(parent) + if not isinstance(child, Clause): + child.gen_code(parent) parent.add(DirectiveGen(parent, *self.end_string().split())) self.gen_post_region_code(parent) - def begin_string(self): + def begin_string(self) -> str: '''Returns the beginning statement of this directive, i.e. "acc kernels ...". The backend is responsible for adding the correct directive beginning (e.g. "!$"). :returns: the beginning statement for this directive. - :rtype: str ''' result = "acc kernels" + + # present if self._default_present: result += " default(present)" + + # async + result += self._build_async_string() + return result - def end_string(self): - ''' - Returns the ending statement for this directive. The backend is + def end_string(self) -> str: + '''Returns the ending statement for this directive. The backend is responsible for adding the language-specific syntax that marks this as a directive. :returns: the closing statement for this directive. - :rtype: str ''' return "acc end kernels" @@ -769,18 +890,16 @@ def gen_code(self, _): "ACCDataDirective.gen_code should not have been called.") @staticmethod - def _validate_child(position, child): + def _validate_child(position: int, child: Node) -> bool: ''' Check that the supplied node is a valid child of this node at the specified position. - :param int position: the proposed position of this child in the list + :param position: the proposed position of this child in the list of children. :param child: the proposed child node. - :type child: :py:class:`psyclone.psyir.nodes.Node` :returns: whether or not the proposed child and position are valid. - :rtype: bool ''' if position == 0: @@ -788,17 +907,16 @@ def _validate_child(position, child): return isinstance(child, (ACCCopyClause, ACCCopyInClause, ACCCopyOutClause)) - def begin_string(self): + def begin_string(self) -> str: ''' :returns: the beginning of the opening statement of this directive. - :rtype: str + ''' return "acc data" - def end_string(self): + def end_string(self) -> str: ''' :returns: the text for the end of this directive region. - :rtype: str ''' return "acc end data" @@ -840,38 +958,46 @@ def _update_data_movement_clauses(self): self.addchild(ACCCopyClause(children=list(readwrites.values()))) -class ACCUpdateDirective(ACCStandaloneDirective): +class ACCUpdateDirective(ACCStandaloneDirective, ACCAsyncMixin): ''' Class representing the OpenACC update directive in the PSyIR. It has - a direction attribute that can be set to 'self', 'host' or 'device', the + a direction attribute that can be set to `self`, `host` or `device`, the set of symbols being updated and an optional if_present clause. - :param signatures: the access signature(s) that need to be synchronised \ - with the device. - :type signatures: Set[:py:class:`psyclone.core.Signature`] - :param str direction: the direction of the synchronisation. + :param signatures: the access signature(s) that need to be synchronised + with the device. + :param direction: the direction of the synchronisation. :param children: list of nodes which the directive should have as children. - :type children: List[:py:class:`psyclone.psyir.nodes.Node`] - :param parent: the node in the InvokeSchedule to which to add this \ - directive as a child. - :type parent: :py:class:`psyclone.psyir.nodes.Node` - :param if_present: whether or not to include the 'if_present' - clause on the update directive (this instructs the - directive to silently ignore any variables that are not - on the device). - :type if_present: Optional[bool] + :param parent: the node in the InvokeSchedule to which to add this + directive as a child. + :param if_present: whether or not to include the `if_present` + clause on the update directive (this instructs the + directive to silently ignore any variables that are not + on the device). + :param async_queue: Enable async support and attach it to the given queue. + Can use False to disable, True to enable on default + stream. Int to attach to the given stream ID or use a PSyIR + expression to say at runtime what stream to be used. + ''' _VALID_DIRECTIONS = ("self", "host", "device") - def __init__(self, signatures, direction, children=None, parent=None, - if_present=True): + def __init__( + self, + signatures: Signature, + direction: str, + children: List[Node] = None, + parent: Node = None, + if_present: Optional[bool] = True, + async_queue: Union[bool, int, DataNode] = False + ): super().__init__(children=children, parent=parent) - + ACCAsyncMixin.__init__(self, async_queue) self.sig_set = signatures self.direction = direction self.if_present = if_present - def __eq__(self, other): + def __eq__(self, other) -> bool: ''' Checks whether two nodes are equal. Two ACCUpdateDirective nodes are equal if their sig_set, direction and if_present members are equal. @@ -879,45 +1005,45 @@ def __eq__(self, other): :param object other: the object to check equality to. :returns: whether other is equal to self. - :rtype: bool + ''' is_eq = super().__eq__(other) is_eq = is_eq and self.sig_set == other.sig_set is_eq = is_eq and self.direction == other.direction is_eq = is_eq and self.if_present == other.if_present + is_eq = is_eq and ACCAsyncMixin.__eq__(self, other) return is_eq @property - def sig_set(self): + def sig_set(self) -> Signature: ''' :returns: the set of signatures to synchronise with the device. - :rtype: Set[:py:class:`psyclone.core.Signature`] + ''' return self._sig_set @property - def direction(self): + def direction(self) -> str: ''' :returns: the direction of the synchronisation. - :rtype: str + ''' return self._direction @property - def if_present(self): + def if_present(self) -> bool: ''' :returns: whether or not to add the 'if_present' clause. - :rtype: bool + ''' return self._if_present @sig_set.setter - def sig_set(self, signatures): + def sig_set(self, signatures: Signature): ''' :param signatures: the access signature(s) that need to be \ synchronised with the device. - :type signatures: Set[:py:class:`psyclone.core.Signature`] :raises TypeError: if signatures is not a set of access signatures. ''' @@ -930,9 +1056,9 @@ def sig_set(self, signatures): self._sig_set = signatures @direction.setter - def direction(self, direction): + def direction(self, direction: str): ''' - :param str direction: the direction of the synchronisation. + :param direction: the direction of the synchronisation. :raises ValueError: if the direction argument is not a string with \ value 'self', 'host' or 'device'. @@ -946,12 +1072,12 @@ def direction(self, direction): self._direction = direction @if_present.setter - def if_present(self, if_present): + def if_present(self, if_present: bool): ''' - :param bool if_present: whether or not to add the 'if_present' \ + :param if_present: whether or not to add the 'if_present' \ clause. - :raises TypeError: if if_present is not a boolean. + :raises TypeError: if `if_present` is not a boolean. ''' if not isinstance(if_present, bool): raise TypeError( @@ -960,14 +1086,13 @@ def if_present(self, if_present): self._if_present = if_present - def begin_string(self): + def begin_string(self) -> str: ''' Returns the beginning statement of this directive, i.e. "acc update host(symbol)". The backend is responsible for adding the correct characters to mark this as a directive (e.g. "!$"). :returns: the opening statement of this directive. - :rtype: str ''' if not self._sig_set: @@ -983,25 +1108,112 @@ def begin_string(self): condition = "if_present " if self._if_present else "" sym_list = _sig_set_to_string(self._sig_set) - return f"acc update {condition}{self._direction}({sym_list})" + # async + asyncvalue = self._build_async_string() + + return \ + f"acc update {condition}{self._direction}({sym_list}){asyncvalue}" -def _sig_set_to_string(sig_set): +def _sig_set_to_string(sig_set: Set[Signature]) -> str: ''' Converts the provided set of signatures into a lexically sorted string of comma-separated signatures which also includes, for signatures that represent variables of a derived type, the composing subsignatures. :param sig_set: set of signature(s) to include in the string. - :type sig_set: Set[:py:class:`psyclone.core.Signature`] :returns: a lexically sorted string of comma-separated (sub)signatures. - :rtype: str ''' names = {s[:i+1].to_language() for s in sig_set for i in range(len(s))} return ",".join(sorted(names)) +class ACCWaitDirective(ACCStandaloneDirective): + ''' + Class representing the !$ACC WAIT directive in the PSyIR. + + :param wait_queue: Which ACC async stream to wait. None to wait all. + + ''' + def __init__(self, wait_queue: Union[Reference, int, None] = None): + # call parent + super().__init__() + self.wait_queue = wait_queue + + def __eq__(self, other) -> bool: + ''' + Test the equality of two directives. + + :returns: If the two directives are equals. + + ''' + is_eq = super().__eq__(other) + is_eq = is_eq and self._wait_queue == other._wait_queue + return is_eq + + @property + def wait_queue(self) -> Union[int, Reference, None]: + ''' + :returns: The queue to wait on. + + ''' + return self._wait_queue + + @wait_queue.setter + def wait_queue(self, wait_queue: Union[int, Reference, None]): + ''' + Setter to assign a specific wait queue to wait for. + + :param wait_queue: The wait queue to expect, or None for all. + + :raises TypeError: if `wait_queue` is of the wrong type + ''' + # check + if (wait_queue is not None + and not isinstance(wait_queue, (int, Reference))): + raise TypeError("Invalid value type as wait_group, shoule be" + "in (None, int, Signature) !") + + # set + self._wait_queue = wait_queue + + def gen_code(self, parent: BaseGen): + ''' + Generate the given directive code to add it to the call tree. + + :param parent: the parent Node in the Schedule to which to add this \ + content. + ''' + # remove the "acc wait" added by begin_string() and keep only the + # parameters + args = ' '.join(self.begin_string().split()[2:]) + + # Generate the directive + parent.add(DirectiveGen(parent, "acc", "begin", "wait", args)) + + def begin_string(self) -> str: + '''Returns the beginning statement of this directive, i.e. + "acc wait ...". The backend is responsible for adding the + correct directive beginning (e.g. "!$"). + + :returns: the beginning statement for this directive. + + ''' + # default basic directive + result = "acc wait" + + # handle specifying groups + if self._wait_queue is not None: + if isinstance(self._wait_queue, Reference): + result += f" ({self._wait_queue.name})" + else: + result += f" ({self._wait_queue})" + + # ok return it + return result + + class ACCAtomicDirective(ACCRegionDirective): ''' OpenACC directive to represent that the memory accesses in the associated @@ -1010,32 +1222,28 @@ class ACCAtomicDirective(ACCRegionDirective): currently unsupported in the PSyIR. ''' - def begin_string(self): + def begin_string(self) -> str: ''' :returns: the opening string statement of this directive. - :rtype: str ''' return "acc atomic" - def end_string(self): + def end_string(self) -> str: ''' :returns: the ending string statement of this directive. - :rtype: str ''' return "acc end atomic" @staticmethod - def is_valid_atomic_statement(stmt): + def is_valid_atomic_statement(stmt: Node) -> bool: ''' Check if a given statement is a valid OpenACC atomic expression. :param stmt: a node to be validated. - :type stmt: :py:class:`psyclone.psyir.nodes.Node` :returns: whether a given statement is compliant with the OpenACC atomic expression. - :rtype: bool ''' if not isinstance(stmt, Assignment): @@ -1094,4 +1302,5 @@ def validate_global_constraints(self): __all__ = ["ACCRegionDirective", "ACCEnterDataDirective", "ACCParallelDirective", "ACCLoopDirective", "ACCKernelsDirective", "ACCDataDirective", "ACCUpdateDirective", "ACCStandaloneDirective", - "ACCDirective", "ACCRoutineDirective", "ACCAtomicDirective"] + "ACCDirective", "ACCRoutineDirective", "ACCAtomicDirective", + "ACCWaitDirective"] diff --git a/src/psyclone/psyir/nodes/acc_mixins.py b/src/psyclone/psyir/nodes/acc_mixins.py new file mode 100644 index 0000000000..edda1a1853 --- /dev/null +++ b/src/psyclone/psyir/nodes/acc_mixins.py @@ -0,0 +1,210 @@ +# ----------------------------------------------------------------------------- +# BSD 3-Clause License +# +# Copyright (c) 2021-2025, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# Author: S. Valat, Inria / Lab. Jean Kuntzmann +# Modified: M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann +# A. R. Porter, STFC Daresbury Laboratory +# ----------------------------------------------------------------------------- + +''' This module contains the mixins to apply some ACC features on many +classes.''' + +import abc +from typing import Union + +from psyclone.psyir.nodes.acc_clauses import ACCAsyncQueueClause +from psyclone.psyir.nodes.datanode import DataNode +from psyclone.psyir.nodes.literal import Literal +from psyclone.psyir.symbols import INTEGER_TYPE + + +class ACCAsyncMixin(metaclass=abc.ABCMeta): + ''' + Class handling the common code to handle the async keyword on related acc + directives. + + :param async_queue: Enable async support and attach it to the given queue. + Can use False to disable, True to enable on default + stream. Use int to attach to the given stream ID or + use a PSyIR expression to say at runtime what stream + to be used. + ''' + def __init__( + self, + async_queue: Union[bool, int, DataNode] = False + ): + clause = self._create_clause(async_queue) + if clause: + self.addchild(clause) + + @staticmethod + def convert_queue( + async_queue: Union[bool, int, DataNode]) -> Union[bool, DataNode]: + ''' + Utility to convert the provided queue value to PSyIR when + applicable. + + :param async_queue: the queue value to convert. + + :returns: PSyIR of queue value or bool specifying whether or not async + is enabled. + + :raises TypeError: if the supplied queue value is of unsupported type. + + ''' + if isinstance(async_queue, bool): + qarg = async_queue + elif isinstance(async_queue, int): + qarg = Literal(f"{async_queue}", INTEGER_TYPE) + elif isinstance(async_queue, DataNode): + qarg = async_queue + else: + raise TypeError(f"Invalid async_queue value, expected DataNode, " + f"integer or bool, got : {async_queue}") + return qarg + + @staticmethod + def _create_clause( + async_queue: Union[bool, int, DataNode] + ) -> Union[ACCAsyncQueueClause, None]: + ''' + Utility to create a new ACCAsyncQueueClause for the specified queue. + + :param async_queue: the queue value to use for the async clause (or + True to enable on default queue or False to disable). + + :returns: a new ACCAsyncQueueClause if async is enabled and None + otherwise. + + :raises TypeError: if `async_queue` is of the wrong type + + ''' + if async_queue is False: + # There's no async clause. + return None + # Convert async_queue value to PSyIR if necessary and + # add as child of clause. + qarg = ACCAsyncMixin.convert_queue(async_queue) + clause = ACCAsyncQueueClause() + if qarg and qarg is not True: + # A specific queue is supplied. + clause.addchild(qarg) + # No queue is specified + return clause + + @property + def async_clause(self) -> Union[ACCAsyncQueueClause, None]: + ''' + :returns: the queue clause associated with this node or None. + ''' + for child in self.clauses: + if isinstance(child, ACCAsyncQueueClause): + return child + return None + + @property + def async_queue(self) -> Union[bool, int, DataNode]: + ''' + :returns: whether or not async is enabled and if so, which queue this + node is associated with. (True indicates the default stream.) + Can use False to disable, True to enable on default stream. + Int to attach to the given stream ID or use a PSyIR + expression to say at runtime what stream to be used. + ''' + clause = self.async_clause + if clause: + if clause.queue is None: + # async is enabled on the default stream. + return True + return clause.queue + # No clause => async is not enabled. + return False + + @async_queue.setter + def async_queue(self, async_queue: Union[bool, int, DataNode]): + ''' + Set the asynchronous behaviour associated with this node. + + :param async_queue: Enable async support and attach it to the given + queue. Can use False to disable, True to enable on + default stream. Int to attach to the given stream + ID or use a PSyIR expression to say at runtime + which stream to be used. + ''' + # `clause` will be None if async support is disabled. + clause = ACCAsyncMixin._create_clause(async_queue) + existing = self.async_clause + if existing: + # This node already had an ACCAsyncQueueClause so we have to either + # replace or remove it. + if clause: + existing.replace_with(clause) + else: + existing.detach() + else: + if clause: + # No existing clause but async support is now enabled so add + # the new clause. + self.addchild(clause) + + def _build_async_string(self) -> str: + ''' + Build the async arg to concat to the acc directive when generating the + code in the old, 'gen_code' path. + + :returns: The "async[()]" option to add to the directive. + + ''' + result = "" + + # async + clause = self.async_clause + if clause: + # pylint: disable=import-outside-toplevel + from psyclone.psyir.backend.fortran import FortranWriter + result = f" {FortranWriter()(clause)}" + + return result + + def __eq__(self, other) -> bool: + ''' + Checks whether two nodes are equal. Two ACCAsyncMixin are + equal if their async_queue members are equal. + + :param object other: the object to check equality to. + + :returns: whether other is equal to self. + ''' + if type(self) is not type(other): + return False + return self.async_queue == other.async_queue diff --git a/src/psyclone/psyir/nodes/directive.py b/src/psyclone/psyir/nodes/directive.py index fffb221651..ecff96f359 100644 --- a/src/psyclone/psyir/nodes/directive.py +++ b/src/psyclone/psyir/nodes/directive.py @@ -43,6 +43,7 @@ import abc from collections import OrderedDict +from typing import List from psyclone.configuration import Config from psyclone.core import Signature, VariablesAccessInfo @@ -50,6 +51,7 @@ from psyclone.f2pygen import CommentGen from psyclone.psyir.nodes.array_of_structures_reference import ( ArrayOfStructuresReference) +from psyclone.psyir.nodes.clause import Clause from psyclone.psyir.nodes.loop import Loop from psyclone.psyir.nodes.reference import Reference from psyclone.psyir.nodes.schedule import Schedule @@ -290,8 +292,9 @@ class StandaloneDirective(Directive): (e.g. OpenMP, OpenACC, compiler-specific) inherit from this class. ''' - # Textual description of the node. - _children_valid_format = None + # Textual description of the node. A standalone directive may only have + # Clauses as children. + _children_valid_format = "Clause*" @staticmethod def _validate_child(position, child): @@ -304,20 +307,15 @@ def _validate_child(position, child): :rtype: bool ''' - # Children are not allowed for StandaloneDirective - return False + # Only clauses are permitted. + return isinstance(child, Clause) @property - def clauses(self): + def clauses(self) -> List[Clause]: ''' :returns: the Clauses associated with this directive. - :rtype: List of :py:class:`psyclone.psyir.nodes.Clause` ''' - # This should be uncommented once a standalone directive with - # clauses exists - # if len(self.children) > 0: - # return self.children - return [] + return self.children # For automatic API documentation generation diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index 285ac7db26..07469f490b 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -35,16 +35,21 @@ # A. B. G. Chalk STFC Daresbury Lab # J. Henrichs, Bureau of Meteorology # Modified I. Kavcic, J. G. Wallwork, O. Brunt and L. Turner, Met Office +# S. Valat, Inria / Laboratoire Jean Kuntzmann +# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann ''' This module provides the ACCKernelsTrans transformation. ''' import re +from typing import Any, Dict, List, Union from psyclone import psyGen +from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes import ( - ACCKernelsDirective, Assignment, Call, CodeBlock, Loop, PSyDataNode, - Reference, Return, Routine, Statement, WhileLoop) -from psyclone.psyir.symbols import UnsupportedFortranType + ACCEnterDataDirective, ACCKernelsDirective, Assignment, + Call, CodeBlock, Literal, Loop, Node, + PSyDataNode, Reference, Return, Routine, Statement, WhileLoop) +from psyclone.psyir.symbols import INTEGER_TYPE, UnsupportedFortranType from psyclone.psyir.transformations.region_trans import RegionTrans from psyclone.psyir.transformations.transformation_error import ( TransformationError) @@ -74,20 +79,30 @@ class ACCKernelsTrans(RegionTrans): excluded_node_types = (CodeBlock, Return, PSyDataNode, psyGen.HaloExchange, WhileLoop) - def apply(self, node, options=None): + def apply( + self, + node: Union[Node, List[Node]], + options: Dict[str, Any] = {} + ): ''' Enclose the supplied list of PSyIR nodes within an OpenACC Kernels region. :param node: a node or list of nodes in the PSyIR to enclose. - :type node: :py:class:`psyclone.psyir.nodes.Node` | - list[:py:class:`psyclone.psyir.nodes.Node`] :param options: a dictionary with options for transformations. - :type options: Optional[Dict[str, Any]] :param bool options["default_present"]: whether or not the kernels region should have the 'default present' attribute (indicating that data is already on the accelerator). When using managed memory this option should be False. + :param bool options["disable_loop_check"]: whether to disable the check + that the supplied region contains 1 or more loops. Default is False + (i.e. the check is enabled). + :param options["async_queue"]: whether or not to add the 'async' clause + to the new directive and if so, which queue to associate it with. + True to enable for the default queue or a queue value specified + with an int or PSyIR expression. + :type options["async_queue"]: + Union[bool, :py:class:`psyclone.psyir.nodes.DataNode`] ''' # Ensure we are always working with a list of nodes, even if only @@ -99,18 +114,80 @@ def apply(self, node, options=None): parent = node_list[0].parent start_index = node_list[0].position - if not options: - options = {} default_present = options.get("default_present", False) + async_queue = options.get("async_queue", False) # Create a directive containing the nodes in node_list and insert it. directive = ACCKernelsDirective( parent=parent, children=[node.detach() for node in node_list], - default_present=default_present) + default_present=default_present, async_queue=async_queue) parent.children.insert(start_index, directive) - def validate(self, nodes, options=None): + @staticmethod + def check_async_queue( + nodes: List[Node], + async_queue: Union[bool, int, Reference] + ): + ''' + Common function to check that all parent data directives have + the same async queue. + + :param node: the nodes in the PSyIR to enclose. + :param async_queue: The async queue to expect in ancestors. + + :raises TypeError: if the supplied queue is of the wrong type. + :raises TransformationError: if the supplied queue does not match + that specified by any ancestor nodes. + ''' + def _to_str(val): + return (f"'{val.debug_string()}'" if isinstance(val, Node) + else "None") + + if async_queue is False: + # The kernels directive will not have the async clause. + return + + # check type (a bool is an instance of int) and ensure the supplied + # value is in a form suitable for comparison with values already + # stored in the PSyIR. + if isinstance(async_queue, bool): + # A value of True means that async is specified with no queue. + checkval = None + elif isinstance(async_queue, int): + checkval = Literal(f"{async_queue}", INTEGER_TYPE) + elif isinstance(async_queue, Reference): + checkval = async_queue + else: + raise TypeError(f"Invalid async_queue value, expect Reference or " + f"integer or None or bool, got : {async_queue}") + + # Perform an additional check whether a queue has been used before. + # Note this to work only for the current routine. + parent = nodes[0].ancestor(ACCAsyncMixin) + if parent is not None: + if checkval != parent.async_queue: + raise TransformationError( + f"Cannot apply ACCKernelsTrans with asynchronous " + f"queue {_to_str(checkval)} because a parent directive " + f"specifies queue {_to_str(parent.async_queue)}") + + parent = nodes[0].ancestor(Routine) + if parent: + edata = parent.walk(ACCEnterDataDirective) + if edata: + if checkval != edata[0].async_queue: + raise TransformationError( + f"Cannot apply ACCKernelsTrans with asynchronous queue" + f" {_to_str(checkval)} because the containing routine " + f"has an ENTER DATA directive specifying queue " + f"{_to_str(edata[0].async_queue)}") + + def validate( + self, + nodes: List[Node], + options: Dict[str, Any] = {} + ): # pylint: disable=signature-differs ''' Check that we can safely enclose the supplied node or list of nodes @@ -118,12 +195,20 @@ def validate(self, nodes, options=None): :param nodes: the proposed PSyIR node or nodes to enclose in the kernels region. - :type nodes: (list of) :py:class:`psyclone.psyir.nodes.Node` :param options: a dictionary with options for transformations. - :type options: Optional[Dict[str, Any]] + :param bool options["default_present"]: whether or not the kernels + region should have the 'default present' attribute (indicating + that data is already on the accelerator). When using managed + memory this option should be False. :param bool options["disable_loop_check"]: whether to disable the check that the supplied region contains 1 or more loops. Default is False (i.e. the check is enabled). + :param options["async_queue"]: whether or not to add the 'async' clause + to the new directive and if so, which queue to associate it with. + True to enable for the default queue or a queue value specified + with an int or PSyIR expression. + :type options["async_queue"]: + Union[bool, :py:class:`psyclone.psyir.nodes.DataNode`] :raises NotImplementedError: if the supplied Nodes belong to a GOInvokeSchedule. @@ -187,6 +272,10 @@ def validate(self, nodes, options=None): f"Cannot include '{icall.debug_string()}' in an " f"OpenACC region because it is not available on GPU.") + # extract async option and check validity + async_queue = options.get('async_queue', False) + self.check_async_queue(node_list, async_queue) + # Check that we have at least one loop or array range within # the proposed region unless this has been disabled. if options and options.get("disable_loop_check", False): diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index a9bcca8eb3..7b5abb85dc 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -33,8 +33,10 @@ # ----------------------------------------------------------------------------- # Authors R. W. Ford, A. R. Porter, S. Siso and N. Nobre, STFC Daresbury Lab # Modified I. Kavcic, Met Office -# Modified A. B. G. Chalk, STFC Daresbury Lab -# Modified J. G. Wallwork, Met Office / University of Cambridge +# A. B. G. Chalk, STFC Daresbury Lab +# J. G. Wallwork, Met Office / University of Cambridge +# S. Valat, Inria / Laboratoire Jean Kuntzmann +# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann # ----------------------------------------------------------------------------- ''' Performs py.test tests on the OpenACC PSyIR Directive nodes. ''' @@ -47,25 +49,20 @@ from psyclone.f2pygen import ModuleGen from psyclone.parse.algorithm import parse from psyclone.psyGen import PSyFactory -from psyclone.psyir.nodes import (ACCKernelsDirective, - ACCLoopDirective, - ACCParallelDirective, - ACCRegionDirective, - ACCRoutineDirective, - ACCUpdateDirective, - ACCAtomicDirective, - Assignment, - Literal, - Reference, - Return, - Routine) +from psyclone.psyir.nodes.array_reference import ArrayReference +from psyclone.psyir.nodes.acc_directives import ACCAsyncMixin +from psyclone.psyir.nodes import ( + ACCEnterDataDirective, ACCKernelsDirective, ACCLoopDirective, + ACCParallelDirective, ACCRegionDirective, ACCRoutineDirective, + ACCUpdateDirective, ACCAtomicDirective, ACCWaitDirective, Assignment, + BinaryOperation, Literal, Reference, Return, Routine, Schedule) from psyclone.psyir.nodes.loop import Loop -from psyclone.psyir.nodes.schedule import Schedule -from psyclone.psyir.symbols import SymbolTable, DataSymbol, INTEGER_TYPE +from psyclone.psyir.symbols import ( + Symbol, SymbolTable, DataSymbol, INTEGER_TYPE, UnresolvedType) from psyclone.psyir.transformations import ACCKernelsTrans from psyclone.transformations import ( ACCDataTrans, ACCEnterDataTrans, ACCLoopTrans, - ACCParallelTrans, ACCRoutineTrans) + ACCParallelTrans, ACCRoutineTrans, TransformationError) BASE_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( os.path.abspath(__file__)))), "test_files", "dynamo0p3") @@ -221,6 +218,39 @@ def test_accenterdatadirective_gencode_4(trans1, trans2): "nlayers_f1,undf_w1,undf_w2,undf_w3)\n" in code) +# (3/4) Method gen_code +def test_accenterdatadirective_gencode_3_async(): + '''Test that we can add the async directive on enter data.''' + API = "lfric" + acc_trans = ACCKernelsTrans() + acc_enter_trans = ACCEnterDataTrans() + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90"), api=API) + psy = PSyFactory(distributed_memory=False, api=API).create(info) + sched = psy.invokes.get('invoke_0_testkern_type').schedule + acc_trans.apply(sched.children, options={"async_queue": 3}) + acc_enter_trans.apply(sched, options={"async_queue": 3}) + code = str(psy.gen) + assert ( + " !$acc enter data copyin(f1_data,f2_data,m1_data,m2_data,map_w1," + "map_w2,map_w3,ndf_w1,ndf_w2,ndf_w3,nlayers_f1," + "undf_w1,undf_w2,undf_w3) async(3)\n" in code) + + +# (3/4) Method gen_code +def test_accenterdatadirective_gencode_3_async_error(): + '''Test that we can add the async directive on enter data.''' + API = "lfric" + acc_trans = ACCKernelsTrans() + acc_enter_trans = ACCEnterDataTrans() + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90"), api=API) + psy = PSyFactory(distributed_memory=False, api=API).create(info) + sched = psy.invokes.get('invoke_0_testkern_type').schedule + acc_trans.apply(sched.children) + with pytest.raises(TransformationError) as error: + acc_enter_trans.apply(sched, options={"async_queue": 3}) + assert 'async_queue different' in str(error.value) + + # Class ACCLoopDirective start def test_accloopdirective_node_str_default(monkeypatch): @@ -427,6 +457,51 @@ def test_acckernelsdirective_gencode(default_present): " !$acc end kernels\n" in code) +# (1/1) Method gen_code +@pytest.mark.parametrize("async_queue", [ + False, True, 1, 0, + Reference(Symbol('stream1')), + ArrayReference.create(DataSymbol( + 'stream2', + UnresolvedType()), + [Literal("1", INTEGER_TYPE)] + ) + ]) +def test_acckernelsdirective_gencode_async_queue(async_queue): + '''Check that the gen_code method in the ACCKernelsDirective class + generates the expected code. Use the dynamo0.3 API. + + ''' + API = "lfric" + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90"), api=API) + psy = PSyFactory(distributed_memory=False, api=API).create(info) + sched = psy.invokes.get('invoke_0_testkern_type').schedule + + trans = ACCKernelsTrans() + trans.apply(sched, {"async_queue": async_queue}) + + code = str(psy.gen) + string = "" + if async_queue is None: + string = "" + elif isinstance(async_queue, bool) and async_queue is True: + string = " async" + elif isinstance(async_queue, bool) and async_queue is False: + string = "" + elif isinstance(async_queue, int): + string = f" async({async_queue})" + elif isinstance(async_queue, ArrayReference): + string = " async(stream2(1))" + elif isinstance(async_queue, Reference): + string = " async(stream1)" + assert ( + f" !$acc kernels{string}\n" + f" DO cell = loop0_start, loop0_stop, 1\n" in code) + assert ( + " END DO\n" + " !$acc end kernels\n" in code) + + def test_acckerneldirective_equality(): ''' Test the __eq__ method of ACCKernelsDirective node. ''' # We need to manually set the same SymbolTable instance in both directives @@ -472,14 +547,14 @@ def test_acc_routine_parallelism(): assert target.parallelism == "seq" target.parallelism = "vector" assert target.parallelism == "vector" - with pytest.raises(TypeError) as err: + with pytest.raises(TypeError) as einfo: target.parallelism = 1 assert ("Expected a str to specify the level of parallelism but got 'int'" - in str(err.value)) - with pytest.raises(ValueError) as err: + in str(einfo.value)) + with pytest.raises(ValueError) as einfo: target.parallelism = "sequential" - assert ("Expected one of ['seq', 'vector', 'worker', 'gang'] for the level" - " of parallelism but got 'sequential'" in str(err.value)) + assert ("Expected one of ['gang', 'seq', 'vector', 'worker'] for the level" + " of parallelism but got 'sequential'" in str(einfo.value)) # Class ACCUpdateDirective @@ -515,6 +590,17 @@ def test_accupdatedirective_init(): directive = ACCUpdateDirective(sig, "host", if_present=False) assert directive.if_present is False + assert directive.async_queue is False + + directive = ACCUpdateDirective(sig, "host", async_queue=True) + assert directive.async_queue is True + + directive = ACCUpdateDirective(sig, "host", async_queue=1) + assert directive.async_queue.value == "1" + + directive = ACCUpdateDirective(sig, "host", + async_queue=Reference(Symbol("var"))) + assert directive.async_queue == Reference(Symbol("var")) def test_accupdatedirective_begin_string(): @@ -524,9 +610,21 @@ def test_accupdatedirective_begin_string(): directive_host = ACCUpdateDirective(sig, "host", if_present=False) directive_device = ACCUpdateDirective(sig, "device") directive_empty = ACCUpdateDirective(set(), "host", if_present=False) + directive_async_default = ACCUpdateDirective(sig, "device", + async_queue=True) + directive_async_queue_int = ACCUpdateDirective(sig, "device", + async_queue=1) + directive_async_queue_str = ACCUpdateDirective( + sig, "device", async_queue=Reference(Symbol("var"))) assert directive_host.begin_string() == "acc update host(x)" assert directive_device.begin_string() == "acc update if_present device(x)" + assert (directive_async_default.begin_string() == + "acc update if_present device(x) async") + assert (directive_async_queue_int.begin_string() == + "acc update if_present device(x) async(1)") + assert (directive_async_queue_str.begin_string() == + "acc update if_present device(x) async(var)") with pytest.raises(GenerationError) as err: directive_empty.begin_string() @@ -554,6 +652,129 @@ def test_accupdatedirective_equality(): assert directive1 != directive5 +# Class ACCWaitDirective + +def test_accwaitdirective_init(): + '''Test init of ACCWaitDirective.''' + + directive1 = ACCWaitDirective(None) + assert directive1.wait_queue is None + + directive2 = ACCWaitDirective(0) + assert directive2.wait_queue == 0 + + directive3 = ACCWaitDirective(1) + assert directive3.wait_queue == 1 + + directive4 = ACCWaitDirective(Reference(Symbol("variable_name"))) + assert directive4.wait_queue == Reference(Symbol("variable_name")) + + with pytest.raises(TypeError) as error: + _ = ACCWaitDirective(3.5) + assert 'Invalid value type as wait_group' in str(error) + + +def test_accwaitdirective_begin_string(): + '''Test begin_string of ACCWaitDirective.''' + + directive1 = ACCWaitDirective(None) + assert directive1.begin_string() == "acc wait" + + directive2 = ACCWaitDirective(1) + assert directive2.begin_string() == "acc wait (1)" + + directive3 = ACCWaitDirective(Reference(Symbol("variable_name"))) + assert directive3.begin_string() == "acc wait (variable_name)" + + +def test_accwaitdirective_gencode(): + '''Test gen code of ACCWaitDirective''' + API = "lfric" + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90"), api=API) + psy = PSyFactory(distributed_memory=False, api=API).create(info) + routines = psy.container.walk(Routine) + routines[0].children.append(ACCWaitDirective(1)) + code = str(psy.gen) + assert '$acc wait (1)' in code + + +def test_accwaitdirective_eq(): + '''Test the __eq__ implementation of ACCWaitDirective.''' + + # build some + directive1 = ACCWaitDirective(1) + directive2 = ACCWaitDirective(1) + directive3 = ACCWaitDirective(Reference(Symbol('stream1'))) + + # check equality + assert directive1 == directive2 + assert not (directive1 == directive3) + +# async keyword on all classes + + +@pytest.mark.parametrize("directive_type", + [ACCKernelsDirective, ACCParallelDirective, + ACCUpdateDirective, ACCEnterDataDirective]) +def test_directives_async_queue(directive_type): + '''Validate the various usage of async_queue parameter''' + + # args + args = [] + if directive_type == ACCUpdateDirective: + args = [[Signature('x')], 'host'] + + # set value at init + directive = directive_type(*args, async_queue=1) + + # need to have some data in + if directive_type == ACCEnterDataDirective: + directive._sig_set.add(Signature("x")) + + # check initial status + assert directive.async_queue.value == "1" + assert 'async(1)' in directive.begin_string() + + # change value to true + directive.async_queue = True + assert directive.async_queue is True + assert 'async' in directive.begin_string() + + # change value to False + directive.async_queue = False + assert directive.async_queue is False + assert 'async' not in directive.begin_string() + + # change value afterward + directive.async_queue = Reference(Symbol("stream")) + assert directive.async_queue == Reference(Symbol("stream")) + assert 'async(stream)' in directive.begin_string() + + # Value is a PSyIR expression + directive.async_queue = BinaryOperation.create( + BinaryOperation.Operator.ADD, + Literal("1", INTEGER_TYPE), + Reference(Symbol("stream"))) + assert 'async(1 + stream)' in directive.begin_string() + + # put wrong type + with pytest.raises(TypeError) as error: + directive.async_queue = 3.5 + assert "Invalid async_queue" in str(error) + + +def test_mixin_constructor_error(): + ''' + Check constructor with an unexpected value type (float instead of int) + + ''' + with pytest.raises(TypeError) as error: + _ = ACCAsyncMixin(3.5) + + assert ("Invalid async_queue value, expected DataNode, integer " + "or bool, got : 3.5" in str(error)) + + def test_accdatadirective_update_data_movement_clauses(fortran_reader, fortran_writer): '''Test that the data movement clauses are constructed correctly for the @@ -609,9 +830,13 @@ def test_accparalleldirective(): assert accpar._default_present is True # Also without default(present) - accpar = ACCParallelDirective(default_present=False) - assert isinstance(accpar, ACCParallelDirective) - assert accpar._default_present is False + accpar2 = ACCParallelDirective(default_present=False) + assert isinstance(accpar2, ACCParallelDirective) + assert accpar2._default_present is False + + # Call __eq__ + eq_result = accpar == accpar2 + assert eq_result is False # But only with boolean values with pytest.raises(TypeError) as err: @@ -620,22 +845,22 @@ def test_accparalleldirective(): "boolean but value '3' has been given." in str(err.value)) # The default present value has getter and setter - accpar.default_present = True - assert accpar.default_present is True + accpar2.default_present = True + assert accpar2.default_present is True with pytest.raises(TypeError) as err: - accpar.default_present = "invalid" + accpar2.default_present = "invalid" assert ("The ACCParallelDirective default_present property must be a " "boolean but value 'invalid' has been given." in str(err.value)) # The begin string depends on the default present value - accpar.default_present = True - assert accpar.begin_string() == "acc parallel default(present)" - accpar.default_present = False - assert accpar.begin_string() == "acc parallel" + accpar2.default_present = True + assert accpar2.begin_string() == "acc parallel default(present)" + accpar2.default_present = False + assert accpar2.begin_string() == "acc parallel" # It has an end_string - assert accpar.end_string() == "acc end parallel" + assert accpar2.end_string() == "acc end parallel" def test_acc_atomics_is_valid_atomic_statement(fortran_reader): diff --git a/src/psyclone/tests/psyir/nodes/directive_test.py b/src/psyclone/tests/psyir/nodes/directive_test.py index 30b077f8ed..0e4835afd9 100644 --- a/src/psyclone/tests/psyir/nodes/directive_test.py +++ b/src/psyclone/tests/psyir/nodes/directive_test.py @@ -225,4 +225,4 @@ def test_standalonedirective_children_validation(): with pytest.raises(GenerationError) as excinfo: cdir.addchild(schedule) assert ("Item 'Schedule' can't be child 0 of 'StandaloneDirective'. The " - "valid format is: 'None'." in str(excinfo.value)) + "valid format is: 'Clause*'." in str(excinfo.value)) diff --git a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py index 8216017652..662b2c815d 100644 --- a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py @@ -32,6 +32,8 @@ # POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- # Authors: R. W. Ford, A. R. Porter and S. Siso, STFC Daresbury Lab +# Modified S. Valat, Inria / Laboratoire Jean Kuntzmann +# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann '''Module containing py.test tests for the transformation of the PSyIR of generic code using the OpenACC 'kernels' directive. @@ -44,7 +46,7 @@ from psyclone.psyir.nodes import Assignment, ACCKernelsDirective, Loop, Routine from psyclone.psyir.transformations import ( ACCKernelsTrans, TransformationError, ProfileTrans) -from psyclone.transformations import ACCLoopTrans +from psyclone.transformations import ACCEnterDataTrans, ACCLoopTrans from psyclone.tests.utilities import get_invoke EXPLICIT_LOOP = ("program do_loop\n" @@ -68,6 +70,30 @@ def test_kernels_single_node(fortran_reader): assert isinstance(schedule[0], ACCKernelsDirective) +def test_trigger_async_error(fortran_reader): + """Check that we can't apply an ACC Kernel Trans with + a parent using an async queue IDs that is different.""" + psyir = fortran_reader.psyir_from_source(EXPLICIT_LOOP) + acc_trans = ACCKernelsTrans() + + loop = psyir.walk(Loop)[0] + acc_trans.apply(loop, + {"default_present": True, + "async_queue": 2}) + + loop = psyir.walk(Loop)[0] + + with pytest.raises(TransformationError) as einfo: + acc_trans.apply(loop, {"default_present": True, + "async_queue": 3}) + + correct = ("Cannot apply ACCKernelsTrans with asynchronous" + " queue '3' because a parent directive specifies" + " queue '2'") + + assert correct in str(einfo.value) + + def test_no_kernels_error(fortran_reader): ''' Check that the transformation rejects an attempt to put things that aren't kernels inside a kernels region. ''' @@ -464,3 +490,28 @@ def test_no_assumed_size_char_in_kernels(fortran_reader): assert ("Assumed-size character variables cannot be enclosed in an OpenACC" " region but found 'explicit_size_char = assumed2" in str(err.value)) + + +def test_check_async_queue_with_enter_data(fortran_reader): + '''Tests for the check_async_queue() method.''' + acc_trans = ACCKernelsTrans() + acc_edata_trans = ACCEnterDataTrans() + with pytest.raises(TypeError) as err: + acc_trans.check_async_queue(None, 3.5) + assert ("Invalid async_queue value, expect Reference or integer or None " + "or bool, got : 3.5" in str(err.value)) + psyir = fortran_reader.psyir_from_source( + "program two_loops\n" + " integer :: ji\n" + " real :: array(10,10)\n" + " do ji = 1, 5\n" + " array(ji,1) = 2.0*array(ji,2)\n" + " end do\n" + "end program two_loops\n") + prog = psyir.walk(Routine)[0] + acc_edata_trans.apply(prog, {"async_queue": 1}) + with pytest.raises(TransformationError) as err: + acc_trans.check_async_queue(prog.walk(Loop), 2) + assert ("Cannot apply ACCKernelsTrans with asynchronous queue '2' because " + "the containing routine has an ENTER DATA directive specifying " + "queue '1'" in str(err.value)) diff --git a/src/psyclone/tests/psyir/transformations/transformations_test.py b/src/psyclone/tests/psyir/transformations/transformations_test.py index 3fa4c33f89..7b36032092 100644 --- a/src/psyclone/tests/psyir/transformations/transformations_test.py +++ b/src/psyclone/tests/psyir/transformations/transformations_test.py @@ -35,6 +35,7 @@ # A. B. G. Chalk, STFC Daresbury Lab # Modified I. Kavcic, Met Office # Modified J. Henrichs, Bureau of Meteorology +# Modified S. Valat, INRIA / LJK ''' API-agnostic tests for various transformation classes. diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index bf60af1909..b5b581ff4f 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -35,6 +35,8 @@ # A. B. G. Chalk STFC Daresbury Lab # J. Henrichs, Bureau of Meteorology # Modified I. Kavcic, J. G. Wallwork, O. Brunt and L. Turner, Met Office +# S. Valat, Inria / Laboratoire Jean Kuntzmann +# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann ''' This module provides the various transformations that can be applied to PSyIR nodes. There are both general and API-specific transformation @@ -44,6 +46,7 @@ # pylint: disable=too-many-lines import abc +from typing import Any, Dict, Optional from psyclone import psyGen from psyclone.configuration import Config @@ -63,6 +66,7 @@ OMPParallelDirective, OMPParallelDoDirective, OMPSerialDirective, OMPSingleDirective, OMPTaskloopDirective, PSyDataNode, Reference, Return, Routine, Schedule) +from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes.array_mixin import ArrayMixin from psyclone.psyir.nodes.structure_member import StructureMember from psyclone.psyir.nodes.structure_reference import StructureReference @@ -2486,7 +2490,7 @@ def name(self): ''' return "ACCEnterDataTrans" - def apply(self, sched, options=None): + def apply(self, sched: Schedule, options: Optional[Dict[str, Any]] = {}): # pylint: disable=arguments-renamed '''Adds an OpenACC "enter data" directive to the invoke associated with the supplied Schedule. Any fields accessed by OpenACC kernels @@ -2494,9 +2498,11 @@ def apply(self, sched, options=None): order to ensure they remain on the target device. :param sched: schedule to which to add an "enter data" directive. - :type sched: sub-class of :py:class:`psyclone.psyir.nodes.Schedule` :param options: a dictionary with options for transformations. - :type options: Optional[Dict[str, Any]] + + The available options are : + - async_queue : Permit to force using the given async stream if + not False. ''' # Ensure that the proposed transformation is valid @@ -2524,12 +2530,39 @@ def apply(self, sched, options=None): current = current.parent posn = sched.children.index(current) + # extract async. Default to False. + async_queue = options.get('async_queue', False) + + # check + self.check_child_async(sched, async_queue) + # Add the directive at the position determined above, i.e. just before # the first statement containing an OpenACC compute construct. - data_dir = AccEnterDataDir(parent=sched, children=[]) + data_dir = AccEnterDataDir(parent=sched, children=[], + async_queue=async_queue) sched.addchild(data_dir, index=posn) - def validate(self, sched, options=None): + def check_child_async(self, sched, async_queue): + ''' + Common function to check that all kernel/parallel childs have the + same async queue. + + :param sched: schedule to which to add an "enter data" directive. + :type sched: sub-class of :py:class:`psyclone.psyir.nodes.Schedule` + + :param async_queue: The async queue to expect in childs. + :type async_queue: \ + Optional[bool,int,:py:class:`psyclone.core.Reference`] + ''' + qval = ACCAsyncMixin.convert_queue(async_queue) + directive_cls = (ACCParallelDirective, ACCKernelsDirective) + for dirv in sched.walk(directive_cls): + if qval != dirv.async_queue: + raise TransformationError( + 'Try to make an ACCEnterDataTrans with async_queue ' + 'different than the one in child kernels !') + + def validate(self, sched, options={}): # pylint: disable=arguments-differ, arguments-renamed ''' Check that we can safely apply the OpenACC enter-data transformation @@ -2557,6 +2590,11 @@ def validate(self, sched, options=None): raise TransformationError("Schedule already has an OpenACC data " "region - cannot add an enter data.") + async_queue = options.get('async_queue', False) + + # check consistency with childs about async_queue + self.check_child_async(sched, async_queue) + class ACCRoutineTrans(Transformation, MarkRoutineForGPUMixin): '''