From 27d51f934fe34f7ac73d10cdef1df2028f739f1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Valat?= Date: Mon, 11 Sep 2023 16:24:08 +0200 Subject: [PATCH 01/18] #2070 ACC async_queue via mixins --- src/psyclone/domain/lfric/__init__.py | 2 +- .../domain/lfric/lfric_loop_bounds.py | 7 +- src/psyclone/f2pygen.py | 5 +- src/psyclone/psyir/nodes/__init__.py | 3 +- src/psyclone/psyir/nodes/acc_directives.py | 212 ++++++++++++++-- src/psyclone/psyir/nodes/acc_mixins.py | 131 ++++++++++ .../tests/psyir/nodes/acc_directives_test.py | 234 +++++++++++++++++- .../transformations/transformations_test.py | 1 + src/psyclone/transformations.py | 102 +++++++- 9 files changed, 670 insertions(+), 27 deletions(-) create mode 100644 src/psyclone/psyir/nodes/acc_mixins.py diff --git a/src/psyclone/domain/lfric/__init__.py b/src/psyclone/domain/lfric/__init__.py index fc2eeb3547..931c237e97 100644 --- a/src/psyclone/domain/lfric/__init__.py +++ b/src/psyclone/domain/lfric/__init__.py @@ -79,4 +79,4 @@ 'LFRicExtractDriverCreator', 'LFRicInvoke', 'LFRicLoopBounds', - 'LFRicSymbolTable'] \ No newline at end of file + 'LFRicSymbolTable'] diff --git a/src/psyclone/domain/lfric/lfric_loop_bounds.py b/src/psyclone/domain/lfric/lfric_loop_bounds.py index f7708820e7..b446ee3583 100644 --- a/src/psyclone/domain/lfric/lfric_loop_bounds.py +++ b/src/psyclone/domain/lfric/lfric_loop_bounds.py @@ -36,7 +36,7 @@ # Modified J. Henrichs, Bureau of Meteorology # Modified A. B. G. Chalk and N. Nobre, STFC Daresbury Lab -''' This module provides the LFRicLoopBounds Class that handles all variables +''' This module provides the LFRicLoopBounds Class that handles all variables required for specifying loop limits within an LFRic PSy-layer routine.''' # Imports @@ -46,9 +46,8 @@ class LFRicLoopBounds(LFRicCollection): - - ''' - Handles all variables required for specifying loop limits within + ''' + Handles all variables required for specifying loop limits within an LFRic PSy-layer routine. ''' diff --git a/src/psyclone/f2pygen.py b/src/psyclone/f2pygen.py index f3be69d4c5..586b97d4ab 100644 --- a/src/psyclone/f2pygen.py +++ b/src/psyclone/f2pygen.py @@ -160,7 +160,10 @@ class ACCDirective(Directive): 'loop'). ''' def __init__(self, root, line, position, dir_type): - self._types = ["parallel", "kernels", "enter data", "loop", "routine"] + self._types = [ + "parallel", "kernels", "enter data", "loop", "routine", + "wait" + ] self._positions = ["begin", "end"] super(ACCDirective, self).__init__(root, line, position, dir_type) diff --git a/src/psyclone/psyir/nodes/__init__.py b/src/psyclone/psyir/nodes/__init__.py index 066b700748..4b04bf5d59 100644 --- a/src/psyclone/psyir/nodes/__init__.py +++ b/src/psyclone/psyir/nodes/__init__.py @@ -81,7 +81,8 @@ from psyclone.psyir.nodes.acc_directives import ACCDirective, \ ACCLoopDirective, ACCEnterDataDirective, ACCParallelDirective, \ ACCKernelsDirective, ACCDataDirective, ACCUpdateDirective, \ - ACCStandaloneDirective, ACCRegionDirective, ACCRoutineDirective + ACCStandaloneDirective, ACCRegionDirective, ACCRoutineDirective, \ + ACCWaitDirective from psyclone.psyir.nodes.omp_directives import OMPDirective, OMPDoDirective, \ OMPParallelDirective, OMPParallelDoDirective, OMPSingleDirective, \ OMPMasterDirective, OMPSerialDirective, OMPTaskloopDirective, \ diff --git a/src/psyclone/psyir/nodes/acc_directives.py b/src/psyclone/psyir/nodes/acc_directives.py index ff83adcbb7..8906826b20 100644 --- a/src/psyclone/psyir/nodes/acc_directives.py +++ b/src/psyclone/psyir/nodes/acc_directives.py @@ -36,7 +36,8 @@ # C.M. Maynard, Met Office / University of Reading # J. Henrichs, Bureau of Meteorology # Modified A. B. G. Chalk, STFC Daresbury Lab -# Modified J. G. Wallwork, Met Office +# S. Valat, INRIA / LJK +# J. G. Wallwork, Met Office # ----------------------------------------------------------------------------- ''' This module contains the implementation of the various OpenACC Directive @@ -44,6 +45,7 @@ import abc +from psyclone.core import AccessType, VariablesAccessInfo from psyclone.core import Signature from psyclone.f2pygen import DirectiveGen, CommentGen from psyclone.errors import GenerationError, InternalError @@ -54,6 +56,10 @@ RegionDirective) from psyclone.psyir.nodes.psy_data_node import PSyDataNode from psyclone.psyir.nodes.routine import Routine +from psyclone.psyir.nodes.reference import Reference +from psyclone.psyir.symbols import ScalarType +from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin +from psyclone.core.signature import Signature from psyclone.psyir.nodes.schedule import Schedule @@ -167,7 +173,7 @@ def begin_string(self): return "acc routine" -class ACCEnterDataDirective(ACCStandaloneDirective): +class ACCEnterDataDirective(ACCStandaloneDirective, ACCAsyncMixin): ''' Class representing a "!$ACC enter data" OpenACC directive in an InvokeSchedule. Must be sub-classed for a particular API because the way @@ -178,9 +184,16 @@ class ACCEnterDataDirective(ACCStandaloneDirective): :param parent: the node in the InvokeSchedule to which to add this \ directive as a child. :type parent: :py:class:`psyclone.psyir.nodes.Node` + :param async_queue: Enable async support and attach it to the given queue. + Can use False to disable, True to enable on default + stream. Int to attach to the given stream ID or use a + variable Signature to say at runtime what stream to be + used. + :type async_queue: bool | int | :py:class: psyclone.core.Signature ''' - def __init__(self, children=None, parent=None): + def __init__(self, children=None, parent=None, async_queue=False): super().__init__(children=children, parent=parent) + ACCAsyncMixin.__init__(self, async_queue) self._acc_dirs = None # List of parallel directives self._sig_set = set() @@ -202,11 +215,14 @@ def gen_code(self, parent): # incompatible with class DirectiveGen() we are using below. self.begin_string() + # async + async_option = self._build_async_string() + # Add the enter data directive. sym_list = _sig_set_to_string(self._sig_set) copy_in_str = f"copyin({sym_list})" parent.add(DirectiveGen(parent, "acc", "begin", "enter data", - copy_in_str)) + copy_in_str + async_option)) # Call an API-specific subclass of this class in case # additional declarations are required. self.data_on_device(parent) @@ -257,9 +273,15 @@ def begin_string(self): sym_list = _sig_set_to_string(self._sig_set) + # options + options = f" copyin({sym_list})" + + # async + options += self._build_async_string() + # Variables need lexicographic sorting since sets guarantee no ordering # and members of composite variables must appear later in deep copies. - return f"acc enter data copyin({sym_list})" + return f"acc enter data{options}" def data_on_device(self, parent): ''' @@ -273,14 +295,43 @@ def data_on_device(self, parent): ''' -class ACCParallelDirective(ACCRegionDirective): +class ACCParallelDirective(ACCRegionDirective, ACCAsyncMixin): ''' Class representing the !$ACC PARALLEL directive of OpenACC in the PSyIR. By default it includes the 'DEFAULT(PRESENT)' clause which means this node must either come after an EnterDataDirective or within a DataDirective. + :param children: the PSyIR nodes to be enclosed in the ACC parallel region\ + and which are therefore children of this node. + :type children: List[:py:class:`psyclone.psyir.nodes.Node`] + :param parent: the parent of this node in the PSyIR. + :type parent: sub-class of :py:class:`psyclone.psyir.nodes.Node` + :param async_queue: Make the directive asynchonous and attached to the + given stream identified by an ID or by a variable + name pointing to an integer. + :type async_queue: bool|:py:class:`psyclone.psyir.nodes.Reference`|int ''' + def __init__(self, children=None, parent=None, async_queue=False): + super().__init__(children=children, parent=parent) + ACCAsyncMixin.__init__(self, async_queue) + + def __eq__(self, other): + ''' + Checks whether two nodes are equal. Two ACCParallelDirective nodes are + equal if their default_present members are equal and they use the + same async_queue. + + :param object other: the object to check equality to. + + :returns: whether other is equal to self. + :rtype: bool + ''' + is_eq = super().__eq__(other) + is_eq = is_eq and ACCAsyncMixin.__eq__(self, other) + + return is_eq + def gen_code(self, parent): ''' Generate the elements of the f2pygen AST for this Node in the Schedule. @@ -291,8 +342,13 @@ def gen_code(self, parent): ''' self.validate_global_constraints() + # remove the "acc parallel" added by begin_string() and keep only the + # parameters + begin_args = ' '.join(self.begin_string().split()[2:]) + + # add the directive parent.add(DirectiveGen(parent, "acc", "begin", "parallel", - "default(present)")) + begin_args)) for child in self.children: child.gen_code(parent) @@ -315,7 +371,14 @@ def begin_string(self): # all data required by the parallel region is already present # on the device. If we've made a mistake and it isn't present # then we'll get a run-time error. - return "acc parallel default(present)" + + # present + options = " default(present)" + + # async + options += self._build_async_string() + + return f"acc parallel{options}" def end_string(self): ''' @@ -560,7 +623,7 @@ def end_string(self): return "" -class ACCKernelsDirective(ACCRegionDirective): +class ACCKernelsDirective(ACCRegionDirective, ACCAsyncMixin): ''' Class representing the !$ACC KERNELS directive in the PSyIR. @@ -569,12 +632,20 @@ class ACCKernelsDirective(ACCRegionDirective): :type children: List[:py:class:`psyclone.psyir.nodes.Node`] :param parent: the parent of this node in the PSyIR. :type parent: sub-class of :py:class:`psyclone.psyir.nodes.Node` - :param bool default_present: whether or not to add the "default(present)" \ - clause to the kernels directive. + :param bool default_present: whether or not to add the \ + "default(present)" clause to the kernels \ + directive. + :param async_queue: Make the directive asynchonous and attached to the + given stream identified by an ID or by a variable + name pointing to an integer. + :type async_queue: bool|:py:class:`psyclone.psyir.nodes.Reference`|int ''' - def __init__(self, children=None, parent=None, default_present=True): + + def __init__(self, children=None, parent=None, default_present=True, + async_queue=False): super().__init__(children=children, parent=parent) + ACCAsyncMixin.__init__(self, async_queue) self._default_present = default_present def __eq__(self, other): @@ -589,6 +660,7 @@ def __eq__(self, other): ''' is_eq = super().__eq__(other) is_eq = is_eq and self.default_present == other.default_present + is_eq = is_eq and ACCAsyncMixin.__eq__(self, other) return is_eq @@ -634,8 +706,14 @@ def begin_string(self): ''' result = "acc kernels" + + # present if self._default_present: result += " default(present)" + + # async + result += self._build_async_string() + return result def end_string(self): @@ -740,7 +818,7 @@ def _update_data_movement_clauses(self): self.addchild(ACCCopyClause(children=list(readwrites.values()))) -class ACCUpdateDirective(ACCStandaloneDirective): +class ACCUpdateDirective(ACCStandaloneDirective, ACCAsyncMixin): ''' Class representing the OpenACC update directive in the PSyIR. It has a direction attribute that can be set to 'self', 'host' or 'device', the set of symbols being updated and an optional if_present clause. @@ -758,15 +836,21 @@ class ACCUpdateDirective(ACCStandaloneDirective): clause on the update directive (this instructs the directive to silently ignore any variables that are not on the device). + :param async_queue: Make the directive asynchonous and attached to the \ + given stream identified by an ID or by a variable name + pointing to an integer. + :type async_queue: Optional[ + bool|:py:class:`psyclone.psyir.nodes.Reference`|int + ] :type if_present: Optional[bool] ''' _VALID_DIRECTIONS = ("self", "host", "device") def __init__(self, signatures, direction, children=None, parent=None, - if_present=True): + if_present=True, async_queue=False): super().__init__(children=children, parent=parent) - + ACCAsyncMixin.__init__(self, async_queue) self.sig_set = signatures self.direction = direction self.if_present = if_present @@ -785,6 +869,7 @@ def __eq__(self, other): is_eq = is_eq and self.sig_set == other.sig_set is_eq = is_eq and self.direction == other.direction is_eq = is_eq and self.if_present == other.if_present + is_eq = is_eq and ACCAsyncMixin.__eq__(self, other) return is_eq @@ -883,7 +968,11 @@ def begin_string(self): condition = "if_present " if self._if_present else "" sym_list = _sig_set_to_string(self._sig_set) - return f"acc update {condition}{self._direction}({sym_list})" + # async + asyncvalue = self._build_async_string() + + return \ + f"acc update {condition}{self._direction}({sym_list}){asyncvalue}" def _sig_set_to_string(sig_set): @@ -902,8 +991,97 @@ def _sig_set_to_string(sig_set): return ",".join(sorted(names)) +class ACCWaitDirective(ACCStandaloneDirective): + ''' + Class representing the !$ACC WAIT directive in the PSyIR. + + :param wait_queue: Which ACC async stream to wait. None to wait all. + :type wait_queue: Optional[:py:class:`psyclone.psyir.nodes.Reference`, int] + ''' + def __init__(self, wait_queue=None): + # call parent + super().__init__() + self.wait_queue = wait_queue + + def __eq__(self, other): + ''' + Test the equality of two directives. + + :returns: If the two directives are equals. + :rtype: bool + ''' + is_eq = super().__eq__(other) + is_eq = is_eq and self._wait_queue == other._wait_queue + return is_eq + + @property + def wait_queue(self): + ''' + :returns: The queue to wait on. + :rtype: Optional[int, :py:class:`psyclone.psyir.nodes.Reference`] + ''' + return self._wait_queue + + @wait_queue.setter + def wait_queue(self, wait_queue): + ''' + Setter to assign a specific wait queue to wait for. + + :param wait_queue: The wait queue to expect, or None for all. + :type wait_queue: Optional[int, \ + :py:class:`psyclone.psyir.nodes.Reference`] + + :raises TypeError: if `wait_queue` is of the wrong type + ''' + # check + if (wait_queue is not None + and not isinstance(wait_queue, (int, Reference))): + raise TypeError("Invalid value type as wait_group, shoule be" + "in (None, int, Signature) !") + + # set + self._wait_queue = wait_queue + + def gen_code(self, parent): + ''' + Generate the given directive code to add it to the call tree. + + :param parent: the parent Node in the Schedule to which to add this \ + content. + :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` + ''' + # remove the "acc wait" added by begin_string() and keep only the + # parameters + args = ' '.join(self.begin_string().split()[2:]) + + # Generate the directive + parent.add(DirectiveGen(parent, "acc", "begin", "wait", args)) + + def begin_string(self): + '''Returns the beginning statement of this directive, i.e. + "acc wait ...". The backend is responsible for adding the + correct directive beginning (e.g. "!$"). + + :returns: the beginning statement for this directive. + :rtype: str + + ''' + # default basic directive + result = "acc wait" + + # handle specifying groups + if self._wait_queue is not None: + if isinstance(self._wait_queue, Reference): + result += f" ({self._wait_queue.name})" + else: + result += f" ({self._wait_queue})" + + # ok return it + return result + + # For automatic API documentation generation __all__ = ["ACCRegionDirective", "ACCEnterDataDirective", "ACCParallelDirective", "ACCLoopDirective", "ACCKernelsDirective", "ACCDataDirective", "ACCUpdateDirective", "ACCStandaloneDirective", - "ACCDirective", "ACCRoutineDirective"] + "ACCDirective", "ACCRoutineDirective", "ACCWaitDirective"] diff --git a/src/psyclone/psyir/nodes/acc_mixins.py b/src/psyclone/psyir/nodes/acc_mixins.py new file mode 100644 index 0000000000..dbc96eec33 --- /dev/null +++ b/src/psyclone/psyir/nodes/acc_mixins.py @@ -0,0 +1,131 @@ +# ----------------------------------------------------------------------------- +# BSD 3-Clause License +# +# Copyright (c) 2021-2023, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# Authors S. Valat, INRIA / LJK +# ----------------------------------------------------------------------------- + +''' This module contains the mixins to apply some ACC features on many +classes.''' + +import abc + +from psyclone.psyir.nodes.reference import Reference + + +class ACCAsyncMixin(metaclass=abc.ABCMeta): + ''' + Class handling the common code to handle the async keyword on related acc + directives. + + :param async_queue: Enable async support and attach it to the given queue. + Can use False to disable, True to enable on default + stream. Use int to attach to the given stream ID or + use a variable Reference to say at runtime what stream + to be used. + :type async_queue: bool | int | :py:class:`psyclone.core.Reference` + ''' + def __init__(self, async_queue=False): + self.async_queue = async_queue + + @property + def async_queue(self): + ''' + :returns: whether or not async is enabled and if so, which queue this + node is associated with. (True indicates the default stream.) + Can use False to disable, True to enable on default stream. + Int to attach to the given stream ID or use a variable + Reference to say at runtime what stream to be used. + :rtype async_queue: bool | int | :py:class:`psyclone.core.Reference` + ''' + return self._async_queue + + @async_queue.setter + def async_queue(self, async_queue): + ''' + :param async_queue: Enable async support and attach it to the given + queue. Can use False to disable, True to enable on + default stream. Int to attach to the given stream + ID or use a variable Reference to say at runtime + what stream to be used. + :type async_queue: bool | int | :py:class:`psyclone.core.Reference` + :raises TypeError: if `wait_queue` is of the wrong type + ''' + # check + if (async_queue is not None and + not isinstance(async_queue, (bool, Reference, int))): + raise TypeError(f"Invalid async_queue value, expect Reference or " + f"integer or None or bool, got : {async_queue}") + + # assign + self._async_queue = async_queue + + def _build_async_string(self): + ''' + Build the async arg to concat to the acc directive when generating the + code. + + :returns: The `async()` option to add to the directive. + :rtype: str + ''' + # default + result = "" + + # async + if self.async_queue is not None: + if isinstance(self.async_queue, bool): + if self.async_queue: + result = " async()" + elif isinstance(self.async_queue, int): + result = f" async({self.async_queue})" + elif isinstance(self.async_queue, Reference): + from psyclone.psyir.backend.fortran import FortranWriter + text = FortranWriter()(self.async_queue) + result = f" async({text})" + + # ok + return result + + def __eq__(self, other): + ''' + Checks whether two nodes are equal. Two ACCAsyncMixin are + equal if their async_queue members are equal. + + :param object other: the object to check equality to. + + :returns: whether other is equal to self. + :rtype: bool + ''' + if type(self) is not type(other): + return False + else: + return self.async_queue == other.async_queue diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index 0e78b5517b..3e364e164e 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -48,6 +48,17 @@ from psyclone.f2pygen import ModuleGen from psyclone.parse.algorithm import parse from psyclone.psyGen import PSyFactory +from psyclone.psyir.nodes import ACCRoutineDirective, \ + ACCKernelsDirective, Schedule, ACCUpdateDirective, ACCLoopDirective, \ + ACCWaitDirective, Routine, ACCParallelDirective, ACCEnterDataDirective +from psyclone.psyir.nodes.reference import Reference +from psyclone.psyir.nodes.array_reference import ArrayReference, \ + Literal, INTEGER_TYPE +from psyclone.psyir.nodes.acc_directives import ACCAsyncMixin +from psyclone.psyir.symbols import SymbolTable, Symbol, DataSymbol, \ + DeferredType +from psyclone.transformations import ACCEnterDataTrans, ACCParallelTrans, \ + ACCKernelsTrans, TransformationError from psyclone.psyir.nodes import (ACCKernelsDirective, ACCLoopDirective, ACCRegionDirective, @@ -223,8 +234,40 @@ def test_accenterdatadirective_gencode_4(trans1, trans2): "nlayers,undf_w1,undf_w2,undf_w3)\n" in code) +# (3/4) Method gen_code +def test_accenterdatadirective_gencode_3_async(): + '''Test that we can add the async directive on enter data.''' + acc_trans = ACCKernelsTrans() + acc_enter_trans = ACCEnterDataTrans() + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90")) + psy = PSyFactory(distributed_memory=False).create(info) + sched = psy.invokes.get('invoke_0_testkern_type').schedule + acc_trans.apply(sched.children, options={"async_queue": 3}) + acc_enter_trans.apply(sched, options={"async_queue": 3}) + code = str(psy.gen) + assert ( + " !$acc enter data copyin(f1_proxy,f1_proxy%data," + "f2_proxy,f2_proxy%data,m1_proxy,m1_proxy%data,m2_proxy," + "m2_proxy%data,map_w1,map_w2,map_w3,ndf_w1,ndf_w2,ndf_w3,nlayers," + "undf_w1,undf_w2,undf_w3) async(3)\n" in code) + + +# (3/4) Method gen_code +def test_accenterdatadirective_gencode_3_async_error(): + '''Test that we can add the async directive on enter data.''' + acc_trans = ACCKernelsTrans() + acc_enter_trans = ACCEnterDataTrans() + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90")) + psy = PSyFactory(distributed_memory=False).create(info) + sched = psy.invokes.get('invoke_0_testkern_type').schedule + acc_trans.apply(sched.children) + with pytest.raises(TransformationError) as error: + acc_enter_trans.apply(sched, options={"async_queue": 3}) + assert 'async_queue different' in str(error.value) + # Class ACCLoopDirective start + def test_accloopdirective_node_str(monkeypatch): ''' Test the node_str() method of ACCLoopDirective node ''' directive = ACCLoopDirective() @@ -352,6 +395,50 @@ def test_acckernelsdirective_gencode(default_present): " !$acc end kernels\n" in code) +# (1/1) Method gen_code +@pytest.mark.parametrize("async_queue", [ + False, True, 1, 0, + Reference(Symbol('stream1')), + ArrayReference.create(DataSymbol( + 'stream2', + DeferredType()), + [Literal("1", INTEGER_TYPE)] + ) + ]) +def test_acckernelsdirective_gencode_async_queue(async_queue): + '''Check that the gen_code method in the ACCKernelsDirective class + generates the expected code. Use the dynamo0.3 API. + + ''' + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90")) + psy = PSyFactory(distributed_memory=False).create(info) + sched = psy.invokes.get('invoke_0_testkern_type').schedule + + trans = ACCKernelsTrans() + trans.apply(sched, {"async_queue": async_queue}) + + code = str(psy.gen) + string = "" + if async_queue is None: + string = "" + elif isinstance(async_queue, bool) and async_queue is True: + string = " async()" + elif isinstance(async_queue, bool) and async_queue is False: + string = "" + elif isinstance(async_queue, int): + string = f" async({async_queue})" + elif isinstance(async_queue, ArrayReference): + string = " async(stream2(1))" + elif isinstance(async_queue, Reference): + string = " async(stream1)" + assert ( + f" !$acc kernels{string}\n" + f" DO cell=loop0_start,loop0_stop\n" in code) + assert ( + " END DO\n" + " !$acc end kernels\n" in code) + + def test_acckerneldirective_equality(): ''' Test the __eq__ method of ACCKernelsDirective node. ''' # We need to manually set the same SymbolTable instance in both directives @@ -415,6 +502,17 @@ def test_accupdatedirective_init(): directive = ACCUpdateDirective(sig, "host", if_present=False) assert directive.if_present is False + assert directive.async_queue is False + + directive = ACCUpdateDirective(sig, "host", async_queue=True) + assert directive.async_queue is True + + directive = ACCUpdateDirective(sig, "host", async_queue=1) + assert directive.async_queue == 1 + + directive = ACCUpdateDirective(sig, "host", + async_queue=Reference(Symbol("var"))) + assert directive.async_queue == Reference(Symbol("var")) def test_accupdatedirective_begin_string(): @@ -424,9 +522,22 @@ def test_accupdatedirective_begin_string(): directive_host = ACCUpdateDirective(sig, "host", if_present=False) directive_device = ACCUpdateDirective(sig, "device") directive_empty = ACCUpdateDirective(set(), "host", if_present=False) + directive_async_default = ACCUpdateDirective(sig, "device", + async_queue=True) + directive_async_queue_int = ACCUpdateDirective(sig, "device", + async_queue=1) + directive_async_queue_str = \ + ACCUpdateDirective(sig, "device", async_queue=Reference(Symbol("var"))) assert directive_host.begin_string() == "acc update host(x)" - assert directive_device.begin_string() == "acc update if_present device(x)" + assert directive_device.begin_string() \ + == "acc update if_present device(x)" + assert directive_async_default.begin_string() \ + == "acc update if_present device(x) async()" + assert directive_async_queue_int.begin_string() \ + == "acc update if_present device(x) async(1)" + assert directive_async_queue_str.begin_string() \ + == "acc update if_present device(x) async(var)" with pytest.raises(GenerationError) as err: directive_empty.begin_string() @@ -454,6 +565,127 @@ def test_accupdatedirective_equality(): assert directive1 != directive5 +# Class ACCWaitDirective + +def test_accwaitdirective_init(): + '''Test init of ACCWaitDirective.''' + + directive1 = ACCWaitDirective(None) + assert directive1.wait_queue is None + + directive2 = ACCWaitDirective(0) + assert directive2.wait_queue == 0 + + directive3 = ACCWaitDirective(1) + assert directive3.wait_queue == 1 + + directive4 = ACCWaitDirective(Reference(Symbol("variable_name"))) + assert directive4.wait_queue == Reference(Symbol("variable_name")) + + with pytest.raises(TypeError) as error: + directive5 = ACCWaitDirective(3.5) + assert 'Invalid value type as wait_group' in str(error) + + +def test_accwaitdirective_begin_string(): + '''Test begin_string of ACCWaitDirective.''' + + directive1 = ACCWaitDirective(None) + assert directive1.begin_string() == "acc wait" + + directive2 = ACCWaitDirective(1) + assert directive2.begin_string() == "acc wait (1)" + + directive3 = ACCWaitDirective(Reference(Symbol("variable_name"))) + assert directive3.begin_string() == "acc wait (variable_name)" + + +def test_accwaitdirective_gencode(): + '''Test gen code of ACCWaitDirective''' + + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90")) + psy = PSyFactory(distributed_memory=False).create(info) + routines = psy.container.walk(Routine) + routines[0].children.append(ACCWaitDirective(1)) + code = str(psy.gen) + assert '$acc wait (1)' in code + + +def test_accwaitdirective_eq(): + '''Test the __eq__ implementation of ACCWaitDirective.''' + + # build some + directive1 = ACCWaitDirective(1) + directive2 = ACCWaitDirective(1) + directive3 = ACCWaitDirective(Reference(Symbol('stream1'))) + + # check equality + assert directive1 == directive2 + assert not (directive1 == directive3) + +# async keyword on all classes + + +@pytest.mark.parametrize("directive_type", + [ACCKernelsDirective, ACCParallelDirective, + ACCUpdateDirective, ACCEnterDataDirective]) +def test_directives_async_queue(directive_type): + '''Validate the various usage of async_queue parameter''' + + # args + args = [] + if directive_type == ACCUpdateDirective: + args = [[Signature('x')], 'host'] + + # set value at init + directive = directive_type(*args, async_queue=1) + + # need to have some data in + if directive_type == ACCEnterDataDirective: + directive._sig_set.add(Signature("x")) + + # check initial status + assert directive.async_queue == 1 + assert 'async(1)' in directive.begin_string() + + # change value to true + directive.async_queue = True + assert directive.async_queue is True + assert 'async()' in directive.begin_string() + + # change value to False + directive.async_queue = False + assert directive.async_queue is False + assert 'async()' not in directive.begin_string() + + # change value to None + directive.async_queue = None + assert directive.async_queue is None + assert 'async()' not in directive.begin_string() + + # change value afterward + directive.async_queue = Reference(Symbol("stream")) + assert directive.async_queue == Reference(Symbol("stream")) + assert 'async(stream)' in directive.begin_string() + + # put wrong type + with pytest.raises(TypeError) as error: + directive.async_queue = 3.5 + assert "Invalid async_queue" in str(error) + + +def test_mixin_constructor_error(): + ''' + Check constructor with an unexpected value type (float instead of int) + ''' + + with pytest.raises(TypeError) as error: + asyncMixin = ACCAsyncMixin(3.5) + value = asyncMixin._build_async_string() + assert "Invalid async_queue value, expect Reference or integer or None " \ + "or bool, got : 3.5" in str(error) + + def test_accdatadirective_update_data_movement_clauses(fortran_reader, fortran_writer): '''Test that the data movement clauses are constructed correctly for the diff --git a/src/psyclone/tests/psyir/transformations/transformations_test.py b/src/psyclone/tests/psyir/transformations/transformations_test.py index b2a43b3da5..c9d6f7b7a3 100644 --- a/src/psyclone/tests/psyir/transformations/transformations_test.py +++ b/src/psyclone/tests/psyir/transformations/transformations_test.py @@ -35,6 +35,7 @@ # A. B. G. Chalk, STFC Daresbury Lab # Modified I. Kavcic, Met Office # Modified J. Henrichs, Bureau of Meteorology +# Modified S. Valat, INRIA / LJK ''' API-agnostic tests for various transformation classes. diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index b68bf56d15..37402e004c 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -2266,6 +2266,11 @@ def apply(self, sched, options=None): :param sched: schedule to which to add an "enter data" directive. :type sched: sub-class of :py:class:`psyclone.psyir.nodes.Schedule` :param options: a dictionary with options for transformations. + + The available options are : + - async_queue : Permit to force using the given async stream if + not None. + :type options: Optional[Dict[str, Any]] ''' @@ -2299,11 +2304,43 @@ def apply(self, sched, options=None): current = current.parent posn = sched.children.index(current) + # handle default empty options + if options is None: + options = {} + + # extract async + async_queue = options.get('async_queue', False) + + # check + self.check_child_async(sched, async_queue) + # Add the directive at the position determined above, i.e. just before # the first statement containing an OpenACC compute construct. - data_dir = AccEnterDataDir(parent=sched, children=[]) + data_dir = AccEnterDataDir(parent=sched, children=[], + async_queue=async_queue) sched.addchild(data_dir, index=posn) + def check_child_async(self, sched, async_queue): + ''' + Common function to check that all kernel/parallel childs have the + same async queue. + + :param sched: schedule to which to add an "enter data" directive. + :type sched: sub-class of :py:class:`psyclone.psyir.nodes.Schedule` + + :param async_queue: The async queue to expect in childs. + :type async_queue: \ + Optional[bool,int,:py:class: psyclone.core.Reference] + ''' + + directive_cls = (ACCParallelDirective, ACCKernelsDirective) + for dir in sched.walk(directive_cls): + if async_queue is not None: + if async_queue != dir.async_queue: + raise TransformationError( + 'Try to make an ACCEnterDataTrans with async_queue ' + 'different than the one in child kernels !') + def validate(self, sched, options=None): # pylint: disable=arguments-differ, arguments-renamed ''' @@ -2332,6 +2369,14 @@ def validate(self, sched, options=None): raise TransformationError("Schedule already has an OpenACC data " "region - cannot add an enter data.") + # handle async option + if options is None: + options = {} + async_queue = options.get('async_queue', False) + + # check consistency with childs about async_queue + self.check_child_async(sched, async_queue) + class ACCRoutineTrans(Transformation): ''' @@ -2514,14 +2559,52 @@ def apply(self, node, options=None): if not options: options = {} default_present = options.get("default_present", False) + async_queue = options.get("async_queue", False) + + # check + self.check_async_queue(node, async_queue) # Create a directive containing the nodes in node_list and insert it. directive = ACCKernelsDirective( parent=parent, children=[node.detach() for node in node_list], - default_present=default_present) + default_present=default_present, async_queue=async_queue) parent.children.insert(start_index, directive) + def check_async_queue(self, nodes, async_queue): + ''' + Common function to check that all parent data directives have + the same async queue. + + :param node: a node or list of nodes in the PSyIR to enclose. + :type nodes: (a list of) :py:class:`psyclone.psyir.nodes.Node` + + :param async_queue: The async queue to expect in parents. + :type async_queue: \ + Optional[bool,int,:py:class: psyclone.core.Reference] + ''' + + # check type + if (async_queue is not None and + not isinstance(async_queue, (int, Reference))): + raise TypeError(f"Invalid async_queue value, expect Reference or " + f"integer or None or False, got : {async_queue}") + + # handle list + if not isinstance(nodes, list): + nodes = [nodes] + + directive_cls = (ACCDataDirective) + for node in nodes: + parent = node.ancestor(directive_cls) + if parent is not None: + if async_queue is not None: + if async_queue != parent.async_queue: + raise TransformationError( + 'Try to make an ACCKernelTrans with async_queue ' + 'different than the one in parent data directive ' + '!') + def validate(self, nodes, options): # pylint: disable=signature-differs ''' @@ -2532,6 +2615,11 @@ def validate(self, nodes, options): kernels region. :type nodes: (list of) :py:class:`psyclone.psyir.nodes.Node` :param options: a dictionary with options for transformations. + + The available options are : + - async_queue : Permit to force using the given async stream if + not None. + :type options: Optional[Dict[str, Any]] :raises NotImplementedError: if the supplied Nodes belong to \ @@ -2564,6 +2652,16 @@ def validate(self, nodes, options): "A kernels transformation must enclose at least one loop or " "array range but none were found.") + # handle default empty option + if options is None: + options = {} + + # extract async + async_queue = options.get('async_queue', False) + + # check + self.check_async_queue(node, async_queue) + class ACCDataTrans(RegionTrans): ''' From 918e07b9b7a47755064b8c2db14a5ede66bac0c4 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 18 Jul 2024 14:13:04 +0100 Subject: [PATCH 02/18] #1664 mv new functionality over to new class location --- .../transformations/acc_kernels_trans.py | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index ae237aa179..d19f2605d5 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -102,14 +102,52 @@ def apply(self, node, options=None): if not options: options = {} default_present = options.get("default_present", False) + async_queue = options.get("async_queue", False) + + # check + self.check_async_queue(node, async_queue) # Create a directive containing the nodes in node_list and insert it. directive = ACCKernelsDirective( parent=parent, children=[node.detach() for node in node_list], - default_present=default_present) + default_present=default_present, async_queue=async_queue) parent.children.insert(start_index, directive) + def check_async_queue(self, nodes, async_queue): + ''' + Common function to check that all parent data directives have + the same async queue. + + :param node: a node or list of nodes in the PSyIR to enclose. + :type nodes: (a list of) :py:class:`psyclone.psyir.nodes.Node` + + :param async_queue: The async queue to expect in parents. + :type async_queue: \ + Optional[bool,int,:py:class: psyclone.core.Reference] + ''' + + # check type + if (async_queue is not None and + not isinstance(async_queue, (int, Reference))): + raise TypeError(f"Invalid async_queue value, expect Reference or " + f"integer or None or False, got : {async_queue}") + + # handle list + if not isinstance(nodes, list): + nodes = [nodes] + + directive_cls = (ACCDataDirective) + for node in nodes: + parent = node.ancestor(directive_cls) + if parent is not None: + if async_queue is not None: + if async_queue != parent.async_queue: + raise TransformationError( + 'Try to make an ACCKernelTrans with async_queue ' + 'different than the one in parent data directive ' + '!') + def validate(self, nodes, options=None): # pylint: disable=signature-differs ''' @@ -124,6 +162,7 @@ def validate(self, nodes, options=None): :param bool options["disable_loop_check"]: whether to disable the check that the supplied region contains 1 or more loops. Default is False (i.e. the check is enabled). + :param int options["async_queue"]: use the specified async stream. :raises NotImplementedError: if the supplied Nodes belong to a GOInvokeSchedule. From edf991726819efda366ec3f41b40eaeca77edcdf Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 18 Jul 2024 16:36:55 +0100 Subject: [PATCH 03/18] #1664 fix some import errors after merge --- .../transformations/acc_kernels_trans.py | 4 ++-- .../tests/psyir/nodes/acc_directives_test.py | 20 ++++++------------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index d19f2605d5..f7a43b4cc5 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -42,8 +42,8 @@ from psyclone import psyGen from psyclone.psyir.nodes import ( - ACCKernelsDirective, Assignment, Call, CodeBlock, Loop, PSyDataNode, - Reference, Return, Routine, Statement, WhileLoop) + ACCDataDirective, ACCKernelsDirective, Assignment, Call, CodeBlock, Loop, + PSyDataNode, Reference, Return, Routine, Statement, WhileLoop) from psyclone.psyir.symbols import UnsupportedFortranType from psyclone.psyir.transformations.region_trans import RegionTrans from psyclone.psyir.transformations.transformation_error import ( diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index f4a61364e9..94040291a3 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -47,18 +47,10 @@ from psyclone.f2pygen import ModuleGen from psyclone.parse.algorithm import parse from psyclone.psyGen import PSyFactory -from psyclone.psyir.nodes import ACCRoutineDirective, \ - ACCKernelsDirective, Schedule, ACCUpdateDirective, ACCLoopDirective, \ - ACCWaitDirective, Routine, ACCParallelDirective, ACCEnterDataDirective -from psyclone.psyir.nodes.reference import Reference -from psyclone.psyir.nodes.array_reference import ArrayReference, \ - Literal, INTEGER_TYPE +from psyclone.psyir.nodes.array_reference import ArrayReference from psyclone.psyir.nodes.acc_directives import ACCAsyncMixin -from psyclone.psyir.symbols import SymbolTable, Symbol, DataSymbol, \ - DeferredType -from psyclone.transformations import ACCEnterDataTrans, ACCParallelTrans, \ - ACCKernelsTrans, TransformationError -from psyclone.psyir.nodes import (ACCKernelsDirective, +from psyclone.psyir.nodes import (ACCEnterDataDirective, + ACCKernelsDirective, ACCLoopDirective, ACCParallelDirective, ACCRegionDirective, @@ -71,8 +63,8 @@ Return, Routine) from psyclone.psyir.nodes.loop import Loop -from psyclone.psyir.nodes.schedule import Schedule -from psyclone.psyir.symbols import SymbolTable, DataSymbol, INTEGER_TYPE +from psyclone.psyir.symbols import ( + Symbol, SymbolTable, DataSymbol, INTEGER_TYPE, UnresolvedType) from psyclone.psyir.transformations import ACCKernelsTrans from psyclone.transformations import ( ACCDataTrans, ACCEnterDataTrans, ACCLoopTrans, @@ -444,7 +436,7 @@ def test_acckernelsdirective_gencode(default_present): Reference(Symbol('stream1')), ArrayReference.create(DataSymbol( 'stream2', - DeferredType()), + UnresolvedType()), [Literal("1", INTEGER_TYPE)] ) ]) From b9de9feecb04548b385558eaf12afd45071eeec0 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 18 Jul 2024 18:37:53 +0100 Subject: [PATCH 04/18] #1664 more fixes following merge --- .../transformations/acc_kernels_trans.py | 4 ++ .../tests/psyir/nodes/acc_directives_test.py | 48 ++++++++----------- src/psyclone/transformations.py | 10 ---- 3 files changed, 25 insertions(+), 37 deletions(-) diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index f7a43b4cc5..bcfa58b120 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -226,6 +226,10 @@ def validate(self, nodes, options=None): f"Cannot include '{icall.debug_string()}' in an " f"OpenACC region because it is not available on GPU.") + # extract async option and check validity + async_queue = options.get('async_queue', False) if options else False + self.check_async_queue(node, async_queue) + # Check that we have at least one loop or array range within # the proposed region unless this has been disabled. if options and options.get("disable_loop_check", False): diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index 94040291a3..13d3a62f6c 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -49,26 +49,18 @@ from psyclone.psyGen import PSyFactory from psyclone.psyir.nodes.array_reference import ArrayReference from psyclone.psyir.nodes.acc_directives import ACCAsyncMixin -from psyclone.psyir.nodes import (ACCEnterDataDirective, - ACCKernelsDirective, - ACCLoopDirective, - ACCParallelDirective, - ACCRegionDirective, - ACCRoutineDirective, - ACCUpdateDirective, - ACCAtomicDirective, - Assignment, - Literal, - Reference, - Return, - Routine) +from psyclone.psyir.nodes import ( + ACCEnterDataDirective, ACCKernelsDirective, ACCLoopDirective, + ACCParallelDirective, ACCRegionDirective, ACCRoutineDirective, + ACCUpdateDirective, ACCAtomicDirective, ACCWaitDirective, Assignment, + Literal, Reference, Return, Routine, Schedule) from psyclone.psyir.nodes.loop import Loop from psyclone.psyir.symbols import ( Symbol, SymbolTable, DataSymbol, INTEGER_TYPE, UnresolvedType) from psyclone.psyir.transformations import ACCKernelsTrans from psyclone.transformations import ( ACCDataTrans, ACCEnterDataTrans, ACCLoopTrans, - ACCParallelTrans, ACCRoutineTrans) + ACCParallelTrans, ACCRoutineTrans, TransformationError) BASE_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( os.path.abspath(__file__)))), "test_files", "dynamo0p3") @@ -227,28 +219,29 @@ def test_accenterdatadirective_gencode_4(trans1, trans2): # (3/4) Method gen_code def test_accenterdatadirective_gencode_3_async(): '''Test that we can add the async directive on enter data.''' + API = "lfric" acc_trans = ACCKernelsTrans() acc_enter_trans = ACCEnterDataTrans() - _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90")) - psy = PSyFactory(distributed_memory=False).create(info) + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90"), api=API) + psy = PSyFactory(distributed_memory=False, api=API).create(info) sched = psy.invokes.get('invoke_0_testkern_type').schedule acc_trans.apply(sched.children, options={"async_queue": 3}) acc_enter_trans.apply(sched, options={"async_queue": 3}) code = str(psy.gen) assert ( - " !$acc enter data copyin(f1_proxy,f1_proxy%data," - "f2_proxy,f2_proxy%data,m1_proxy,m1_proxy%data,m2_proxy," - "m2_proxy%data,map_w1,map_w2,map_w3,ndf_w1,ndf_w2,ndf_w3,nlayers," + " !$acc enter data copyin(f1_data,f2_data,m1_data,m2_data,map_w1," + "map_w2,map_w3,ndf_w1,ndf_w2,ndf_w3,nlayers," "undf_w1,undf_w2,undf_w3) async(3)\n" in code) # (3/4) Method gen_code def test_accenterdatadirective_gencode_3_async_error(): '''Test that we can add the async directive on enter data.''' + API = "lfric" acc_trans = ACCKernelsTrans() acc_enter_trans = ACCEnterDataTrans() - _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90")) - psy = PSyFactory(distributed_memory=False).create(info) + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90"), api=API) + psy = PSyFactory(distributed_memory=False, api=API).create(info) sched = psy.invokes.get('invoke_0_testkern_type').schedule acc_trans.apply(sched.children) with pytest.raises(TransformationError) as error: @@ -445,8 +438,9 @@ def test_acckernelsdirective_gencode_async_queue(async_queue): generates the expected code. Use the dynamo0.3 API. ''' - _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90")) - psy = PSyFactory(distributed_memory=False).create(info) + API = "lfric" + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90"), api=API) + psy = PSyFactory(distributed_memory=False, api=API).create(info) sched = psy.invokes.get('invoke_0_testkern_type').schedule trans = ACCKernelsTrans() @@ -468,7 +462,7 @@ def test_acckernelsdirective_gencode_async_queue(async_queue): string = " async(stream1)" assert ( f" !$acc kernels{string}\n" - f" DO cell=loop0_start,loop0_stop\n" in code) + f" DO cell = loop0_start, loop0_stop, 1\n" in code) assert ( " END DO\n" " !$acc end kernels\n" in code) @@ -662,9 +656,9 @@ def test_accwaitdirective_begin_string(): def test_accwaitdirective_gencode(): '''Test gen code of ACCWaitDirective''' - - _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90")) - psy = PSyFactory(distributed_memory=False).create(info) + API = "lfric" + _, info = parse(os.path.join(BASE_PATH, "1_single_invoke.f90"), api=API) + psy = PSyFactory(distributed_memory=False, api=API).create(info) routines = psy.container.walk(Routine) routines[0].children.append(ACCWaitDirective(1)) code = str(psy.gen) diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index 7f023fa460..e32ac464a8 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -2649,16 +2649,6 @@ def validate(self, node, options=None): f"parallelism. Should be one of " f"{ACCRoutineDirective.SUPPORTED_PARALLELISM}") - # handle default empty option - if options is None: - options = {} - - # extract async - async_queue = options.get('async_queue', False) - - # check - self.check_async_queue(node, async_queue) - class ACCDataTrans(RegionTrans): ''' From da4f5b5b276f7a7234b65bf87b1ff1c9e6a6f7d9 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 18 Jul 2024 18:47:47 +0100 Subject: [PATCH 05/18] #1664 fix linting errors --- src/psyclone/psyir/nodes/acc_directives.py | 3 --- src/psyclone/tests/psyir/nodes/acc_directives_test.py | 11 ++++++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/psyclone/psyir/nodes/acc_directives.py b/src/psyclone/psyir/nodes/acc_directives.py index 7409fbce9a..01f105e1db 100644 --- a/src/psyclone/psyir/nodes/acc_directives.py +++ b/src/psyclone/psyir/nodes/acc_directives.py @@ -45,7 +45,6 @@ import abc -from psyclone.core import AccessType, VariablesAccessInfo from psyclone.core import Signature from psyclone.f2pygen import DirectiveGen, CommentGen from psyclone.errors import GenerationError, InternalError @@ -59,9 +58,7 @@ from psyclone.psyir.nodes.psy_data_node import PSyDataNode from psyclone.psyir.nodes.routine import Routine from psyclone.psyir.nodes.reference import Reference -from psyclone.psyir.symbols import ScalarType from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin -from psyclone.core.signature import Signature from psyclone.psyir.nodes.schedule import Schedule from psyclone.psyir.nodes.operation import BinaryOperation from psyclone.psyir.symbols import ScalarType diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index 13d3a62f6c..3ba5ea69ce 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -637,7 +637,7 @@ def test_accwaitdirective_init(): assert directive4.wait_queue == Reference(Symbol("variable_name")) with pytest.raises(TypeError) as error: - directive5 = ACCWaitDirective(3.5) + _ = ACCWaitDirective(3.5) assert 'Invalid value type as wait_group' in str(error) @@ -731,13 +731,14 @@ def test_directives_async_queue(directive_type): def test_mixin_constructor_error(): ''' Check constructor with an unexpected value type (float instead of int) + ''' + asyncMixin = ACCAsyncMixin(3.5) with pytest.raises(TypeError) as error: - asyncMixin = ACCAsyncMixin(3.5) - value = asyncMixin._build_async_string() - assert "Invalid async_queue value, expect Reference or integer or None " \ - "or bool, got : 3.5" in str(error) + _ = asyncMixin._build_async_string() + assert ("Invalid async_queue value, expect Reference or integer or None " + "or bool, got : 3.5" in str(error)) def test_accdatadirective_update_data_movement_clauses(fortran_reader, From 2ef718b61b972307ed6af9e5661c2524d3f81a68 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Fri, 22 Nov 2024 12:39:52 +0000 Subject: [PATCH 06/18] #1664 update tests --- src/psyclone/tests/psyir/nodes/acc_directives_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index 530c459c38..165e5ef07c 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -230,7 +230,7 @@ def test_accenterdatadirective_gencode_3_async(): code = str(psy.gen) assert ( " !$acc enter data copyin(f1_data,f2_data,m1_data,m2_data,map_w1," - "map_w2,map_w3,ndf_w1,ndf_w2,ndf_w3,nlayers," + "map_w2,map_w3,ndf_w1,ndf_w2,ndf_w3,nlayers_f1," "undf_w1,undf_w2,undf_w3) async(3)\n" in code) @@ -248,6 +248,7 @@ def test_accenterdatadirective_gencode_3_async_error(): acc_enter_trans.apply(sched, options={"async_queue": 3}) assert 'async_queue different' in str(error.value) + # Class ACCLoopDirective start def test_accloopdirective_node_str_default(monkeypatch): @@ -764,10 +765,9 @@ def test_mixin_constructor_error(): Check constructor with an unexpected value type (float instead of int) ''' - asyncMixin = ACCAsyncMixin(3.5) - with pytest.raises(TypeError) as error: - _ = asyncMixin._build_async_string() + _ = ACCAsyncMixin(3.5) + assert ("Invalid async_queue value, expect Reference or integer or None " "or bool, got : 3.5" in str(error)) From b351d84c9f12beeac9838dfada34925dc2d37edb Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Fri, 22 Nov 2024 16:59:40 +0000 Subject: [PATCH 07/18] #1664 tidy and rationalise checking code --- src/psyclone/psyir/nodes/acc_mixins.py | 3 +- .../transformations/acc_kernels_trans.py | 54 +++++++++++-------- .../transformations/acc_kernels_trans_test.py | 27 +++++++++- 3 files changed, 58 insertions(+), 26 deletions(-) diff --git a/src/psyclone/psyir/nodes/acc_mixins.py b/src/psyclone/psyir/nodes/acc_mixins.py index dbc96eec33..7f6dd10c96 100644 --- a/src/psyclone/psyir/nodes/acc_mixins.py +++ b/src/psyclone/psyir/nodes/acc_mixins.py @@ -127,5 +127,4 @@ def __eq__(self, other): ''' if type(self) is not type(other): return False - else: - return self.async_queue == other.async_queue + return self.async_queue == other.async_queue diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index bcfa58b120..a7fabce10d 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -41,8 +41,10 @@ import re from psyclone import psyGen +from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes import ( - ACCDataDirective, ACCKernelsDirective, Assignment, Call, CodeBlock, Loop, + ACCEnterDataDirective, ACCKernelsDirective, Assignment, + Call, CodeBlock, Loop, PSyDataNode, Reference, Return, Routine, Statement, WhileLoop) from psyclone.psyir.symbols import UnsupportedFortranType from psyclone.psyir.transformations.region_trans import RegionTrans @@ -105,7 +107,7 @@ def apply(self, node, options=None): async_queue = options.get("async_queue", False) # check - self.check_async_queue(node, async_queue) + self.check_async_queue(node_list, async_queue) # Create a directive containing the nodes in node_list and insert it. directive = ACCKernelsDirective( @@ -114,39 +116,45 @@ def apply(self, node, options=None): parent.children.insert(start_index, directive) - def check_async_queue(self, nodes, async_queue): + @staticmethod + def check_async_queue(nodes, async_queue): ''' Common function to check that all parent data directives have the same async queue. - :param node: a node or list of nodes in the PSyIR to enclose. - :type nodes: (a list of) :py:class:`psyclone.psyir.nodes.Node` + :param node: the nodes in the PSyIR to enclose. + :type nodes: list[:py:class:`psyclone.psyir.nodes.Node`] :param async_queue: The async queue to expect in parents. :type async_queue: \ - Optional[bool,int,:py:class: psyclone.core.Reference] + Optional[bool, int, :py:class:`psyclone.core.Reference`] + ''' + if async_queue is None: + return # check type - if (async_queue is not None and - not isinstance(async_queue, (int, Reference))): + if not isinstance(async_queue, (int, Reference)): raise TypeError(f"Invalid async_queue value, expect Reference or " f"integer or None or False, got : {async_queue}") - # handle list - if not isinstance(nodes, list): - nodes = [nodes] - - directive_cls = (ACCDataDirective) - for node in nodes: - parent = node.ancestor(directive_cls) - if parent is not None: - if async_queue is not None: - if async_queue != parent.async_queue: - raise TransformationError( - 'Try to make an ACCKernelTrans with async_queue ' - 'different than the one in parent data directive ' - '!') + parent = nodes[0].ancestor(ACCAsyncMixin) + if parent and async_queue != parent.async_queue: + raise TransformationError( + f"Cannot apply ACCKernelsTrans with asynchronous " + f"queue '{async_queue}' because a parent " + f"directive specifies queue '{parent.async_queue}'") + + parent = nodes[0].ancestor(Routine) + if parent: + edata = parent.walk(ACCEnterDataDirective) + if edata: + if async_queue != edata[0].async_queue: + raise TransformationError( + f"Cannot apply ACCKernelsTrans with asynchronous queue" + f" '{async_queue}' because the containing routine " + f"has an ENTER DATA directive specifying queue " + f"'{edata[0].async_queue}'") def validate(self, nodes, options=None): # pylint: disable=signature-differs @@ -228,7 +236,7 @@ def validate(self, nodes, options=None): # extract async option and check validity async_queue = options.get('async_queue', False) if options else False - self.check_async_queue(node, async_queue) + self.check_async_queue(node_list, async_queue) # Check that we have at least one loop or array range within # the proposed region unless this has been disabled. diff --git a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py index 2ac8c88b20..292240954f 100644 --- a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py @@ -44,7 +44,7 @@ from psyclone.psyir.nodes import Assignment, ACCKernelsDirective, Loop, Routine from psyclone.psyir.transformations import ( ACCKernelsTrans, TransformationError, ProfileTrans) -from psyclone.transformations import ACCLoopTrans +from psyclone.transformations import ACCEnterDataTrans, ACCLoopTrans from psyclone.tests.utilities import get_invoke EXPLICIT_LOOP = ("program do_loop\n" @@ -464,3 +464,28 @@ def test_no_assumed_size_char_in_kernels(fortran_reader): assert ("Assumed-size character variables cannot be enclosed in an OpenACC" " region but found 'explicit_size_char = assumed2" in str(err.value)) + + +def test_check_async_queue_with_enter_data(fortran_reader): + '''Tests for the check_async_queue() method.''' + acc_trans = ACCKernelsTrans() + acc_edata_trans = ACCEnterDataTrans() + with pytest.raises(TypeError) as err: + acc_trans.check_async_queue(None, 3.5) + assert ("Invalid async_queue value, expect Reference or integer or None " + "or False, got : 3.5" in str(err.value)) + psyir = fortran_reader.psyir_from_source( + "program two_loops\n" + " integer :: ji\n" + " real :: array(10,10)\n" + " do ji = 1, 5\n" + " array(ji,1) = 2.0*array(ji,2)\n" + " end do\n" + "end program two_loops\n") + prog = psyir.walk(Routine)[0] + acc_edata_trans.apply(prog, {"async_queue": 1}) + with pytest.raises(TransformationError) as err: + acc_trans.check_async_queue(prog.walk(Loop), 2) + assert ("Cannot apply ACCKernelsTrans with asynchronous queue '2' because " + "the containing routine has an ENTER DATA directive specifying " + "queue '1'" in str(err.value)) From 1be11c3e88abeb653ddc92d737c33c4792436dff Mon Sep 17 00:00:00 2001 From: SCHREIBER Martin Date: Mon, 25 Nov 2024 23:07:20 +0100 Subject: [PATCH 08/18] updated for coverage --- src/psyclone/psyir/nodes/acc_mixins.py | 2 +- .../transformations/acc_kernels_trans.py | 21 +++++++-------- .../tests/psyir/nodes/acc_directives_test.py | 26 +++++++++++-------- .../transformations/acc_kernels_trans_test.py | 24 +++++++++++++++++ src/psyclone/transformations.py | 10 +++---- 5 files changed, 55 insertions(+), 28 deletions(-) diff --git a/src/psyclone/psyir/nodes/acc_mixins.py b/src/psyclone/psyir/nodes/acc_mixins.py index 7f6dd10c96..b19275a5c9 100644 --- a/src/psyclone/psyir/nodes/acc_mixins.py +++ b/src/psyclone/psyir/nodes/acc_mixins.py @@ -54,7 +54,7 @@ class ACCAsyncMixin(metaclass=abc.ABCMeta): to be used. :type async_queue: bool | int | :py:class:`psyclone.core.Reference` ''' - def __init__(self, async_queue=False): + def __init__(self, async_queue=None): self.async_queue = async_queue @property diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index a7fabce10d..531e3c0afc 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -76,7 +76,7 @@ class ACCKernelsTrans(RegionTrans): excluded_node_types = (CodeBlock, Return, PSyDataNode, psyGen.HaloExchange, WhileLoop) - def apply(self, node, options=None): + def apply(self, node, options={}): ''' Enclose the supplied list of PSyIR nodes within an OpenACC Kernels region. @@ -101,10 +101,8 @@ def apply(self, node, options=None): parent = node_list[0].parent start_index = node_list[0].position - if not options: - options = {} default_present = options.get("default_present", False) - async_queue = options.get("async_queue", False) + async_queue = options.get("async_queue", None) # check self.check_async_queue(node_list, async_queue) @@ -139,11 +137,12 @@ def check_async_queue(nodes, async_queue): f"integer or None or False, got : {async_queue}") parent = nodes[0].ancestor(ACCAsyncMixin) - if parent and async_queue != parent.async_queue: - raise TransformationError( - f"Cannot apply ACCKernelsTrans with asynchronous " - f"queue '{async_queue}' because a parent " - f"directive specifies queue '{parent.async_queue}'") + if parent: + if async_queue != parent.async_queue: + raise TransformationError( + f"Cannot apply ACCKernelsTrans with asynchronous " + f"queue '{async_queue}' because a parent " + f"directive specifies queue '{parent.async_queue}'") parent = nodes[0].ancestor(Routine) if parent: @@ -156,7 +155,7 @@ def check_async_queue(nodes, async_queue): f"has an ENTER DATA directive specifying queue " f"'{edata[0].async_queue}'") - def validate(self, nodes, options=None): + def validate(self, nodes, options={}): # pylint: disable=signature-differs ''' Check that we can safely enclose the supplied node or list of nodes @@ -235,7 +234,7 @@ def validate(self, nodes, options=None): f"OpenACC region because it is not available on GPU.") # extract async option and check validity - async_queue = options.get('async_queue', False) if options else False + async_queue = options.get('async_queue', None) self.check_async_queue(node_list, async_queue) # Check that we have at least one loop or array range within diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index 165e5ef07c..231e5ef7a7 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -827,9 +827,13 @@ def test_accparalleldirective(): assert accpar._default_present is True # Also without default(present) - accpar = ACCParallelDirective(default_present=False) - assert isinstance(accpar, ACCParallelDirective) - assert accpar._default_present is False + accpar2 = ACCParallelDirective(default_present=False) + assert isinstance(accpar2, ACCParallelDirective) + assert accpar2._default_present is False + + # Call __eq__ + eq_result = accpar == accpar2 + assert eq_result is False # But only with boolean values with pytest.raises(TypeError) as err: @@ -838,22 +842,22 @@ def test_accparalleldirective(): "boolean but value '3' has been given." in str(err.value)) # The default present value has getter and setter - accpar.default_present = True - assert accpar.default_present is True + accpar2.default_present = True + assert accpar2.default_present is True with pytest.raises(TypeError) as err: - accpar.default_present = "invalid" + accpar2.default_present = "invalid" assert ("The ACCParallelDirective default_present property must be a " "boolean but value 'invalid' has been given." in str(err.value)) # The begin string depends on the default present value - accpar.default_present = True - assert accpar.begin_string() == "acc parallel default(present)" - accpar.default_present = False - assert accpar.begin_string() == "acc parallel" + accpar2.default_present = True + assert accpar2.begin_string() == "acc parallel default(present)" + accpar2.default_present = False + assert accpar2.begin_string() == "acc parallel" # It has an end_string - assert accpar.end_string() == "acc end parallel" + assert accpar2.end_string() == "acc end parallel" def test_acc_atomics_is_valid_atomic_statement(fortran_reader): diff --git a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py index 292240954f..f2ec5972b8 100644 --- a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py @@ -68,6 +68,30 @@ def test_kernels_single_node(fortran_reader): assert isinstance(schedule[0], ACCKernelsDirective) +def test_trigger_async_error(fortran_reader): + """Check that we can't apply an ACC Kernel Trans with + a parent using an async queue IDs that is different.""" + psyir = fortran_reader.psyir_from_source(EXPLICIT_LOOP) + acc_trans = ACCKernelsTrans() + + loop = psyir.walk(Loop)[0] + acc_trans.apply(loop, + {"default_present": True, + "async_queue": 2}) + + loop = psyir.walk(Loop)[0] + + with pytest.raises(TransformationError) as einfo: + acc_trans.apply(loop, {"default_present": True, + "async_queue": 3}) + + correct = ("Cannot apply ACCKernelsTrans with asynchronous" + " queue '3' because a parent directive specifies" + " queue '2'") + + assert correct in str(einfo.value) + + def test_no_kernels_error(fortran_reader): ''' Check that the transformation rejects an attempt to put things that aren't kernels inside a kernels region. ''' diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index 9d4650799a..fcd5b30816 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -2486,7 +2486,7 @@ def name(self): ''' return "ACCEnterDataTrans" - def apply(self, sched, options=None): + def apply(self, sched, options={}): # pylint: disable=arguments-renamed '''Adds an OpenACC "enter data" directive to the invoke associated with the supplied Schedule. Any fields accessed by OpenACC kernels @@ -2533,8 +2533,8 @@ def apply(self, sched, options=None): if options is None: options = {} - # extract async - async_queue = options.get('async_queue', False) + # extract async. Default to None + async_queue = options.get('async_queue', None) # check self.check_child_async(sched, async_queue) @@ -2566,7 +2566,7 @@ def check_child_async(self, sched, async_queue): 'Try to make an ACCEnterDataTrans with async_queue ' 'different than the one in child kernels !') - def validate(self, sched, options=None): + def validate(self, sched, options={}): # pylint: disable=arguments-differ, arguments-renamed ''' Check that we can safely apply the OpenACC enter-data transformation @@ -2597,7 +2597,7 @@ def validate(self, sched, options=None): # handle async option if options is None: options = {} - async_queue = options.get('async_queue', False) + async_queue = options.get('async_queue', None) # check consistency with childs about async_queue self.check_child_async(sched, async_queue) From 48930fd0b7df8bd49e18c0314cd560e8b737d954 Mon Sep 17 00:00:00 2001 From: SCHREIBER Martin Date: Tue, 26 Nov 2024 00:12:48 +0100 Subject: [PATCH 09/18] u --- src/psyclone/psyGen.py | 8 +- src/psyclone/psyir/nodes/acc_directives.py | 122 +++++++++++++-------- 2 files changed, 80 insertions(+), 50 deletions(-) diff --git a/src/psyclone/psyGen.py b/src/psyclone/psyGen.py index 4f1311fe18..9fce71ae88 100644 --- a/src/psyclone/psyGen.py +++ b/src/psyclone/psyGen.py @@ -2582,11 +2582,11 @@ class KernelArgument(Argument): arguments as specified by the kernel argument metadata and the kernel invocation in the Algorithm layer. - :param arg: information obtained from the metadata for this kernel \ - argument. + :param arg: information obtained from the metadata for this kernel + argument. :type arg: :py:class:`psyclone.parse.kernel.Descriptor` - :param arg_info: information on how this argument is specified in \ - the Algorithm layer. + :param arg_info: information on how this argument is specified in + the Algorithm layer. :type arg_info: :py:class:`psyclone.parse.algorithm.Arg` :param call: the PSyIR kernel node to which this argument pertains. :type call: :py:class:`psyclone.psyGen.Kern` diff --git a/src/psyclone/psyir/nodes/acc_directives.py b/src/psyclone/psyir/nodes/acc_directives.py index 61a11abf87..486a84094e 100644 --- a/src/psyclone/psyir/nodes/acc_directives.py +++ b/src/psyclone/psyir/nodes/acc_directives.py @@ -76,6 +76,7 @@ class ACCDirective(metaclass=abc.ABCMeta): Note that classes inheriting from it must place the ACCDirective in front of the other Directive node sub-class, so that the Python MRO gives preference to this class's attributes. + ''' _PREFIX = "ACC" @@ -150,8 +151,9 @@ class ACCRoutineDirective(ACCStandaloneDirective): ''' Class representing an "ACC routine" OpenACC directive in PSyIR. - :param str parallelism: the level of parallelism in the routine, one of + :param parallelism: the level of parallelism in the routine, one of "gang", "worker", "vector", "seq". + :type parallelism: str ''' SUPPORTED_PARALLELISM = ["seq", "vector", "worker", "gang"] @@ -225,15 +227,16 @@ class ACCEnterDataDirective(ACCStandaloneDirective, ACCAsyncMixin): :param children: list of nodes which the directive should have as children. :type children: List[:py:class:`psyclone.psyir.nodes.Node`] - :param parent: the node in the InvokeSchedule to which to add this \ - directive as a child. + :param parent: the node in the InvokeSchedule to which to add this + directive as a child. :type parent: :py:class:`psyclone.psyir.nodes.Node` :param async_queue: Enable async support and attach it to the given queue. - Can use False to disable, True to enable on default - stream. Int to attach to the given stream ID or use a - variable Signature to say at runtime what stream to be - used. - :type async_queue: bool | int | :py:class: psyclone.core.Signature + Can use False to disable, True to enable on default + stream. Int to attach to the given stream ID or use a + variable Signature to say at runtime what stream to be + used. + :type async_queue: bool | int | :py:class:psyclone.core.Signature + ''' def __init__(self, children=None, parent=None, async_queue=False): super().__init__(children=children, parent=parent) @@ -344,16 +347,18 @@ def data_on_device(self, parent): class ACCParallelDirective(ACCRegionDirective, ACCAsyncMixin): ''' Class representing the !$ACC PARALLEL directive of OpenACC - in the PSyIR. By default it includes the 'DEFAULT(PRESENT)' clause which + in the PSyIR. By default it includes the `DEFAULT(PRESENT)` clause which means this node must either come after an EnterDataDirective or within a DataDirective. - :param bool default_present: whether this directive includes the - 'DEFAULT(PRESENT)' clause. + :param default_present: whether this directive includes the + `DEFAULT(PRESENT)` clause or not. + :type default_present: bool :param async_queue: Make the directive asynchonous and attached to the - given stream identified by an ID or by a variable - name pointing to an integer. + given stream identified by an ID or by a variable + name pointing to an integer. :type async_queue: bool|:py:class:`psyclone.psyir.nodes.Reference`|int + ''' def __init__(self, async_queue=False, default_present=True, **kwargs): super().__init__(**kwargs) @@ -370,6 +375,7 @@ def __eq__(self, other): :returns: whether other is equal to self. :rtype: bool + ''' is_eq = super().__eq__(other) is_eq = is_eq and self.default_present == other.default_present @@ -426,6 +432,7 @@ def end_string(self): ''' :returns: the closing statement for this directive. :rtype: str + ''' return "acc end parallel" @@ -434,6 +441,7 @@ def default_present(self): ''' :returns: whether the directive includes the 'default(present)' clause. :rtype: bool + ''' return self._default_present @@ -460,6 +468,7 @@ def fields(self): :returns: list of names of field arguments. :rtype: List[str] + ''' # Look-up the kernels that are children of this node fld_list = [] @@ -485,7 +494,8 @@ class ACCLoopDirective(ACCRegionDirective): :param bool vector: whether or not to add the `vector` clause to the loop directive. :param kwargs: additional keyword arguments provided to the super class. - :type kwargs: unwrapped dict. + :type kwargs: dict. + ''' def __init__(self, collapse=None, independent=True, sequential=False, gang=False, vector=False, **kwargs): @@ -507,6 +517,7 @@ def __eq__(self, other): :returns: whether other is equal to self. :rtype: bool + ''' is_eq = super().__eq__(other) is_eq = is_eq and self.collapse == other.collapse @@ -535,7 +546,8 @@ def collapse(self): ''' :returns: the number of nested loops to collapse into a single \ iteration space for this node. - :rtype: int or None + :rtype: int | None + ''' return self._collapse @@ -571,6 +583,7 @@ def independent(self): :returns: whether the independent clause will be added to this loop \ directive. :rtype: bool + ''' return self._independent @@ -580,6 +593,7 @@ def sequential(self): :returns: whether or not the `seq` clause is added to this loop \ directive. :rtype: bool + ''' return self._sequential @@ -589,6 +603,7 @@ def gang(self): :returns: whether or not the `gang` clause is added to this loop directive. :rtype: bool + ''' return self._gang @@ -598,6 +613,7 @@ def vector(self): :returns: whether or not the `vector` clause is added to this loop directive. :rtype: bool + ''' return self._vector @@ -610,6 +626,7 @@ def node_str(self, colour=True): :returns: description of this node, possibly coloured. :rtype: str + ''' self._check_clauses_consistent() text = self.coloured_name(colour) @@ -649,10 +666,11 @@ def gen_code(self, parent): loop directive. :param parent: the parent Node in the Schedule to which to add our - content. + content. :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` - :raises GenerationError: if this "!$acc loop" is not enclosed within \ - an ACC Parallel region. + :raises GenerationError: if this "!$acc loop" is not enclosed within + an ACC Parallel region. + ''' self.validate_global_constraints() @@ -711,17 +729,17 @@ class ACCKernelsDirective(ACCRegionDirective, ACCAsyncMixin): ''' Class representing the !$ACC KERNELS directive in the PSyIR. - :param children: the PSyIR nodes to be enclosed in the Kernels region \ - and which are therefore children of this node. + :param children: the PSyIR nodes to be enclosed in the Kernels region + and which are therefore children of this node. :type children: List[:py:class:`psyclone.psyir.nodes.Node`] :param parent: the parent of this node in the PSyIR. :type parent: sub-class of :py:class:`psyclone.psyir.nodes.Node` - :param bool default_present: whether or not to add the \ - "default(present)" clause to the kernels \ - directive. - :param async_queue: Make the directive asynchonous and attached to the - given stream identified by an ID or by a variable - name pointing to an integer. + :param bool default_present: whether or not to add the + "default(present)" clause to the kernels + directive. + :param async_queue: Make the directive asynchronous and attached to the + given stream identified by an ID or by a variable + name pointing to an integer. :type async_queue: bool|:py:class:`psyclone.psyir.nodes.Reference`|int ''' @@ -741,6 +759,7 @@ def __eq__(self, other): :returns: whether other is equal to self. :rtype: bool + ''' is_eq = super().__eq__(other) is_eq = is_eq and self.default_present == other.default_present @@ -751,9 +770,10 @@ def __eq__(self, other): @property def default_present(self): ''' - :returns: whether the "default(present)" clause is added to the \ - kernels directive. + :returns: whether the "default(present)" clause is added to the + kernels directive. :rtype: bool + ''' return self._default_present @@ -762,8 +782,8 @@ def gen_code(self, parent): Generate the f2pygen AST entries in the Schedule for this OpenACC Kernels directive. - :param parent: the parent Node in the Schedule to which to add this \ - content. + :param parent: the parent Node in the Schedule to which to add this + content. :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` ''' @@ -854,6 +874,7 @@ def begin_string(self): ''' :returns: the beginning of the opening statement of this directive. :rtype: str + ''' return "acc data" @@ -904,29 +925,31 @@ def _update_data_movement_clauses(self): class ACCUpdateDirective(ACCStandaloneDirective, ACCAsyncMixin): ''' Class representing the OpenACC update directive in the PSyIR. It has - a direction attribute that can be set to 'self', 'host' or 'device', the + a direction attribute that can be set to `self`, `host` or `device`, the set of symbols being updated and an optional if_present clause. - :param signatures: the access signature(s) that need to be synchronised \ - with the device. + :param signatures: the access signature(s) that need to be synchronised + with the device. :type signatures: Set[:py:class:`psyclone.core.Signature`] - :param str direction: the direction of the synchronisation. + :param direction: the direction of the synchronisation. + :type direction: str :param children: list of nodes which the directive should have as children. :type children: List[:py:class:`psyclone.psyir.nodes.Node`] - :param parent: the node in the InvokeSchedule to which to add this \ - directive as a child. + :param parent: the node in the InvokeSchedule to which to add this + directive as a child. :type parent: :py:class:`psyclone.psyir.nodes.Node` - :param if_present: whether or not to include the 'if_present' - clause on the update directive (this instructs the - directive to silently ignore any variables that are not - on the device). - :param async_queue: Make the directive asynchonous and attached to the \ - given stream identified by an ID or by a variable name - pointing to an integer. - :type async_queue: Optional[ - bool|:py:class:`psyclone.psyir.nodes.Reference`|int - ] + :param if_present: whether or not to include the `if_present` + clause on the update directive (this instructs the + directive to silently ignore any variables that are not + on the device). :type if_present: Optional[bool] + :param async_queue: Make the directive asynchronous and attached to the + given stream identified by an ID or by a variable name + pointing to an integer. + :type async_queue: Optional[ + bool|:py:class:`psyclone.psyir.nodes.Reference`|int + ] + ''' _VALID_DIRECTIONS = ("self", "host", "device") @@ -948,6 +971,7 @@ def __eq__(self, other): :returns: whether other is equal to self. :rtype: bool + ''' is_eq = super().__eq__(other) is_eq = is_eq and self.sig_set == other.sig_set @@ -962,6 +986,7 @@ def sig_set(self): ''' :returns: the set of signatures to synchronise with the device. :rtype: Set[:py:class:`psyclone.core.Signature`] + ''' return self._sig_set @@ -970,6 +995,7 @@ def direction(self): ''' :returns: the direction of the synchronisation. :rtype: str + ''' return self._direction @@ -978,6 +1004,7 @@ def if_present(self): ''' :returns: whether or not to add the 'if_present' clause. :rtype: bool + ''' return self._if_present @@ -1081,6 +1108,7 @@ class ACCWaitDirective(ACCStandaloneDirective): :param wait_queue: Which ACC async stream to wait. None to wait all. :type wait_queue: Optional[:py:class:`psyclone.psyir.nodes.Reference`, int] + ''' def __init__(self, wait_queue=None): # call parent @@ -1093,6 +1121,7 @@ def __eq__(self, other): :returns: If the two directives are equals. :rtype: bool + ''' is_eq = super().__eq__(other) is_eq = is_eq and self._wait_queue == other._wait_queue @@ -1103,6 +1132,7 @@ def wait_queue(self): ''' :returns: The queue to wait on. :rtype: Optional[int, :py:class:`psyclone.psyir.nodes.Reference`] + ''' return self._wait_queue From ac7783d2e10af0d44d17f04f04fcaf4ff86dc21c Mon Sep 17 00:00:00 2001 From: SCHREIBER Martin Date: Tue, 26 Nov 2024 00:35:24 +0100 Subject: [PATCH 10/18] fixed generation of documentation --- src/psyclone/psyir/nodes/acc_directives.py | 40 +++++++++++-------- .../transformations/acc_kernels_trans.py | 2 +- src/psyclone/transformations.py | 4 -- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/psyclone/psyir/nodes/acc_directives.py b/src/psyclone/psyir/nodes/acc_directives.py index 486a84094e..3445f77803 100644 --- a/src/psyclone/psyir/nodes/acc_directives.py +++ b/src/psyclone/psyir/nodes/acc_directives.py @@ -63,6 +63,8 @@ from psyclone.psyir.nodes.operation import BinaryOperation from psyclone.psyir.symbols import ScalarType +from typing import Optional, Union + class ACCDirective(metaclass=abc.ABCMeta): # pylint: disable=too-few-public-methods @@ -181,7 +183,8 @@ def parallelism(self, value): :raises TypeError: if `value` is not a str. :raises ValueError: if `value` is not a recognised level of - parallelism. + parallelism. + ''' if not isinstance(value, str): raise TypeError( @@ -235,10 +238,12 @@ class ACCEnterDataDirective(ACCStandaloneDirective, ACCAsyncMixin): stream. Int to attach to the given stream ID or use a variable Signature to say at runtime what stream to be used. - :type async_queue: bool | int | :py:class:psyclone.core.Signature ''' - def __init__(self, children=None, parent=None, async_queue=False): + def __init__( + self, children=None, parent=None, + async_queue: Optional[Union[bool, Reference]] = None + ): super().__init__(children=children, parent=parent) ACCAsyncMixin.__init__(self, async_queue) self._acc_dirs = None # List of parallel directives @@ -353,14 +358,17 @@ class ACCParallelDirective(ACCRegionDirective, ACCAsyncMixin): :param default_present: whether this directive includes the `DEFAULT(PRESENT)` clause or not. - :type default_present: bool - :param async_queue: Make the directive asynchonous and attached to the + :param async_queue: Make the directive asynchronous and attached to the given stream identified by an ID or by a variable name pointing to an integer. - :type async_queue: bool|:py:class:`psyclone.psyir.nodes.Reference`|int ''' - def __init__(self, async_queue=False, default_present=True, **kwargs): + def __init__( + self, + async_queue: Union[bool, Reference, int] = False, + default_present: bool = True, + **kwargs + ): super().__init__(**kwargs) ACCAsyncMixin.__init__(self, async_queue) self.default_present = default_present @@ -740,12 +748,13 @@ class ACCKernelsDirective(ACCRegionDirective, ACCAsyncMixin): :param async_queue: Make the directive asynchronous and attached to the given stream identified by an ID or by a variable name pointing to an integer. - :type async_queue: bool|:py:class:`psyclone.psyir.nodes.Reference`|int ''' - def __init__(self, children=None, parent=None, default_present=True, - async_queue=False): + def __init__( + self, children=None, parent=None, default_present=True, + async_queue: Optional[Union[bool, Reference]] = None + ): super().__init__(children=children, parent=parent) ACCAsyncMixin.__init__(self, async_queue) self._default_present = default_present @@ -942,20 +951,19 @@ class ACCUpdateDirective(ACCStandaloneDirective, ACCAsyncMixin): clause on the update directive (this instructs the directive to silently ignore any variables that are not on the device). - :type if_present: Optional[bool] :param async_queue: Make the directive asynchronous and attached to the given stream identified by an ID or by a variable name pointing to an integer. - :type async_queue: Optional[ - bool|:py:class:`psyclone.psyir.nodes.Reference`|int - ] ''' _VALID_DIRECTIONS = ("self", "host", "device") - def __init__(self, signatures, direction, children=None, parent=None, - if_present=True, async_queue=False): + def __init__( + self, signatures, direction, children=None, parent=None, + if_present: Optional[bool] = True, + async_queue: Optional[Union[bool, Reference]] = None + ): super().__init__(children=children, parent=parent) ACCAsyncMixin.__init__(self, async_queue) self.sig_set = signatures diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index 531e3c0afc..844178d722 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -124,7 +124,7 @@ def check_async_queue(nodes, async_queue): :type nodes: list[:py:class:`psyclone.psyir.nodes.Node`] :param async_queue: The async queue to expect in parents. - :type async_queue: \ + :type async_queue: Optional[bool, int, :py:class:`psyclone.core.Reference`] ''' diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index fcd5b30816..0442da529c 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -2529,10 +2529,6 @@ def apply(self, sched, options={}): current = current.parent posn = sched.children.index(current) - # handle default empty options - if options is None: - options = {} - # extract async. Default to None async_queue = options.get('async_queue', None) From 81b9038331019b2bd779943d0abf55bebe7556cc Mon Sep 17 00:00:00 2001 From: SCHREIBER Martin Date: Tue, 26 Nov 2024 10:21:12 +0100 Subject: [PATCH 11/18] updates for CI --- src/psyclone/psyGen.py | 2 + src/psyclone/psyir/nodes/acc_directives.py | 274 ++++++++---------- src/psyclone/psyir/nodes/acc_mixins.py | 24 +- .../transformations/acc_kernels_trans.py | 43 +-- .../tests/psyir/nodes/acc_directives_test.py | 19 +- .../transformations/acc_kernels_trans_test.py | 2 + src/psyclone/transformations.py | 2 + 7 files changed, 173 insertions(+), 193 deletions(-) diff --git a/src/psyclone/psyGen.py b/src/psyclone/psyGen.py index 9fce71ae88..7bd700638d 100644 --- a/src/psyclone/psyGen.py +++ b/src/psyclone/psyGen.py @@ -35,6 +35,8 @@ # Modified by I. Kavcic and L. Turner, Met Office # Modified by C.M. Maynard, Met Office / University of Reading # Modified by J. Henrichs, Bureau of Meteorology +# Modified S. Valat, Inria / Laboratoire Jean Kuntzmann +# Modified M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann # ----------------------------------------------------------------------------- ''' This module provides generic support for PSyclone's PSy code optimisation diff --git a/src/psyclone/psyir/nodes/acc_directives.py b/src/psyclone/psyir/nodes/acc_directives.py index 3445f77803..16efc30584 100644 --- a/src/psyclone/psyir/nodes/acc_directives.py +++ b/src/psyclone/psyir/nodes/acc_directives.py @@ -36,8 +36,9 @@ # C.M. Maynard, Met Office / University of Reading # J. Henrichs, Bureau of Meteorology # Modified A. B. G. Chalk, STFC Daresbury Lab -# S. Valat, INRIA / LJK +# S. Valat, Inria / Laboratoire Jean Kuntzmann # J. G. Wallwork, Met Office / University of Cambridge +# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann # ----------------------------------------------------------------------------- ''' This module contains the implementation of the various OpenACC Directive @@ -51,6 +52,7 @@ from psyclone.psyir.nodes.acc_clauses import (ACCCopyClause, ACCCopyInClause, ACCCopyOutClause) from psyclone.psyir.nodes.assignment import Assignment +from psyclone.psyir.nodes.node import Node from psyclone.psyir.nodes.codeblock import CodeBlock from psyclone.psyir.nodes.directive import (StandaloneDirective, RegionDirective) @@ -63,7 +65,9 @@ from psyclone.psyir.nodes.operation import BinaryOperation from psyclone.psyir.symbols import ScalarType -from typing import Optional, Union +from psyclone.f2pygen import BaseGen + +from typing import Dict, List, Optional, Set, Tuple, Union class ACCDirective(metaclass=abc.ABCMeta): @@ -110,7 +114,12 @@ def validate_global_constraints(self): f"region enclosed by an '{type(self).__name__}'") @property - def signatures(self): + def signatures(self) -> Union[ + Tuple[Set[Signature]], + Tuple[ + Set[Signature], Set[Signature] + ] + ]: ''' Returns a 1-tuple or a 2-tuple of sets depending on the working API. If a 1-tuple, the set includes both input and output signatures @@ -121,9 +130,6 @@ def signatures(self): device (probably a GPU) before the parallel region can be begun. :returns: 1-tuple or 2-tuple of input and output sets of variable names - :rtype: Union[Tuple[Set[:py:class:`psyclone.core.Signature`]], \ - Tuple[Set[:py:class:`psyclone.core.Signature`], \ - Set[:py:class:`psyclone.core.Signature`]]] ''' # pylint: disable=import-outside-toplevel @@ -154,14 +160,14 @@ class ACCRoutineDirective(ACCStandaloneDirective): Class representing an "ACC routine" OpenACC directive in PSyIR. :param parallelism: the level of parallelism in the routine, one of - "gang", "worker", "vector", "seq". - :type parallelism: str + "gang", "seq", "vector", "worker". ''' - SUPPORTED_PARALLELISM = ["seq", "vector", "worker", "gang"] + SUPPORTED_PARALLELISM = ["gang", "seq", "vector", "worker"] - def __init__(self, parallelism="seq", **kwargs): + def __init__(self, parallelism: str = "seq", **kwargs): self.parallelism = parallelism + assert self.parallelism in self.SUPPORTED_PARALLELISM super().__init__(self, **kwargs) @@ -176,10 +182,10 @@ def parallelism(self): return self._parallelism @parallelism.setter - def parallelism(self, value): + def parallelism(self, value: str): ''' - :param str value: the new value for the level-of-parallelism within - this routine (or a called one). + :param value: the new value for the level-of-parallelism within + this routine (or a called one). :raises TypeError: if `value` is not a str. :raises ValueError: if `value` is not a recognised level of @@ -196,12 +202,11 @@ def parallelism(self, value): f"of parallelism but got '{value}'") self._parallelism = value.lower() - def gen_code(self, parent): + def gen_code(self, parent: BaseGen): '''Generate the Fortran ACC Routine Directive and any associated code. :param parent: the parent Node in the Schedule to which to add our content. - :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` ''' # Check the constraints are correct self.validate_global_constraints() @@ -210,13 +215,12 @@ def gen_code(self, parent): parent.add(DirectiveGen(parent, "acc", "begin", "routine", f"{self.parallelism}")) - def begin_string(self): + def begin_string(self) -> str: '''Returns the beginning statement of this directive, i.e. "acc routine". The visitor is responsible for adding the correct directive beginning (e.g. "!$"). :returns: the opening statement of this directive. - :rtype: str ''' return f"acc routine {self.parallelism}" @@ -229,10 +233,8 @@ class ACCEnterDataDirective(ACCStandaloneDirective, ACCAsyncMixin): in which fields are marked as being on the remote device is API-dependent. :param children: list of nodes which the directive should have as children. - :type children: List[:py:class:`psyclone.psyir.nodes.Node`] :param parent: the node in the InvokeSchedule to which to add this directive as a child. - :type parent: :py:class:`psyclone.psyir.nodes.Node` :param async_queue: Enable async support and attach it to the given queue. Can use False to disable, True to enable on default stream. Int to attach to the given stream ID or use a @@ -241,8 +243,10 @@ class ACCEnterDataDirective(ACCStandaloneDirective, ACCAsyncMixin): ''' def __init__( - self, children=None, parent=None, - async_queue: Optional[Union[bool, Reference]] = None + self, + children: List[Node] = None, + parent: Node = None, + async_queue: Union[bool, int, Reference, None] = None ): super().__init__(children=children, parent=parent) ACCAsyncMixin.__init__(self, async_queue) @@ -250,12 +254,11 @@ def __init__( self._sig_set = set() - def gen_code(self, parent): + def gen_code(self, parent: BaseGen): '''Generate the elements of the f2pygen AST for this Node in the Schedule. :param parent: node in the f2pygen AST to which to add node(s). - :type parent: :py:class:`psyclone.f2pygen.BaseGen` :raises GenerationError: if no data is found to copy in. @@ -280,13 +283,12 @@ def gen_code(self, parent): self.data_on_device(parent) parent.add(CommentGen(parent, "")) - def lower_to_language_level(self): + def lower_to_language_level(self) -> Node: ''' In-place replacement of this directive concept into language level PSyIR constructs. :returns: the lowered version of this node. - :rtype: :py:class:`psyclone.psyir.node.Node` ''' # We must generate a list of all of the fields accessed within OpenACC @@ -337,7 +339,7 @@ def begin_string(self): # and members of composite variables must appear later in deep copies. return f"acc enter data{options}" - def data_on_device(self, parent): + def data_on_device(self, parent: Node): ''' Adds nodes into an InvokeSchedule to flag that the data required by the kernels in the data region is now on the device. The generic @@ -345,7 +347,6 @@ def data_on_device(self, parent): APIs if any infrastructure call is needed. :param parent: the node in the InvokeSchedule to which to add nodes - :type parent: :py:class:`psyclone.psyir.nodes.Node` ''' @@ -358,14 +359,14 @@ class ACCParallelDirective(ACCRegionDirective, ACCAsyncMixin): :param default_present: whether this directive includes the `DEFAULT(PRESENT)` clause or not. - :param async_queue: Make the directive asynchronous and attached to the - given stream identified by an ID or by a variable - name pointing to an integer. + :param async_queue: If `True`, make the directive asynchronous. + If of type `int` or a `Reference`, also attach it to the given stream + identified by an ID or by a variable name pointing to an integer. ''' def __init__( self, - async_queue: Union[bool, Reference, int] = False, + async_queue: Union[bool, int, Reference, None] = None, default_present: bool = True, **kwargs ): @@ -373,7 +374,7 @@ def __init__( ACCAsyncMixin.__init__(self, async_queue) self.default_present = default_present - def __eq__(self, other): + def __eq__(self, other) -> bool: ''' Checks whether two nodes are equal. Two ACCParallelDirective nodes are equal if their default_present members are equal and they use the @@ -382,7 +383,6 @@ def __eq__(self, other): :param object other: the object to check equality to. :returns: whether other is equal to self. - :rtype: bool ''' is_eq = super().__eq__(other) @@ -391,12 +391,11 @@ def __eq__(self, other): return is_eq - def gen_code(self, parent): + def gen_code(self, parent: BaseGen): ''' Generate the elements of the f2pygen AST for this Node in the Schedule. :param parent: node in the f2pygen AST to which to add node(s). - :type parent: :py:class:`psyclone.f2pygen.BaseGen` ''' self.validate_global_constraints() @@ -416,14 +415,13 @@ def gen_code(self, parent): self.gen_post_region_code(parent) - def begin_string(self): + def begin_string(self) -> str: ''' Returns the beginning statement of this directive, i.e. "acc parallel" plus any qualifiers. The backend is responsible for adding the correct characters to mark this as a directive (e.g. "!$"). :returns: the opening statement of this directive. - :rtype: str ''' options = "" @@ -436,19 +434,17 @@ def begin_string(self): options += self._build_async_string() return f"acc parallel{options}" - def end_string(self): + def end_string(self) -> str: ''' :returns: the closing statement for this directive. - :rtype: str ''' return "acc end parallel" @property - def default_present(self): + def default_present(self) -> bool: ''' :returns: whether the directive includes the 'default(present)' clause. - :rtype: bool ''' return self._default_present @@ -469,13 +465,12 @@ def default_present(self, value): self._default_present = value @property - def fields(self): + def fields(self) -> List[str]: ''' Returns a list of the names of field objects required by the Kernel call(s) that are children of this directive. :returns: list of names of field arguments. - :rtype: List[str] ''' # Look-up the kernels that are children of this node @@ -491,22 +486,28 @@ class ACCLoopDirective(ACCRegionDirective): ''' Class managing the creation of a '!$acc loop' OpenACC directive. - :param int collapse: Number of nested loops to collapse into a single - iteration space or None. - :param bool independent: Whether or not to add the `independent` clause - to the loop directive. - :param bool sequential: whether or not to add the `seq` clause to the - loop directive. - :param bool gang: whether or not to add the `gang` clause to the - loop directive. - :param bool vector: whether or not to add the `vector` clause to the - loop directive. + :param collapse: Number of nested loops to collapse into a single + iteration space or None. + :param independent: Whether or not to add the `independent` clause + to the loop directive. + :param sequential: whether or not to add the `seq` clause to the + loop directive. + :param gang: whether or not to add the `gang` clause to the + loop directive. + :param vector: whether or not to add the `vector` clause to the + loop directive. :param kwargs: additional keyword arguments provided to the super class. - :type kwargs: dict. ''' - def __init__(self, collapse=None, independent=True, sequential=False, - gang=False, vector=False, **kwargs): + def __init__( + self, + collapse: int = None, + independent: bool = True, + sequential: bool = False, + gang: bool = False, + vector: bool = False, + **kwargs: Dict + ): self.collapse = collapse self._independent = independent self._sequential = sequential @@ -515,7 +516,7 @@ def __init__(self, collapse=None, independent=True, sequential=False, self._check_clauses_consistent() super().__init__(**kwargs) - def __eq__(self, other): + def __eq__(self, other) -> bool: ''' Checks whether two nodes are equal. Two ACCLoopDirective nodes are equal if their collapse, independent, sequential, gang, and vector @@ -524,7 +525,6 @@ def __eq__(self, other): :param object other: the object to check equality to. :returns: whether other is equal to self. - :rtype: bool ''' is_eq = super().__eq__(other) @@ -550,21 +550,19 @@ def _check_clauses_consistent(self): ) @property - def collapse(self): + def collapse(self) -> Union[int, None]: ''' :returns: the number of nested loops to collapse into a single \ iteration space for this node. - :rtype: int | None ''' return self._collapse @collapse.setter - def collapse(self, value): + def collapse(self, value: Optional[int]): ''' :param value: optional number of nested loop to collapse into a \ single iteration space to parallelise. Defaults to None. - :type value: Optional[int] :raises TypeError: if the collapse value given is not an integer \ or NoneType. @@ -584,56 +582,50 @@ def collapse(self, value): self._collapse = value @property - def independent(self): + def independent(self) -> bool: ''' Returns whether the independent clause will be added to this loop directive. :returns: whether the independent clause will be added to this loop \ directive. - :rtype: bool ''' return self._independent @property - def sequential(self): + def sequential(self) -> bool: ''' :returns: whether or not the `seq` clause is added to this loop \ directive. - :rtype: bool ''' return self._sequential @property - def gang(self): + def gang(self) -> bool: ''' :returns: whether or not the `gang` clause is added to this loop directive. - :rtype: bool ''' return self._gang @property - def vector(self): + def vector(self) -> bool: ''' :returns: whether or not the `vector` clause is added to this loop directive. - :rtype: bool ''' return self._vector - def node_str(self, colour=True): - ''' - Returns the name of this node with (optional) control codes + def node_str(self, colour: bool = True) -> str: + '''Returns the name of this node with (optional) control codes to generate coloured output in a terminal that supports it. - :param bool colour: whether or not to include colour control codes. + :param colour: whether or not to include colour control codes. :returns: description of this node, possibly coloured. - :rtype: str ''' self._check_clauses_consistent() @@ -646,8 +638,7 @@ def node_str(self, colour=True): return text def validate_global_constraints(self): - ''' - Perform validation of those global constraints that can only be done + '''Perform validation of those global constraints that can only be done at code-generation time. :raises GenerationError: if this ACCLoopDirective is not enclosed @@ -668,14 +659,13 @@ def validate_global_constraints(self): super().validate_global_constraints() - def gen_code(self, parent): - ''' - Generate the f2pygen AST entries in the Schedule for this OpenACC + def gen_code(self, parent: BaseGen): + '''Generate the f2pygen AST entries in the Schedule for this OpenACC loop directive. :param parent: the parent Node in the Schedule to which to add our content. - :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` + :raises GenerationError: if this "!$acc loop" is not enclosed within an ACC Parallel region. @@ -691,16 +681,15 @@ def gen_code(self, parent): for child in self.children: child.gen_code(parent) - def begin_string(self, leading_acc=True): + def begin_string(self, leading_acc: bool = True) -> str: ''' Returns the opening statement of this directive, i.e. "acc loop" plus any qualifiers. If `leading_acc` is False then the leading "acc loop" text is not included. - :param bool leading_acc: whether or not to include the leading \ - "acc loop" in the text that is returned. + :param leading_acc: whether or not to include the leading + "acc loop" in the text that is returned. :returns: the opening statement of this directive. - :rtype: str ''' clauses = [] @@ -721,27 +710,23 @@ def begin_string(self, leading_acc=True): clauses += [f"collapse({self.collapse})"] return " ".join(clauses) - def end_string(self): + def end_string(self) -> str: ''' Would return the end string for this directive but "acc loop" doesn't have a closing directive. :returns: empty string. - :rtype: str ''' return "" class ACCKernelsDirective(ACCRegionDirective, ACCAsyncMixin): - ''' - Class representing the !$ACC KERNELS directive in the PSyIR. + '''Class representing the `!$ACC KERNELS` directive in the PSyIR. :param children: the PSyIR nodes to be enclosed in the Kernels region and which are therefore children of this node. - :type children: List[:py:class:`psyclone.psyir.nodes.Node`] :param parent: the parent of this node in the PSyIR. - :type parent: sub-class of :py:class:`psyclone.psyir.nodes.Node` :param bool default_present: whether or not to add the "default(present)" clause to the kernels directive. @@ -752,8 +737,11 @@ class ACCKernelsDirective(ACCRegionDirective, ACCAsyncMixin): ''' def __init__( - self, children=None, parent=None, default_present=True, - async_queue: Optional[Union[bool, Reference]] = None + self, + children: List[Node] = None, + parent: Node = None, + default_present: bool = True, + async_queue: Union[bool, int, Reference, None] = None ): super().__init__(children=children, parent=parent) ACCAsyncMixin.__init__(self, async_queue) @@ -777,23 +765,21 @@ def __eq__(self, other): return is_eq @property - def default_present(self): + def default_present(self) -> bool: ''' :returns: whether the "default(present)" clause is added to the kernels directive. - :rtype: bool ''' return self._default_present - def gen_code(self, parent): + def gen_code(self, parent: BaseGen): ''' Generate the f2pygen AST entries in the Schedule for this OpenACC Kernels directive. :param parent: the parent Node in the Schedule to which to add this content. - :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` ''' self.validate_global_constraints() @@ -809,13 +795,12 @@ def gen_code(self, parent): self.gen_post_region_code(parent) - def begin_string(self): + def begin_string(self) -> str: '''Returns the beginning statement of this directive, i.e. "acc kernels ...". The backend is responsible for adding the correct directive beginning (e.g. "!$"). :returns: the beginning statement for this directive. - :rtype: str ''' result = "acc kernels" @@ -829,14 +814,12 @@ def begin_string(self): return result - def end_string(self): - ''' - Returns the ending statement for this directive. The backend is + def end_string(self) -> str: + '''Returns the ending statement for this directive. The backend is responsible for adding the language-specific syntax that marks this as a directive. :returns: the closing statement for this directive. - :rtype: str ''' return "acc end kernels" @@ -860,18 +843,16 @@ def gen_code(self, _): "ACCDataDirective.gen_code should not have been called.") @staticmethod - def _validate_child(position, child): + def _validate_child(position: int, child: Node) -> str: ''' Check that the supplied node is a valid child of this node at the specified position. - :param int position: the proposed position of this child in the list + :param position: the proposed position of this child in the list of children. :param child: the proposed child node. - :type child: :py:class:`psyclone.psyir.nodes.Node` :returns: whether or not the proposed child and position are valid. - :rtype: bool ''' if position == 0: @@ -879,18 +860,16 @@ def _validate_child(position, child): return isinstance(child, (ACCCopyClause, ACCCopyInClause, ACCCopyOutClause)) - def begin_string(self): + def begin_string(self) -> str: ''' :returns: the beginning of the opening statement of this directive. - :rtype: str ''' return "acc data" - def end_string(self): + def end_string(self) -> str: ''' :returns: the text for the end of this directive region. - :rtype: str ''' return "acc end data" @@ -939,14 +918,10 @@ class ACCUpdateDirective(ACCStandaloneDirective, ACCAsyncMixin): :param signatures: the access signature(s) that need to be synchronised with the device. - :type signatures: Set[:py:class:`psyclone.core.Signature`] :param direction: the direction of the synchronisation. - :type direction: str :param children: list of nodes which the directive should have as children. - :type children: List[:py:class:`psyclone.psyir.nodes.Node`] :param parent: the node in the InvokeSchedule to which to add this directive as a child. - :type parent: :py:class:`psyclone.psyir.nodes.Node` :param if_present: whether or not to include the `if_present` clause on the update directive (this instructs the directive to silently ignore any variables that are not @@ -960,9 +935,13 @@ class ACCUpdateDirective(ACCStandaloneDirective, ACCAsyncMixin): _VALID_DIRECTIONS = ("self", "host", "device") def __init__( - self, signatures, direction, children=None, parent=None, + self, + signatures: Signature, + direction: str, + children: List[Node] = None, + parent: Node = None, if_present: Optional[bool] = True, - async_queue: Optional[Union[bool, Reference]] = None + async_queue: Union[bool, int, Reference, None] = None ): super().__init__(children=children, parent=parent) ACCAsyncMixin.__init__(self, async_queue) @@ -970,7 +949,7 @@ def __init__( self.direction = direction self.if_present = if_present - def __eq__(self, other): + def __eq__(self, other) -> bool: ''' Checks whether two nodes are equal. Two ACCUpdateDirective nodes are equal if their sig_set, direction and if_present members are equal. @@ -978,7 +957,6 @@ def __eq__(self, other): :param object other: the object to check equality to. :returns: whether other is equal to self. - :rtype: bool ''' is_eq = super().__eq__(other) @@ -990,38 +968,34 @@ def __eq__(self, other): return is_eq @property - def sig_set(self): + def sig_set(self) -> Signature: ''' :returns: the set of signatures to synchronise with the device. - :rtype: Set[:py:class:`psyclone.core.Signature`] ''' return self._sig_set @property - def direction(self): + def direction(self) -> str: ''' :returns: the direction of the synchronisation. - :rtype: str ''' return self._direction @property - def if_present(self): + def if_present(self) -> bool: ''' :returns: whether or not to add the 'if_present' clause. - :rtype: bool ''' return self._if_present @sig_set.setter - def sig_set(self, signatures): + def sig_set(self, signatures: Signature): ''' :param signatures: the access signature(s) that need to be \ synchronised with the device. - :type signatures: Set[:py:class:`psyclone.core.Signature`] :raises TypeError: if signatures is not a set of access signatures. ''' @@ -1034,9 +1008,9 @@ def sig_set(self, signatures): self._sig_set = signatures @direction.setter - def direction(self, direction): + def direction(self, direction: str): ''' - :param str direction: the direction of the synchronisation. + :param direction: the direction of the synchronisation. :raises ValueError: if the direction argument is not a string with \ value 'self', 'host' or 'device'. @@ -1050,12 +1024,12 @@ def direction(self, direction): self._direction = direction @if_present.setter - def if_present(self, if_present): + def if_present(self, if_present: bool): ''' - :param bool if_present: whether or not to add the 'if_present' \ + :param if_present: whether or not to add the 'if_present' \ clause. - :raises TypeError: if if_present is not a boolean. + :raises TypeError: if `if_present` is not a boolean. ''' if not isinstance(if_present, bool): raise TypeError( @@ -1064,14 +1038,13 @@ def if_present(self, if_present): self._if_present = if_present - def begin_string(self): + def begin_string(self) -> str: ''' Returns the beginning statement of this directive, i.e. "acc update host(symbol)". The backend is responsible for adding the correct characters to mark this as a directive (e.g. "!$"). :returns: the opening statement of this directive. - :rtype: str ''' if not self._sig_set: @@ -1094,16 +1067,14 @@ def begin_string(self): f"acc update {condition}{self._direction}({sym_list}){asyncvalue}" -def _sig_set_to_string(sig_set): +def _sig_set_to_string(sig_set: Set[Signature]) -> str: ''' Converts the provided set of signatures into a lexically sorted string of comma-separated signatures which also includes, for signatures that represent variables of a derived type, the composing subsignatures. :param sig_set: set of signature(s) to include in the string. - :type sig_set: Set[:py:class:`psyclone.core.Signature`] :returns: a lexically sorted string of comma-separated (sub)signatures. - :rtype: str ''' names = {s[:i+1].to_language() for s in sig_set for i in range(len(s))} @@ -1115,20 +1086,18 @@ class ACCWaitDirective(ACCStandaloneDirective): Class representing the !$ACC WAIT directive in the PSyIR. :param wait_queue: Which ACC async stream to wait. None to wait all. - :type wait_queue: Optional[:py:class:`psyclone.psyir.nodes.Reference`, int] ''' - def __init__(self, wait_queue=None): + def __init__(self, wait_queue: Union[Reference, int, None] = None): # call parent super().__init__() self.wait_queue = wait_queue - def __eq__(self, other): + def __eq__(self, other) -> bool: ''' Test the equality of two directives. :returns: If the two directives are equals. - :rtype: bool ''' is_eq = super().__eq__(other) @@ -1136,22 +1105,19 @@ def __eq__(self, other): return is_eq @property - def wait_queue(self): + def wait_queue(self) -> Union[int, Reference, None]: ''' :returns: The queue to wait on. - :rtype: Optional[int, :py:class:`psyclone.psyir.nodes.Reference`] ''' return self._wait_queue @wait_queue.setter - def wait_queue(self, wait_queue): + def wait_queue(self, wait_queue: Union[int, Reference, None]): ''' Setter to assign a specific wait queue to wait for. :param wait_queue: The wait queue to expect, or None for all. - :type wait_queue: Optional[int, \ - :py:class:`psyclone.psyir.nodes.Reference`] :raises TypeError: if `wait_queue` is of the wrong type ''' @@ -1164,13 +1130,12 @@ def wait_queue(self, wait_queue): # set self._wait_queue = wait_queue - def gen_code(self, parent): + def gen_code(self, parent: BaseGen): ''' Generate the given directive code to add it to the call tree. :param parent: the parent Node in the Schedule to which to add this \ content. - :type parent: sub-class of :py:class:`psyclone.f2pygen.BaseGen` ''' # remove the "acc wait" added by begin_string() and keep only the # parameters @@ -1179,13 +1144,12 @@ def gen_code(self, parent): # Generate the directive parent.add(DirectiveGen(parent, "acc", "begin", "wait", args)) - def begin_string(self): + def begin_string(self) -> str: '''Returns the beginning statement of this directive, i.e. "acc wait ...". The backend is responsible for adding the correct directive beginning (e.g. "!$"). :returns: the beginning statement for this directive. - :rtype: str ''' # default basic directive @@ -1210,32 +1174,28 @@ class ACCAtomicDirective(ACCRegionDirective): currently unsupported in the PSyIR. ''' - def begin_string(self): + def begin_string(self) -> str: ''' :returns: the opening string statement of this directive. - :rtype: str ''' return "acc atomic" - def end_string(self): + def end_string(self) -> str: ''' :returns: the ending string statement of this directive. - :rtype: str ''' return "acc end atomic" @staticmethod - def is_valid_atomic_statement(stmt): + def is_valid_atomic_statement(stmt: Node) -> bool: ''' Check if a given statement is a valid OpenACC atomic expression. :param stmt: a node to be validated. - :type stmt: :py:class:`psyclone.psyir.nodes.Node` :returns: whether a given statement is compliant with the OpenACC atomic expression. - :rtype: bool ''' if not isinstance(stmt, Assignment): diff --git a/src/psyclone/psyir/nodes/acc_mixins.py b/src/psyclone/psyir/nodes/acc_mixins.py index b19275a5c9..2ad794ed82 100644 --- a/src/psyclone/psyir/nodes/acc_mixins.py +++ b/src/psyclone/psyir/nodes/acc_mixins.py @@ -31,7 +31,8 @@ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- -# Authors S. Valat, INRIA / LJK +# Authors S. Valat, Inria / Lab. Jean Kuntzmann +# Modified M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann # ----------------------------------------------------------------------------- ''' This module contains the mixins to apply some ACC features on many @@ -41,6 +42,8 @@ from psyclone.psyir.nodes.reference import Reference +from typing import Union + class ACCAsyncMixin(metaclass=abc.ABCMeta): ''' @@ -52,32 +55,33 @@ class ACCAsyncMixin(metaclass=abc.ABCMeta): stream. Use int to attach to the given stream ID or use a variable Reference to say at runtime what stream to be used. - :type async_queue: bool | int | :py:class:`psyclone.core.Reference` ''' - def __init__(self, async_queue=None): + def __init__( + self, + async_queue: Union[bool, int, Reference, None] = None + ): self.async_queue = async_queue @property - def async_queue(self): + def async_queue(self) -> Union[bool, int, Reference, None]: ''' :returns: whether or not async is enabled and if so, which queue this node is associated with. (True indicates the default stream.) Can use False to disable, True to enable on default stream. Int to attach to the given stream ID or use a variable Reference to say at runtime what stream to be used. - :rtype async_queue: bool | int | :py:class:`psyclone.core.Reference` ''' return self._async_queue @async_queue.setter - def async_queue(self, async_queue): + def async_queue(self, async_queue: Union[bool, int, Reference, None]): ''' :param async_queue: Enable async support and attach it to the given queue. Can use False to disable, True to enable on default stream. Int to attach to the given stream ID or use a variable Reference to say at runtime what stream to be used. - :type async_queue: bool | int | :py:class:`psyclone.core.Reference` + :raises TypeError: if `wait_queue` is of the wrong type ''' # check @@ -89,13 +93,12 @@ def async_queue(self, async_queue): # assign self._async_queue = async_queue - def _build_async_string(self): + def _build_async_string(self) -> str: ''' Build the async arg to concat to the acc directive when generating the code. :returns: The `async()` option to add to the directive. - :rtype: str ''' # default result = "" @@ -115,7 +118,7 @@ def _build_async_string(self): # ok return result - def __eq__(self, other): + def __eq__(self, other) -> bool: ''' Checks whether two nodes are equal. Two ACCAsyncMixin are equal if their async_queue members are equal. @@ -123,7 +126,6 @@ def __eq__(self, other): :param object other: the object to check equality to. :returns: whether other is equal to self. - :rtype: bool ''' if type(self) is not type(other): return False diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index 844178d722..46fc19f326 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -35,6 +35,8 @@ # A. B. G. Chalk STFC Daresbury Lab # J. Henrichs, Bureau of Meteorology # Modified I. Kavcic, J. G. Wallwork, O. Brunt and L. Turner, Met Office +# S. Valat, Inria / Laboratoire Jean Kuntzmann +# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann ''' This module provides the ACCKernelsTrans transformation. ''' @@ -44,13 +46,15 @@ from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes import ( ACCEnterDataDirective, ACCKernelsDirective, Assignment, - Call, CodeBlock, Loop, + Call, CodeBlock, Loop, Node, PSyDataNode, Reference, Return, Routine, Statement, WhileLoop) from psyclone.psyir.symbols import UnsupportedFortranType from psyclone.psyir.transformations.region_trans import RegionTrans from psyclone.psyir.transformations.transformation_error import ( TransformationError) +from typing import Any, Dict, List, Union + class ACCKernelsTrans(RegionTrans): ''' @@ -76,17 +80,18 @@ class ACCKernelsTrans(RegionTrans): excluded_node_types = (CodeBlock, Return, PSyDataNode, psyGen.HaloExchange, WhileLoop) - def apply(self, node, options={}): + def apply( + self, + node: Union[Node, List[Node]], + options: Dict[str, Any] = {} + ): ''' Enclose the supplied list of PSyIR nodes within an OpenACC Kernels region. :param node: a node or list of nodes in the PSyIR to enclose. - :type node: :py:class:`psyclone.psyir.nodes.Node` | - list[:py:class:`psyclone.psyir.nodes.Node`] :param options: a dictionary with options for transformations. - :type options: Optional[Dict[str, Any]] - :param bool options["default_present"]: whether or not the kernels + "default_present": boolean whether or not the kernels region should have the 'default present' attribute (indicating that data is already on the accelerator). When using managed memory this option should be False. @@ -115,17 +120,16 @@ def apply(self, node, options={}): parent.children.insert(start_index, directive) @staticmethod - def check_async_queue(nodes, async_queue): + def check_async_queue( + nodes: List[Node], + async_queue: Union[bool, int, Reference, None] + ): ''' Common function to check that all parent data directives have the same async queue. :param node: the nodes in the PSyIR to enclose. - :type nodes: list[:py:class:`psyclone.psyir.nodes.Node`] - :param async_queue: The async queue to expect in parents. - :type async_queue: - Optional[bool, int, :py:class:`psyclone.core.Reference`] ''' if async_queue is None: @@ -136,8 +140,10 @@ def check_async_queue(nodes, async_queue): raise TypeError(f"Invalid async_queue value, expect Reference or " f"integer or None or False, got : {async_queue}") + # Perform an additional check whether a queue has been used before. + # Note this to work only for the current routine. parent = nodes[0].ancestor(ACCAsyncMixin) - if parent: + if parent is not None: if async_queue != parent.async_queue: raise TransformationError( f"Cannot apply ACCKernelsTrans with asynchronous " @@ -155,7 +161,11 @@ def check_async_queue(nodes, async_queue): f"has an ENTER DATA directive specifying queue " f"'{edata[0].async_queue}'") - def validate(self, nodes, options={}): + def validate( + self, + nodes: List[Node], + options: Dict[str, Any] = {} + ): # pylint: disable=signature-differs ''' Check that we can safely enclose the supplied node or list of nodes @@ -163,13 +173,12 @@ def validate(self, nodes, options={}): :param nodes: the proposed PSyIR node or nodes to enclose in the kernels region. - :type nodes: (list of) :py:class:`psyclone.psyir.nodes.Node` :param options: a dictionary with options for transformations. - :type options: Optional[Dict[str, Any]] - :param bool options["disable_loop_check"]: whether to disable the + "disable_loop_check": boolean whether to disable the check that the supplied region contains 1 or more loops. Default is False (i.e. the check is enabled). - :param int options["async_queue"]: use the specified async stream. + + "async_queue": the async stream to be used. :raises NotImplementedError: if the supplied Nodes belong to a GOInvokeSchedule. diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index 231e5ef7a7..e3288cd1f2 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -33,8 +33,10 @@ # ----------------------------------------------------------------------------- # Authors R. W. Ford, A. R. Porter, S. Siso and N. Nobre, STFC Daresbury Lab # Modified I. Kavcic, Met Office -# Modified A. B. G. Chalk, STFC Daresbury Lab -# Modified J. G. Wallwork, Met Office / University of Cambridge +# A. B. G. Chalk, STFC Daresbury Lab +# J. G. Wallwork, Met Office / University of Cambridge +# S. Valat, Inria / Laboratoire Jean Kuntzmann +# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann # ----------------------------------------------------------------------------- ''' Performs py.test tests on the OpenACC PSyIR Directive nodes. ''' @@ -545,14 +547,15 @@ def test_acc_routine_parallelism(): assert target.parallelism == "seq" target.parallelism = "vector" assert target.parallelism == "vector" - with pytest.raises(TypeError) as err: + with pytest.raises(TypeError) as einfo: target.parallelism = 1 assert ("Expected a str to specify the level of parallelism but got 'int'" - in str(err.value)) - with pytest.raises(ValueError) as err: + in str(einfo.value)) + with pytest.raises(ValueError) as einfo: target.parallelism = "sequential" - assert ("Expected one of ['seq', 'vector', 'worker', 'gang'] for the level" - " of parallelism but got 'sequential'" in str(err.value)) + print(str(einfo.value)) + assert ("Expected one of ['gang', 'seq', 'vector', 'worker'] for the level" + " of parallelism but got 'sequential'" in str(einfo.value)) # Class ACCUpdateDirective @@ -588,7 +591,7 @@ def test_accupdatedirective_init(): directive = ACCUpdateDirective(sig, "host", if_present=False) assert directive.if_present is False - assert directive.async_queue is False + assert directive.async_queue is None directive = ACCUpdateDirective(sig, "host", async_queue=True) assert directive.async_queue is True diff --git a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py index f2ec5972b8..2d924f2007 100644 --- a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py @@ -32,6 +32,8 @@ # POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- # Authors: R. W. Ford, A. R. Porter and S. Siso, STFC Daresbury Lab +# Modified S. Valat, Inria / Laboratoire Jean Kuntzmann +# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann '''Module containing py.test tests for the transformation of the PSyIR of generic code using the OpenACC 'kernels' directive. diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index 0442da529c..7ff28b6478 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -35,6 +35,8 @@ # A. B. G. Chalk STFC Daresbury Lab # J. Henrichs, Bureau of Meteorology # Modified I. Kavcic, J. G. Wallwork, O. Brunt and L. Turner, Met Office +# S. Valat, Inria / Laboratoire Jean Kuntzmann +# M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann ''' This module provides the various transformations that can be applied to PSyIR nodes. There are both general and API-specific transformation From aefa05d578e6239e093e73d5789321e637457b1c Mon Sep 17 00:00:00 2001 From: SCHREIBER Martin Date: Tue, 26 Nov 2024 10:41:45 +0100 Subject: [PATCH 12/18] removed obsolete line => 100% coverage --- src/psyclone/transformations.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index 7ff28b6478..bdd4eb214c 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -2592,9 +2592,6 @@ def validate(self, sched, options={}): raise TransformationError("Schedule already has an OpenACC data " "region - cannot add an enter data.") - # handle async option - if options is None: - options = {} async_queue = options.get('async_queue', None) # check consistency with childs about async_queue From c8a2629fe948bbdd38e051a5214fc4494331347c Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 19 Dec 2024 15:15:45 +0000 Subject: [PATCH 13/18] #1664 begin adding new clause [skip ci] --- src/psyclone/psyir/nodes/acc_clauses.py | 29 +++++++++++++++++++++- src/psyclone/psyir/nodes/acc_directives.py | 18 +++++++++----- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/psyclone/psyir/nodes/acc_clauses.py b/src/psyclone/psyir/nodes/acc_clauses.py index c9186e4553..3cafae99bb 100644 --- a/src/psyclone/psyir/nodes/acc_clauses.py +++ b/src/psyclone/psyir/nodes/acc_clauses.py @@ -38,9 +38,35 @@ Clause nodes.''' from psyclone.psyir.nodes.clause import Clause +from psyclone.psyir.nodes.datanode import DataNode +from psyclone.psyir.nodes.node import Node from psyclone.psyir.nodes.reference import Reference +class ACCAsyncQueueClause(Clause): + ''' + OpenACC async clause. Specifies which queue, if any, this node is + associated with. + + ''' + _children_valid_format = "DataNode" + _clause_string = "async" + + @staticmethod + def _validate_child(position: int, child: Node) -> bool: + ''' + Decides whether a given child and position are valid for this node. + Only zero or one child of type DataNode is permitted. + + :param position: the position to be validated. + :param child: a child to be validated. + + ''' + if position != 0: + return False + return isinstance(child, DataNode) + + class ACCCopyClause(Clause): ''' OpenACC copy clause. Specifies a list of variables that are to be copied @@ -120,4 +146,5 @@ def _validate_child(position, child): return isinstance(child, Reference) -__all__ = ["ACCCopyClause", "ACCCopyInClause", "ACCCopyOutClause"] +__all__ = ["ACCAsyncQueueClause", "ACCCopyClause", + "ACCCopyInClause", "ACCCopyOutClause"] diff --git a/src/psyclone/psyir/nodes/acc_directives.py b/src/psyclone/psyir/nodes/acc_directives.py index 16efc30584..07e6f97cb8 100644 --- a/src/psyclone/psyir/nodes/acc_directives.py +++ b/src/psyclone/psyir/nodes/acc_directives.py @@ -49,8 +49,10 @@ from psyclone.core import Signature from psyclone.f2pygen import DirectiveGen, CommentGen from psyclone.errors import GenerationError, InternalError -from psyclone.psyir.nodes.acc_clauses import (ACCCopyClause, ACCCopyInClause, - ACCCopyOutClause) +from psyclone.psyir.nodes.acc_clauses import ( + ACCAsyncQueueClause, ACCCopyClause, ACCCopyInClause, + ACCCopyOutClause) +from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes.assignment import Assignment from psyclone.psyir.nodes.node import Node from psyclone.psyir.nodes.codeblock import CodeBlock @@ -60,7 +62,6 @@ from psyclone.psyir.nodes.psy_data_node import PSyDataNode from psyclone.psyir.nodes.routine import Routine from psyclone.psyir.nodes.reference import Reference -from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes.schedule import Schedule from psyclone.psyir.nodes.operation import BinaryOperation from psyclone.psyir.symbols import ScalarType @@ -237,9 +238,8 @@ class ACCEnterDataDirective(ACCStandaloneDirective, ACCAsyncMixin): directive as a child. :param async_queue: Enable async support and attach it to the given queue. Can use False to disable, True to enable on default - stream. Int to attach to the given stream ID or use a - variable Signature to say at runtime what stream to be - used. + stream. Int to attach to the given stream ID or use a PSyIR + expression to say at runtime what stream to be used. ''' def __init__( @@ -249,6 +249,12 @@ def __init__( async_queue: Union[bool, int, Reference, None] = None ): super().__init__(children=children, parent=parent) + if async_queue is not None: + # TODO - convert async_queue value to PSyIR if necessary and + # add as child of clause. + clause = ACCAsyncQueueClause() + self.addchild(clause) + ACCAsyncMixin.__init__(self, async_queue) self._acc_dirs = None # List of parallel directives From 50ddb926e78f92cdf2cef00f92fc24169ba3f4aa Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 9 Jan 2025 12:57:58 +0000 Subject: [PATCH 14/18] #1664 more reworking to use Clause [skip ci] --- src/psyclone/psyir/backend/fortran.py | 11 +-- src/psyclone/psyir/nodes/acc_clauses.py | 13 ++- src/psyclone/psyir/nodes/acc_directives.py | 81 ++++++++++++---- src/psyclone/psyir/nodes/acc_mixins.py | 97 +++++++++++++------ src/psyclone/psyir/nodes/directive.py | 20 ++-- .../transformations/acc_kernels_trans.py | 2 +- .../tests/psyir/nodes/acc_directives_test.py | 37 +++---- src/psyclone/transformations.py | 24 ++--- 8 files changed, 185 insertions(+), 100 deletions(-) diff --git a/src/psyclone/psyir/backend/fortran.py b/src/psyclone/psyir/backend/fortran.py index e2bd7008cb..599fa93fdd 100644 --- a/src/psyclone/psyir/backend/fortran.py +++ b/src/psyclone/psyir/backend/fortran.py @@ -1674,14 +1674,11 @@ def standalonedirective_node(self, node): result = f"{self._nindent}!${node.begin_string()}" clause_list = [] - # Currently no standalone directives have clauses associated - # so this code is left commented out. If a standalone directive - # is added with clauses, this should be added in. - # for clause in node.clauses: - # clause_list.append(self._visit(clause)) + for clause in node.clauses: + clause_list.append(self._visit(clause)) # Add a space only if there are clauses - # if len(clause_list) > 0: - # result = result + " " + if len(clause_list) > 0: + result = result + " " result = result + ", ".join(clause_list) result = result + "\n" diff --git a/src/psyclone/psyir/nodes/acc_clauses.py b/src/psyclone/psyir/nodes/acc_clauses.py index 3cafae99bb..ca65c18396 100644 --- a/src/psyclone/psyir/nodes/acc_clauses.py +++ b/src/psyclone/psyir/nodes/acc_clauses.py @@ -45,8 +45,8 @@ class ACCAsyncQueueClause(Clause): ''' - OpenACC async clause. Specifies which queue, if any, this node is - associated with. + OpenACC async clause. Has one child which specifies which queue, if any, + this node is associated with. ''' _children_valid_format = "DataNode" @@ -66,6 +66,15 @@ def _validate_child(position: int, child: Node) -> bool: return False return isinstance(child, DataNode) + @property + def queue(self) -> Union[DataNode, None]: + ''' + :returns: the queue specified by this clause (if any) + ''' + if self.children: + return self.children[0] + return None + class ACCCopyClause(Clause): ''' diff --git a/src/psyclone/psyir/nodes/acc_directives.py b/src/psyclone/psyir/nodes/acc_directives.py index 07e6f97cb8..4ae1a7e94c 100644 --- a/src/psyclone/psyir/nodes/acc_directives.py +++ b/src/psyclone/psyir/nodes/acc_directives.py @@ -54,17 +54,19 @@ ACCCopyOutClause) from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes.assignment import Assignment -from psyclone.psyir.nodes.node import Node +from psyclone.psyir.nodes.clause import Clause from psyclone.psyir.nodes.codeblock import CodeBlock from psyclone.psyir.nodes.directive import (StandaloneDirective, RegionDirective) from psyclone.psyir.nodes.intrinsic_call import IntrinsicCall +from psyclone.psyir.nodes.literal import Literal +from psyclone.psyir.nodes.node import Node from psyclone.psyir.nodes.psy_data_node import PSyDataNode from psyclone.psyir.nodes.routine import Routine from psyclone.psyir.nodes.reference import Reference from psyclone.psyir.nodes.schedule import Schedule from psyclone.psyir.nodes.operation import BinaryOperation -from psyclone.psyir.symbols import ScalarType +from psyclone.psyir.symbols import INTEGER_TYPE, ScalarType from psyclone.f2pygen import BaseGen @@ -92,13 +94,16 @@ class ACCRegionDirective(ACCDirective, RegionDirective, metaclass=abc.ABCMeta): ''' Base class for all OpenACC region directive statements. ''' + # Textual description of the node. + _children_valid_format = "Schedule, Clause*" + def validate_global_constraints(self): ''' Perform validation checks for any global constraints. This can only be done at code-generation time. - :raises GenerationError: if this ACCRegionDirective encloses any form \ - of PSyData node since calls to PSyData routines within OpenACC \ + :raises GenerationError: if this ACCRegionDirective encloses any form + of PSyData node since calls to PSyData routines within OpenACC regions are not supported. ''' @@ -114,6 +119,21 @@ def validate_global_constraints(self): f"{[type(node).__name__ for node in data_nodes]} within a " f"region enclosed by an '{type(self).__name__}'") + @staticmethod + def _validate_child(position, child): + ''' + :param int position: the position to be validated. + :param child: a child to be validated. + :type child: :py:class:`psyclone.psyir.nodes.Node` + + :return: whether the given child and position are valid for this node. + :rtype: bool + + ''' + if position == 0: + return isinstance(child, Schedule) + return isinstance(child, Clause) + @property def signatures(self) -> Union[ Tuple[Set[Signature]], @@ -155,6 +175,19 @@ class ACCStandaloneDirective(ACCDirective, StandaloneDirective, metaclass=abc.ABCMeta): ''' Base class for all standalone OpenACC directive statements. ''' + @staticmethod + def _validate_child(position, child): + ''' + :param int position: the position to be validated. + :param child: a child to be validated. + :type child: :py:class:`psyclone.psyir.nodes.Node` + + :return: whether the given child and position are valid for this node. + :rtype: bool + + ''' + return StandaloneDirective._validate_child(position, child) + class ACCRoutineDirective(ACCStandaloneDirective): ''' @@ -241,19 +274,16 @@ class ACCEnterDataDirective(ACCStandaloneDirective, ACCAsyncMixin): stream. Int to attach to the given stream ID or use a PSyIR expression to say at runtime what stream to be used. + :raises TypeError: if async_queue is of the wrong type. + ''' def __init__( self, children: List[Node] = None, parent: Node = None, - async_queue: Union[bool, int, Reference, None] = None + async_queue: Union[bool, int, Reference] = False ): super().__init__(children=children, parent=parent) - if async_queue is not None: - # TODO - convert async_queue value to PSyIR if necessary and - # add as child of clause. - clause = ACCAsyncQueueClause() - self.addchild(clause) ACCAsyncMixin.__init__(self, async_queue) self._acc_dirs = None # List of parallel directives @@ -270,6 +300,7 @@ def gen_code(self, parent: BaseGen): ''' self.validate_global_constraints() + self.lower_to_language_level() # Leverage begin_string() to raise an exception if there are no # variables to copyin but discard the generated string since it is @@ -372,7 +403,7 @@ class ACCParallelDirective(ACCRegionDirective, ACCAsyncMixin): ''' def __init__( self, - async_queue: Union[bool, int, Reference, None] = None, + async_queue: Union[bool, int, Reference] = False, default_present: bool = True, **kwargs ): @@ -734,20 +765,20 @@ class ACCKernelsDirective(ACCRegionDirective, ACCAsyncMixin): and which are therefore children of this node. :param parent: the parent of this node in the PSyIR. :param bool default_present: whether or not to add the - "default(present)" clause to the kernels - directive. + "default(present)" clause to the kernels directive. :param async_queue: Make the directive asynchronous and attached to the given stream identified by an ID or by a variable name pointing to an integer. ''' + _children_valid_format = "Schedule, Clause*" def __init__( self, children: List[Node] = None, parent: Node = None, default_present: bool = True, - async_queue: Union[bool, int, Reference, None] = None + async_queue: Union[bool, int, Reference] = False ): super().__init__(children=children, parent=parent) ACCAsyncMixin.__init__(self, async_queue) @@ -770,6 +801,21 @@ def __eq__(self, other): return is_eq + @staticmethod + def _validate_child(position: int, child: Node) -> bool: + ''' + :param int position: the position to be validated. + :param child: a child to be validated. + :type child: :py:class:`psyclone.psyir.nodes.Node` + + :return: whether the given child and position are valid for this node. + :rtype: bool + + ''' + if position == 0: + return isinstance(child, Schedule) + return isinstance(child, ACCAsyncQueueClause) + @property def default_present(self) -> bool: ''' @@ -795,7 +841,8 @@ def gen_code(self, parent: BaseGen): parent.add(DirectiveGen(parent, "acc", "begin", *self.begin_string().split()[1:])) for child in self.children: - child.gen_code(parent) + if not isinstance(child, Clause): + child.gen_code(parent) parent.add(DirectiveGen(parent, *self.end_string().split())) @@ -849,7 +896,7 @@ def gen_code(self, _): "ACCDataDirective.gen_code should not have been called.") @staticmethod - def _validate_child(position: int, child: Node) -> str: + def _validate_child(position: int, child: Node) -> bool: ''' Check that the supplied node is a valid child of this node at the specified position. @@ -947,7 +994,7 @@ def __init__( children: List[Node] = None, parent: Node = None, if_present: Optional[bool] = True, - async_queue: Union[bool, int, Reference, None] = None + async_queue: Union[bool, int, Reference] = False ): super().__init__(children=children, parent=parent) ACCAsyncMixin.__init__(self, async_queue) diff --git a/src/psyclone/psyir/nodes/acc_mixins.py b/src/psyclone/psyir/nodes/acc_mixins.py index 2ad794ed82..71354d2768 100644 --- a/src/psyclone/psyir/nodes/acc_mixins.py +++ b/src/psyclone/psyir/nodes/acc_mixins.py @@ -39,10 +39,12 @@ classes.''' import abc +from typing import Union +from psyclone.psyir.nodes.acc_clauses import ACCAsyncQueueClause +from psyclone.psyir.nodes.literal import Literal from psyclone.psyir.nodes.reference import Reference - -from typing import Union +from psyclone.psyir.symbols import INTEGER_TYPE class ACCAsyncMixin(metaclass=abc.ABCMeta): @@ -58,12 +60,52 @@ class ACCAsyncMixin(metaclass=abc.ABCMeta): ''' def __init__( self, - async_queue: Union[bool, int, Reference, None] = None + async_queue: Union[bool, int, Reference] = False ): - self.async_queue = async_queue + clause = self._create_clause(async_queue) + if clause: + self.addchild(clause) + + @staticmethod + def convert_queue(async_queue): + ''' + ''' + if isinstance(async_queue, bool): + qarg = async_queue + elif isinstance(async_queue, int): + qarg = Literal(f"{async_queue}", INTEGER_TYPE) + elif isinstance(async_queue, Reference): + qarg = async_queue + else: + raise TypeError(f"Invalid async_queue value, expect Reference or " + f"integer or None or bool, got : {async_queue}") + return qarg + + def _create_clause(self, async_queue): + ''' + :raises TypeError: if `async_queue` is of the wrong type + ''' + if async_queue is False: + return None + # Convert async_queue value to PSyIR if necessary and + # add as child of clause. + qarg = self.convert_queue(async_queue) + clause = ACCAsyncQueueClause() + if qarg and not qarg is True: + clause.addchild(qarg) + return clause + + @property + def async_clause(self) -> Union[ACCAsyncQueueClause, None]: + ''' + ''' + for child in self.clauses: + if isinstance(child, ACCAsyncQueueClause): + return child + return None @property - def async_queue(self) -> Union[bool, int, Reference, None]: + def async_queue(self) -> Union[bool, int, Reference]: ''' :returns: whether or not async is enabled and if so, which queue this node is associated with. (True indicates the default stream.) @@ -71,7 +113,12 @@ def async_queue(self) -> Union[bool, int, Reference, None]: Int to attach to the given stream ID or use a variable Reference to say at runtime what stream to be used. ''' - return self._async_queue + clause = self.async_clause + if clause: + if clause.queue is None: + return True + return clause.queue + return False @async_queue.setter def async_queue(self, async_queue: Union[bool, int, Reference, None]): @@ -82,21 +129,22 @@ def async_queue(self, async_queue: Union[bool, int, Reference, None]): ID or use a variable Reference to say at runtime what stream to be used. - :raises TypeError: if `wait_queue` is of the wrong type ''' - # check - if (async_queue is not None and - not isinstance(async_queue, (bool, Reference, int))): - raise TypeError(f"Invalid async_queue value, expect Reference or " - f"integer or None or bool, got : {async_queue}") - - # assign - self._async_queue = async_queue + clause = self._create_clause(async_queue) + existing = self.async_clause + if existing: + if clause: + existing.replace_with(clause) + else: + existing.detach() + else: + if clause: + self.addchild(clause) def _build_async_string(self) -> str: ''' Build the async arg to concat to the acc directive when generating the - code. + code in the old, 'gen_code' path. :returns: The `async()` option to add to the directive. ''' @@ -104,18 +152,11 @@ def _build_async_string(self) -> str: result = "" # async - if self.async_queue is not None: - if isinstance(self.async_queue, bool): - if self.async_queue: - result = " async()" - elif isinstance(self.async_queue, int): - result = f" async({self.async_queue})" - elif isinstance(self.async_queue, Reference): - from psyclone.psyir.backend.fortran import FortranWriter - text = FortranWriter()(self.async_queue) - result = f" async({text})" - - # ok + clause = self.async_clause + if clause: + from psyclone.psyir.backend.fortran import FortranWriter + result = f" {FortranWriter()(clause)}" + return result def __eq__(self, other) -> bool: diff --git a/src/psyclone/psyir/nodes/directive.py b/src/psyclone/psyir/nodes/directive.py index e9bb6c5f41..d6339dc841 100644 --- a/src/psyclone/psyir/nodes/directive.py +++ b/src/psyclone/psyir/nodes/directive.py @@ -43,6 +43,7 @@ import abc from collections import OrderedDict +from typing import List from psyclone.configuration import Config from psyclone.core import Signature, VariablesAccessInfo @@ -50,6 +51,7 @@ from psyclone.f2pygen import CommentGen from psyclone.psyir.nodes.array_of_structures_reference import ( ArrayOfStructuresReference) +from psyclone.psyir.nodes.clause import Clause from psyclone.psyir.nodes.loop import Loop from psyclone.psyir.nodes.reference import Reference from psyclone.psyir.nodes.schedule import Schedule @@ -290,8 +292,9 @@ class StandaloneDirective(Directive): (e.g. OpenMP, OpenACC, compiler-specific) inherit from this class. ''' - # Textual description of the node. - _children_valid_format = None + # Textual description of the node. A standalone directive may only have + # Clauses as children. + _children_valid_format = "Clause*" @staticmethod def _validate_child(position, child): @@ -304,20 +307,15 @@ def _validate_child(position, child): :rtype: bool ''' - # Children are not allowed for StandaloneDirective - return False + # Only clauses are permitted. + return isinstance(child, Clause) @property - def clauses(self): + def clauses(self) -> List[Clause]: ''' :returns: the Clauses associated with this directive. - :rtype: List of :py:class:`psyclone.psyir.nodes.Clause` ''' - # This should be uncommented once a standalone directive with - # clauses exists - # if len(self.children) > 0: - # return self.children - return [] + return self.children # For automatic API documentation generation diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index 46fc19f326..43ab85b685 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -107,7 +107,7 @@ def apply( start_index = node_list[0].position default_present = options.get("default_present", False) - async_queue = options.get("async_queue", None) + async_queue = options.get("async_queue", False) # check self.check_async_queue(node_list, async_queue) diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index e3288cd1f2..1fd0f1ba4e 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -485,7 +485,7 @@ def test_acckernelsdirective_gencode_async_queue(async_queue): if async_queue is None: string = "" elif isinstance(async_queue, bool) and async_queue is True: - string = " async()" + string = " async" elif isinstance(async_queue, bool) and async_queue is False: string = "" elif isinstance(async_queue, int): @@ -553,7 +553,6 @@ def test_acc_routine_parallelism(): in str(einfo.value)) with pytest.raises(ValueError) as einfo: target.parallelism = "sequential" - print(str(einfo.value)) assert ("Expected one of ['gang', 'seq', 'vector', 'worker'] for the level" " of parallelism but got 'sequential'" in str(einfo.value)) @@ -591,13 +590,13 @@ def test_accupdatedirective_init(): directive = ACCUpdateDirective(sig, "host", if_present=False) assert directive.if_present is False - assert directive.async_queue is None + assert directive.async_queue is False directive = ACCUpdateDirective(sig, "host", async_queue=True) assert directive.async_queue is True directive = ACCUpdateDirective(sig, "host", async_queue=1) - assert directive.async_queue == 1 + assert directive.async_queue.value == "1" directive = ACCUpdateDirective(sig, "host", async_queue=Reference(Symbol("var"))) @@ -615,18 +614,17 @@ def test_accupdatedirective_begin_string(): async_queue=True) directive_async_queue_int = ACCUpdateDirective(sig, "device", async_queue=1) - directive_async_queue_str = \ - ACCUpdateDirective(sig, "device", async_queue=Reference(Symbol("var"))) + directive_async_queue_str = ACCUpdateDirective( + sig, "device", async_queue=Reference(Symbol("var"))) assert directive_host.begin_string() == "acc update host(x)" - assert directive_device.begin_string() \ - == "acc update if_present device(x)" - assert directive_async_default.begin_string() \ - == "acc update if_present device(x) async()" - assert directive_async_queue_int.begin_string() \ - == "acc update if_present device(x) async(1)" - assert directive_async_queue_str.begin_string() \ - == "acc update if_present device(x) async(var)" + assert directive_device.begin_string() == "acc update if_present device(x)" + assert (directive_async_default.begin_string() == + "acc update if_present device(x) async") + assert (directive_async_queue_int.begin_string() == + "acc update if_present device(x) async(1)") + assert (directive_async_queue_str.begin_string() == + "acc update if_present device(x) async(var)") with pytest.raises(GenerationError) as err: directive_empty.begin_string() @@ -734,23 +732,18 @@ def test_directives_async_queue(directive_type): directive._sig_set.add(Signature("x")) # check initial status - assert directive.async_queue == 1 + assert directive.async_queue.value == "1" assert 'async(1)' in directive.begin_string() # change value to true directive.async_queue = True assert directive.async_queue is True - assert 'async()' in directive.begin_string() + assert 'async' in directive.begin_string() # change value to False directive.async_queue = False assert directive.async_queue is False - assert 'async()' not in directive.begin_string() - - # change value to None - directive.async_queue = None - assert directive.async_queue is None - assert 'async()' not in directive.begin_string() + assert 'async' not in directive.begin_string() # change value afterward directive.async_queue = Reference(Symbol("stream")) diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index bdd4eb214c..ffd7bba7c0 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -65,6 +65,7 @@ OMPParallelDirective, OMPParallelDoDirective, OMPSerialDirective, OMPSingleDirective, OMPTaskloopDirective, PSyDataNode, Reference, Return, Routine, Schedule) +from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes.array_mixin import ArrayMixin from psyclone.psyir.nodes.structure_member import StructureMember from psyclone.psyir.nodes.structure_reference import StructureReference @@ -2501,7 +2502,7 @@ def apply(self, sched, options={}): The available options are : - async_queue : Permit to force using the given async stream if - not None. + not False. :type options: Optional[Dict[str, Any]] @@ -2531,8 +2532,8 @@ def apply(self, sched, options={}): current = current.parent posn = sched.children.index(current) - # extract async. Default to None - async_queue = options.get('async_queue', None) + # extract async. Default to False. + async_queue = options.get('async_queue', False) # check self.check_child_async(sched, async_queue) @@ -2553,16 +2554,15 @@ def check_child_async(self, sched, async_queue): :param async_queue: The async queue to expect in childs. :type async_queue: \ - Optional[bool,int,:py:class: psyclone.core.Reference] + Optional[bool,int,:py:class:`psyclone.core.Reference`] ''' - + qval = ACCAsyncMixin.convert_queue(async_queue) directive_cls = (ACCParallelDirective, ACCKernelsDirective) - for dir in sched.walk(directive_cls): - if async_queue is not None: - if async_queue != dir.async_queue: - raise TransformationError( - 'Try to make an ACCEnterDataTrans with async_queue ' - 'different than the one in child kernels !') + for dirv in sched.walk(directive_cls): + if qval != dirv.async_queue: + raise TransformationError( + 'Try to make an ACCEnterDataTrans with async_queue ' + 'different than the one in child kernels !') def validate(self, sched, options={}): # pylint: disable=arguments-differ, arguments-renamed @@ -2592,7 +2592,7 @@ def validate(self, sched, options={}): raise TransformationError("Schedule already has an OpenACC data " "region - cannot add an enter data.") - async_queue = options.get('async_queue', None) + async_queue = options.get('async_queue', False) # check consistency with childs about async_queue self.check_child_async(sched, async_queue) From 2c677a26f540fed6f335dece33c643063853c12c Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Thu, 9 Jan 2025 13:01:24 +0000 Subject: [PATCH 15/18] #1664 fix linting [skip ci] --- src/psyclone/psyir/nodes/acc_clauses.py | 2 ++ src/psyclone/psyir/nodes/acc_directives.py | 3 +-- src/psyclone/psyir/nodes/acc_mixins.py | 5 ++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/psyclone/psyir/nodes/acc_clauses.py b/src/psyclone/psyir/nodes/acc_clauses.py index ca65c18396..d034812e80 100644 --- a/src/psyclone/psyir/nodes/acc_clauses.py +++ b/src/psyclone/psyir/nodes/acc_clauses.py @@ -37,6 +37,8 @@ ''' This module contains the implementations of the various OpenACC Directive Clause nodes.''' +from typing import Union + from psyclone.psyir.nodes.clause import Clause from psyclone.psyir.nodes.datanode import DataNode from psyclone.psyir.nodes.node import Node diff --git a/src/psyclone/psyir/nodes/acc_directives.py b/src/psyclone/psyir/nodes/acc_directives.py index 4ae1a7e94c..478a79b1be 100644 --- a/src/psyclone/psyir/nodes/acc_directives.py +++ b/src/psyclone/psyir/nodes/acc_directives.py @@ -59,14 +59,13 @@ from psyclone.psyir.nodes.directive import (StandaloneDirective, RegionDirective) from psyclone.psyir.nodes.intrinsic_call import IntrinsicCall -from psyclone.psyir.nodes.literal import Literal from psyclone.psyir.nodes.node import Node from psyclone.psyir.nodes.psy_data_node import PSyDataNode from psyclone.psyir.nodes.routine import Routine from psyclone.psyir.nodes.reference import Reference from psyclone.psyir.nodes.schedule import Schedule from psyclone.psyir.nodes.operation import BinaryOperation -from psyclone.psyir.symbols import INTEGER_TYPE, ScalarType +from psyclone.psyir.symbols import ScalarType from psyclone.f2pygen import BaseGen diff --git a/src/psyclone/psyir/nodes/acc_mixins.py b/src/psyclone/psyir/nodes/acc_mixins.py index 71354d2768..a33bdfe1ca 100644 --- a/src/psyclone/psyir/nodes/acc_mixins.py +++ b/src/psyclone/psyir/nodes/acc_mixins.py @@ -86,13 +86,16 @@ def _create_clause(self, async_queue): :raises TypeError: if `async_queue` is of the wrong type ''' if async_queue is False: + # There's no async clause. return None # Convert async_queue value to PSyIR if necessary and # add as child of clause. qarg = self.convert_queue(async_queue) clause = ACCAsyncQueueClause() - if qarg and not qarg is True: + if qarg and qarg is not True: + # A specific queue is supplied. clause.addchild(qarg) + # No queue is specified return clause @property From c099396976f030950ff4723d3637baec4e5243d9 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Fri, 10 Jan 2025 11:41:53 +0000 Subject: [PATCH 16/18] #1664 fix tests --- .../transformations/acc_kernels_trans.py | 50 ++++++++++++------- .../tests/psyir/nodes/directive_test.py | 2 +- .../transformations/acc_kernels_trans_test.py | 2 +- 3 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index 43ab85b685..9722ea29b3 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -41,20 +41,19 @@ ''' This module provides the ACCKernelsTrans transformation. ''' import re +from typing import Any, Dict, List, Union from psyclone import psyGen from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes import ( ACCEnterDataDirective, ACCKernelsDirective, Assignment, - Call, CodeBlock, Loop, Node, + Call, CodeBlock, Literal, Loop, Node, PSyDataNode, Reference, Return, Routine, Statement, WhileLoop) -from psyclone.psyir.symbols import UnsupportedFortranType +from psyclone.psyir.symbols import INTEGER_TYPE, UnsupportedFortranType from psyclone.psyir.transformations.region_trans import RegionTrans from psyclone.psyir.transformations.transformation_error import ( TransformationError) -from typing import Any, Dict, List, Union - class ACCKernelsTrans(RegionTrans): ''' @@ -122,44 +121,61 @@ def apply( @staticmethod def check_async_queue( nodes: List[Node], - async_queue: Union[bool, int, Reference, None] + async_queue: Union[bool, int, Reference] ): ''' Common function to check that all parent data directives have the same async queue. :param node: the nodes in the PSyIR to enclose. - :param async_queue: The async queue to expect in parents. + :param async_queue: The async queue to expect in ancestors. + :raises TypeError: if the supplied queue is of the wrong type. + :raises TransformationError: if the supplied queue does not match + that specified by any ancestor nodes. ''' - if async_queue is None: + def _to_str(val): + return (f"'{val.debug_string()}'" if isinstance(val, Node) + else "None") + + if async_queue is False: + # The kernels directive will not have the async clause. return - # check type - if not isinstance(async_queue, (int, Reference)): + # check type (a bool is an instance of int) and ensure the supplied + # value is in a form suitable for comparison with values already + # stored in the PSyIR. + if isinstance(async_queue, bool): + # A value of True means that async is specified with no queue. + checkval = None + elif isinstance(async_queue, int): + checkval = Literal(f"{async_queue}", INTEGER_TYPE) + elif isinstance(async_queue, Reference): + checkval = async_queue + else: raise TypeError(f"Invalid async_queue value, expect Reference or " - f"integer or None or False, got : {async_queue}") + f"integer or None or bool, got : {async_queue}") # Perform an additional check whether a queue has been used before. # Note this to work only for the current routine. parent = nodes[0].ancestor(ACCAsyncMixin) if parent is not None: - if async_queue != parent.async_queue: + if checkval != parent.async_queue: raise TransformationError( f"Cannot apply ACCKernelsTrans with asynchronous " - f"queue '{async_queue}' because a parent " - f"directive specifies queue '{parent.async_queue}'") + f"queue {_to_str(checkval)} because a parent directive " + f"specifies queue {_to_str(parent.async_queue)}") parent = nodes[0].ancestor(Routine) if parent: edata = parent.walk(ACCEnterDataDirective) if edata: - if async_queue != edata[0].async_queue: + if checkval != edata[0].async_queue: raise TransformationError( f"Cannot apply ACCKernelsTrans with asynchronous queue" - f" '{async_queue}' because the containing routine " + f" {_to_str(checkval)} because the containing routine " f"has an ENTER DATA directive specifying queue " - f"'{edata[0].async_queue}'") + f"{_to_str(edata[0].async_queue)}") def validate( self, @@ -243,7 +259,7 @@ def validate( f"OpenACC region because it is not available on GPU.") # extract async option and check validity - async_queue = options.get('async_queue', None) + async_queue = options.get('async_queue', False) self.check_async_queue(node_list, async_queue) # Check that we have at least one loop or array range within diff --git a/src/psyclone/tests/psyir/nodes/directive_test.py b/src/psyclone/tests/psyir/nodes/directive_test.py index bdb54f17d7..fcff1648a2 100644 --- a/src/psyclone/tests/psyir/nodes/directive_test.py +++ b/src/psyclone/tests/psyir/nodes/directive_test.py @@ -225,4 +225,4 @@ def test_standalonedirective_children_validation(): with pytest.raises(GenerationError) as excinfo: cdir.addchild(schedule) assert ("Item 'Schedule' can't be child 0 of 'StandaloneDirective'. The " - "valid format is: 'None'." in str(excinfo.value)) + "valid format is: 'Clause*'." in str(excinfo.value)) diff --git a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py index 2d924f2007..c9b2e90180 100644 --- a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py @@ -499,7 +499,7 @@ def test_check_async_queue_with_enter_data(fortran_reader): with pytest.raises(TypeError) as err: acc_trans.check_async_queue(None, 3.5) assert ("Invalid async_queue value, expect Reference or integer or None " - "or False, got : 3.5" in str(err.value)) + "or bool, got : 3.5" in str(err.value)) psyir = fortran_reader.psyir_from_source( "program two_loops\n" " integer :: ji\n" From 1fb7c85147e1e1faf04a90ea3c56a5830fd206ff Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Fri, 10 Jan 2025 17:18:14 +0000 Subject: [PATCH 17/18] #1664 support DataNode for async queue and tidy docstrings --- src/psyclone/psyir/nodes/acc_directives.py | 56 +++++++------ src/psyclone/psyir/nodes/acc_mixins.py | 78 +++++++++++++------ .../transformations/acc_kernels_trans.py | 28 +++++-- .../tests/psyir/nodes/acc_directives_test.py | 11 ++- src/psyclone/transformations.py | 6 +- 5 files changed, 114 insertions(+), 65 deletions(-) diff --git a/src/psyclone/psyir/nodes/acc_directives.py b/src/psyclone/psyir/nodes/acc_directives.py index 9f98cc3899..d9e4e959e0 100644 --- a/src/psyclone/psyir/nodes/acc_directives.py +++ b/src/psyclone/psyir/nodes/acc_directives.py @@ -45,9 +45,10 @@ nodes.''' import abc +from typing import Dict, List, Optional, Set, Tuple, Union from psyclone.core import Signature -from psyclone.f2pygen import DirectiveGen, CommentGen +from psyclone.f2pygen import BaseGen, CommentGen, DirectiveGen from psyclone.errors import GenerationError, InternalError from psyclone.psyir.nodes.acc_clauses import ( ACCAsyncQueueClause, ACCCopyClause, ACCCopyInClause, @@ -56,6 +57,7 @@ from psyclone.psyir.nodes.assignment import Assignment from psyclone.psyir.nodes.clause import Clause from psyclone.psyir.nodes.codeblock import CodeBlock +from psyclone.psyir.nodes.datanode import DataNode from psyclone.psyir.nodes.directive import (StandaloneDirective, RegionDirective) from psyclone.psyir.nodes.intrinsic_call import IntrinsicCall @@ -67,10 +69,6 @@ from psyclone.psyir.nodes.operation import BinaryOperation from psyclone.psyir.symbols import ScalarType -from psyclone.f2pygen import BaseGen - -from typing import Dict, List, Optional, Set, Tuple, Union - class ACCDirective(metaclass=abc.ABCMeta): # pylint: disable=too-few-public-methods @@ -136,9 +134,7 @@ def _validate_child(position, child): @property def signatures(self) -> Union[ Tuple[Set[Signature]], - Tuple[ - Set[Signature], Set[Signature] - ] + Tuple[Set[Signature], Set[Signature]] ]: ''' Returns a 1-tuple or a 2-tuple of sets depending on the working API. @@ -150,8 +146,8 @@ def signatures(self) -> Union[ device (probably a GPU) before the parallel region can be begun. :returns: 1-tuple or 2-tuple of input and output sets of variable names - ''' + ''' # pylint: disable=import-outside-toplevel from psyclone.domain.lfric import LFRicInvokeSchedule from psyclone.gocean1p0 import GOInvokeSchedule @@ -175,16 +171,15 @@ class ACCStandaloneDirective(ACCDirective, StandaloneDirective, ''' Base class for all standalone OpenACC directive statements. ''' @staticmethod - def _validate_child(position, child): + def _validate_child(position: int, child: Node) -> bool: ''' - :param int position: the position to be validated. + :param position: the position to be validated. :param child: a child to be validated. - :type child: :py:class:`psyclone.psyir.nodes.Node` :return: whether the given child and position are valid for this node. - :rtype: bool ''' + # Ensure we call the _validate_child() in the correct parent class.. return StandaloneDirective._validate_child(position, child) @@ -200,8 +195,6 @@ class ACCRoutineDirective(ACCStandaloneDirective): def __init__(self, parallelism: str = "seq", **kwargs): self.parallelism = parallelism - assert self.parallelism in self.SUPPORTED_PARALLELISM - super().__init__(self, **kwargs) @property @@ -280,7 +273,7 @@ def __init__( self, children: List[Node] = None, parent: Node = None, - async_queue: Union[bool, int, Reference] = False + async_queue: Union[bool, int, DataNode] = False ): super().__init__(children=children, parent=parent) @@ -363,6 +356,8 @@ def begin_string(self): "Perhaps there are no ACCParallel or ACCKernels directives " "within the region?") + # Variables need lexicographic sorting since sets guarantee no ordering + # and members of composite variables must appear later in deep copies. sym_list = _sig_set_to_string(self._sig_set) # options @@ -371,8 +366,6 @@ def begin_string(self): # async options += self._build_async_string() - # Variables need lexicographic sorting since sets guarantee no ordering - # and members of composite variables must appear later in deep copies. return f"acc enter data{options}" def data_on_device(self, parent: Node): @@ -395,14 +388,15 @@ class ACCParallelDirective(ACCRegionDirective, ACCAsyncMixin): :param default_present: whether this directive includes the `DEFAULT(PRESENT)` clause or not. - :param async_queue: If `True`, make the directive asynchronous. - If of type `int` or a `Reference`, also attach it to the given stream - identified by an ID or by a variable name pointing to an integer. + :param async_queue: Enable async support and attach it to the given queue. + Can use False to disable, True to enable on default + stream. Int to attach to the given stream ID or use a PSyIR + expression to say at runtime what stream to be used. ''' def __init__( self, - async_queue: Union[bool, int, Reference] = False, + async_queue: Union[bool, int, DataNode] = False, default_present: bool = True, **kwargs ): @@ -765,9 +759,10 @@ class ACCKernelsDirective(ACCRegionDirective, ACCAsyncMixin): :param parent: the parent of this node in the PSyIR. :param bool default_present: whether or not to add the "default(present)" clause to the kernels directive. - :param async_queue: Make the directive asynchronous and attached to the - given stream identified by an ID or by a variable - name pointing to an integer. + :param async_queue: Enable async support and attach it to the given queue. + Can use False to disable, True to enable on default + stream. Int to attach to the given stream ID or use a PSyIR + expression to say at runtime what stream to be used. ''' _children_valid_format = "Schedule, Clause*" @@ -777,7 +772,7 @@ def __init__( children: List[Node] = None, parent: Node = None, default_present: bool = True, - async_queue: Union[bool, int, Reference] = False + async_queue: Union[bool, int, DataNode] = False ): super().__init__(children=children, parent=parent) ACCAsyncMixin.__init__(self, async_queue) @@ -978,9 +973,10 @@ class ACCUpdateDirective(ACCStandaloneDirective, ACCAsyncMixin): clause on the update directive (this instructs the directive to silently ignore any variables that are not on the device). - :param async_queue: Make the directive asynchronous and attached to the - given stream identified by an ID or by a variable name - pointing to an integer. + :param async_queue: Enable async support and attach it to the given queue. + Can use False to disable, True to enable on default + stream. Int to attach to the given stream ID or use a PSyIR + expression to say at runtime what stream to be used. ''' @@ -993,7 +989,7 @@ def __init__( children: List[Node] = None, parent: Node = None, if_present: Optional[bool] = True, - async_queue: Union[bool, int, Reference] = False + async_queue: Union[bool, int, DataNode] = False ): super().__init__(children=children, parent=parent) ACCAsyncMixin.__init__(self, async_queue) diff --git a/src/psyclone/psyir/nodes/acc_mixins.py b/src/psyclone/psyir/nodes/acc_mixins.py index a33bdfe1ca..edda1a1853 100644 --- a/src/psyclone/psyir/nodes/acc_mixins.py +++ b/src/psyclone/psyir/nodes/acc_mixins.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2021-2023, Science and Technology Facilities Council. +# Copyright (c) 2021-2025, Science and Technology Facilities Council. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -31,8 +31,9 @@ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- -# Authors S. Valat, Inria / Lab. Jean Kuntzmann -# Modified M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann +# Author: S. Valat, Inria / Lab. Jean Kuntzmann +# Modified: M. Schreiber, Univ. Grenoble Alpes / Inria / Lab. Jean Kuntzmann +# A. R. Porter, STFC Daresbury Laboratory # ----------------------------------------------------------------------------- ''' This module contains the mixins to apply some ACC features on many @@ -42,8 +43,8 @@ from typing import Union from psyclone.psyir.nodes.acc_clauses import ACCAsyncQueueClause +from psyclone.psyir.nodes.datanode import DataNode from psyclone.psyir.nodes.literal import Literal -from psyclone.psyir.nodes.reference import Reference from psyclone.psyir.symbols import INTEGER_TYPE @@ -55,42 +56,65 @@ class ACCAsyncMixin(metaclass=abc.ABCMeta): :param async_queue: Enable async support and attach it to the given queue. Can use False to disable, True to enable on default stream. Use int to attach to the given stream ID or - use a variable Reference to say at runtime what stream + use a PSyIR expression to say at runtime what stream to be used. ''' def __init__( self, - async_queue: Union[bool, int, Reference] = False + async_queue: Union[bool, int, DataNode] = False ): clause = self._create_clause(async_queue) if clause: self.addchild(clause) @staticmethod - def convert_queue(async_queue): + def convert_queue( + async_queue: Union[bool, int, DataNode]) -> Union[bool, DataNode]: ''' + Utility to convert the provided queue value to PSyIR when + applicable. + + :param async_queue: the queue value to convert. + + :returns: PSyIR of queue value or bool specifying whether or not async + is enabled. + + :raises TypeError: if the supplied queue value is of unsupported type. + ''' if isinstance(async_queue, bool): qarg = async_queue elif isinstance(async_queue, int): qarg = Literal(f"{async_queue}", INTEGER_TYPE) - elif isinstance(async_queue, Reference): + elif isinstance(async_queue, DataNode): qarg = async_queue else: - raise TypeError(f"Invalid async_queue value, expect Reference or " - f"integer or None or bool, got : {async_queue}") + raise TypeError(f"Invalid async_queue value, expected DataNode, " + f"integer or bool, got : {async_queue}") return qarg - def _create_clause(self, async_queue): + @staticmethod + def _create_clause( + async_queue: Union[bool, int, DataNode] + ) -> Union[ACCAsyncQueueClause, None]: ''' + Utility to create a new ACCAsyncQueueClause for the specified queue. + + :param async_queue: the queue value to use for the async clause (or + True to enable on default queue or False to disable). + + :returns: a new ACCAsyncQueueClause if async is enabled and None + otherwise. + :raises TypeError: if `async_queue` is of the wrong type + ''' if async_queue is False: # There's no async clause. return None # Convert async_queue value to PSyIR if necessary and # add as child of clause. - qarg = self.convert_queue(async_queue) + qarg = ACCAsyncMixin.convert_queue(async_queue) clause = ACCAsyncQueueClause() if qarg and qarg is not True: # A specific queue is supplied. @@ -101,6 +125,7 @@ def _create_clause(self, async_queue): @property def async_clause(self) -> Union[ACCAsyncQueueClause, None]: ''' + :returns: the queue clause associated with this node or None. ''' for child in self.clauses: if isinstance(child, ACCAsyncQueueClause): @@ -108,40 +133,48 @@ def async_clause(self) -> Union[ACCAsyncQueueClause, None]: return None @property - def async_queue(self) -> Union[bool, int, Reference]: + def async_queue(self) -> Union[bool, int, DataNode]: ''' :returns: whether or not async is enabled and if so, which queue this node is associated with. (True indicates the default stream.) Can use False to disable, True to enable on default stream. - Int to attach to the given stream ID or use a variable - Reference to say at runtime what stream to be used. + Int to attach to the given stream ID or use a PSyIR + expression to say at runtime what stream to be used. ''' clause = self.async_clause if clause: if clause.queue is None: + # async is enabled on the default stream. return True return clause.queue + # No clause => async is not enabled. return False @async_queue.setter - def async_queue(self, async_queue: Union[bool, int, Reference, None]): + def async_queue(self, async_queue: Union[bool, int, DataNode]): ''' + Set the asynchronous behaviour associated with this node. + :param async_queue: Enable async support and attach it to the given queue. Can use False to disable, True to enable on default stream. Int to attach to the given stream - ID or use a variable Reference to say at runtime - what stream to be used. - + ID or use a PSyIR expression to say at runtime + which stream to be used. ''' - clause = self._create_clause(async_queue) + # `clause` will be None if async support is disabled. + clause = ACCAsyncMixin._create_clause(async_queue) existing = self.async_clause if existing: + # This node already had an ACCAsyncQueueClause so we have to either + # replace or remove it. if clause: existing.replace_with(clause) else: existing.detach() else: if clause: + # No existing clause but async support is now enabled so add + # the new clause. self.addchild(clause) def _build_async_string(self) -> str: @@ -149,14 +182,15 @@ def _build_async_string(self) -> str: Build the async arg to concat to the acc directive when generating the code in the old, 'gen_code' path. - :returns: The `async()` option to add to the directive. + :returns: The "async[()]" option to add to the directive. + ''' - # default result = "" # async clause = self.async_clause if clause: + # pylint: disable=import-outside-toplevel from psyclone.psyir.backend.fortran import FortranWriter result = f" {FortranWriter()(clause)}" diff --git a/src/psyclone/psyir/transformations/acc_kernels_trans.py b/src/psyclone/psyir/transformations/acc_kernels_trans.py index caded1d4d1..07469f490b 100644 --- a/src/psyclone/psyir/transformations/acc_kernels_trans.py +++ b/src/psyclone/psyir/transformations/acc_kernels_trans.py @@ -90,10 +90,19 @@ def apply( :param node: a node or list of nodes in the PSyIR to enclose. :param options: a dictionary with options for transformations. - "default_present": boolean whether or not the kernels + :param bool options["default_present"]: whether or not the kernels region should have the 'default present' attribute (indicating that data is already on the accelerator). When using managed memory this option should be False. + :param bool options["disable_loop_check"]: whether to disable the check + that the supplied region contains 1 or more loops. Default is False + (i.e. the check is enabled). + :param options["async_queue"]: whether or not to add the 'async' clause + to the new directive and if so, which queue to associate it with. + True to enable for the default queue or a queue value specified + with an int or PSyIR expression. + :type options["async_queue"]: + Union[bool, :py:class:`psyclone.psyir.nodes.DataNode`] ''' # Ensure we are always working with a list of nodes, even if only @@ -108,9 +117,6 @@ def apply( default_present = options.get("default_present", False) async_queue = options.get("async_queue", False) - # check - self.check_async_queue(node_list, async_queue) - # Create a directive containing the nodes in node_list and insert it. directive = ACCKernelsDirective( parent=parent, children=[node.detach() for node in node_list], @@ -190,11 +196,19 @@ def validate( :param nodes: the proposed PSyIR node or nodes to enclose in the kernels region. :param options: a dictionary with options for transformations. - "disable_loop_check": boolean whether to disable the + :param bool options["default_present"]: whether or not the kernels + region should have the 'default present' attribute (indicating + that data is already on the accelerator). When using managed + memory this option should be False. + :param bool options["disable_loop_check"]: whether to disable the check that the supplied region contains 1 or more loops. Default is False (i.e. the check is enabled). - - "async_queue": the async stream to be used. + :param options["async_queue"]: whether or not to add the 'async' clause + to the new directive and if so, which queue to associate it with. + True to enable for the default queue or a queue value specified + with an int or PSyIR expression. + :type options["async_queue"]: + Union[bool, :py:class:`psyclone.psyir.nodes.DataNode`] :raises NotImplementedError: if the supplied Nodes belong to a GOInvokeSchedule. diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index a80353fda3..7b5abb85dc 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -55,7 +55,7 @@ ACCEnterDataDirective, ACCKernelsDirective, ACCLoopDirective, ACCParallelDirective, ACCRegionDirective, ACCRoutineDirective, ACCUpdateDirective, ACCAtomicDirective, ACCWaitDirective, Assignment, - Literal, Reference, Return, Routine, Schedule) + BinaryOperation, Literal, Reference, Return, Routine, Schedule) from psyclone.psyir.nodes.loop import Loop from psyclone.psyir.symbols import ( Symbol, SymbolTable, DataSymbol, INTEGER_TYPE, UnresolvedType) @@ -750,6 +750,13 @@ def test_directives_async_queue(directive_type): assert directive.async_queue == Reference(Symbol("stream")) assert 'async(stream)' in directive.begin_string() + # Value is a PSyIR expression + directive.async_queue = BinaryOperation.create( + BinaryOperation.Operator.ADD, + Literal("1", INTEGER_TYPE), + Reference(Symbol("stream"))) + assert 'async(1 + stream)' in directive.begin_string() + # put wrong type with pytest.raises(TypeError) as error: directive.async_queue = 3.5 @@ -764,7 +771,7 @@ def test_mixin_constructor_error(): with pytest.raises(TypeError) as error: _ = ACCAsyncMixin(3.5) - assert ("Invalid async_queue value, expect Reference or integer or None " + assert ("Invalid async_queue value, expected DataNode, integer " "or bool, got : 3.5" in str(error)) diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index 9eca0645ba..852a6df668 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -46,6 +46,7 @@ # pylint: disable=too-many-lines import abc +from typing import Any, Dict, Optional, Union from psyclone import psyGen from psyclone.configuration import Config @@ -2489,7 +2490,7 @@ def name(self): ''' return "ACCEnterDataTrans" - def apply(self, sched, options={}): + def apply(self, sched: Schedule, options: Optional[Dict[str, Any]] = {}): # pylint: disable=arguments-renamed '''Adds an OpenACC "enter data" directive to the invoke associated with the supplied Schedule. Any fields accessed by OpenACC kernels @@ -2497,15 +2498,12 @@ def apply(self, sched, options={}): order to ensure they remain on the target device. :param sched: schedule to which to add an "enter data" directive. - :type sched: sub-class of :py:class:`psyclone.psyir.nodes.Schedule` :param options: a dictionary with options for transformations. The available options are : - async_queue : Permit to force using the given async stream if not False. - :type options: Optional[Dict[str, Any]] - ''' # Ensure that the proposed transformation is valid self.validate(sched, options) From 8b4e0e9e665162bcf1793c02b612bbcb2441ad16 Mon Sep 17 00:00:00 2001 From: Andrew Porter Date: Fri, 10 Jan 2025 17:18:50 +0000 Subject: [PATCH 18/18] #1664 fix lint --- src/psyclone/transformations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psyclone/transformations.py b/src/psyclone/transformations.py index 852a6df668..b5b581ff4f 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -46,7 +46,7 @@ # pylint: disable=too-many-lines import abc -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional from psyclone import psyGen from psyclone.configuration import Config