diff --git a/changelog b/changelog index 8a0c4c0540..d2b9a4a14a 100644 --- a/changelog +++ b/changelog @@ -1,3 +1,7 @@ + 51) PR #3205 for #3306 and towards #3157. Initial implementation of + a generic MaximalRegionTrans and specialisations to create spanning + OMPParallel regions and NEMO Profiling regions. + 50) PR #3338 for #1651. Fixes Reference2ArrayRange issues with capitalisation and test nested WHEREs. diff --git a/examples/nemo/eg7/openmp_cpu_nowait_trans.py b/examples/nemo/eg7/openmp_cpu_nowait_trans.py index 016f56fdd8..aefe2311fb 100755 --- a/examples/nemo/eg7/openmp_cpu_nowait_trans.py +++ b/examples/nemo/eg7/openmp_cpu_nowait_trans.py @@ -42,67 +42,21 @@ OMPLoopTrans, OMPMinimiseSyncTrans, TransformationError, - OMPParallelTrans + MaximalOMPParallelRegionTrans ) from psyclone.psyir.nodes import ( Assignment, Directive, - IfBlock, Loop, - OMPBarrierDirective, - OMPDoDirective, Routine, ) -def add_parallel_region_to_contiguous_directives(schedule): - '''Adds OMPParallelDirective nodes around areas of the schedule with - contiguous OpenMP directives. - - :param schedule: The Schedule to add OpenMPParallelDirectives to. - :type schedule: :py:class:`psyclone.nodes.Schedule` - ''' - par_trans = OMPParallelTrans() - start = -1 - end = -1 - sets = [] - # Loop through the children, if its an OpenMP directive add it - # to the current set - for child in schedule: - if isinstance(child, (OMPDoDirective, OMPBarrierDirective)): - if start < 0: - start = child.position - end = child.position + 1 - else: - # If we have a non OMPDodirective/OMPBarrierDirective then add - # an OMPParallelDirective if needed. 
- if start >= 0: - sets.append((start, end)) - start = -1 - end = -1 - # Recurse appropriately to sub schedules: - if isinstance(child, Loop): - add_parallel_region_to_contiguous_directives(child.loop_body) - elif isinstance(child, IfBlock): - add_parallel_region_to_contiguous_directives(child.if_body) - if child.else_body: - add_parallel_region_to_contiguous_directives( - child.else_body - ) - # If we get to the end and need to enclose some nodes in a parallel - # directive we do it now - if start >= 0: - sets.append((start, end)) - - for subset in sets[::-1]: - par_trans.apply(schedule[subset[0]:subset[1]]) - - def trans(psyir): ''' Adds OpenMP Loop directives with nowait to Nemo loops over levels. - This is followed by applying OpenMP parallel directives as required, - before removing barriers where possible. - + This is followed by applying OpenMP parallel directives as required + with the MaximalOMPParallelRegionTrans, before removing barriers where + possible. :param psyir: the PSyIR of the provided file. :type psyir: :py:class:`psyclone.psyir.nodes.FileContainer` @@ -130,5 +84,5 @@ def trans(psyir): # Apply the largest possible parallel regions and remove any barriers that # can be removed. 
for routine in psyir.walk(Routine): - add_parallel_region_to_contiguous_directives(routine) + MaximalOMPParallelRegionTrans().apply(routine) minsync_trans.apply(routine) diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py index d30905aeee..db6da92987 100755 --- a/examples/nemo/scripts/utils.py +++ b/examples/nemo/scripts/utils.py @@ -40,14 +40,14 @@ from psyclone.domain.common.transformations import KernelModuleInlineTrans from psyclone.psyir.nodes import ( - Assignment, Loop, Directive, Node, Reference, CodeBlock, Call, Return, - IfBlock, Routine, Schedule, IntrinsicCall, StructureReference) + Assignment, Loop, Directive, Node, Reference, CodeBlock, Call, + Routine, Schedule, IntrinsicCall, StructureReference, IfBlock) from psyclone.psyir.symbols import DataSymbol from psyclone.psyir.transformations import ( ArrayAssignment2LoopsTrans, HoistLoopBoundExprTrans, HoistLocalArraysTrans, HoistTrans, InlineTrans, Maxval2LoopTrans, ProfileTrans, OMPMinimiseSyncTrans, Reference2ArrayRangeTrans, - ScalarisationTrans, IncreaseRankLoopArraysTrans) + ScalarisationTrans, IncreaseRankLoopArraysTrans, MaximalRegionTrans) from psyclone.transformations import TransformationError # USE statements to chase to gather additional symbol information. @@ -474,6 +474,40 @@ def add_profiling(children: Union[List[Node], Schedule]): attempt to add profiling regions. ''' + class MaximalProfilingOutsideDirectivesTrans(MaximalRegionTrans): + '''Applies Profiling to the largest possible region outside of + directive regions. + + :param routine_name: The name of the Routine being profiled. + ''' + # We purposely don't encompass Directive or Return statements + # (which would create unclosed hooks). 
+ _allowed_contiguous_statements = (Assignment, Call, CodeBlock) + _transformation = ProfileTrans + + def _satisfies_minimum_region_rules(self, region: list[Node]) -> bool: + '''Returns whether the provided node list satisfies the + requirements to create a region for the ProfileTrans. + + :param region: The candidate region to have the transformation + applied. + :returns: whether the provided node list should have profiling + applied. + ''' + if len(region) == 1: + if (isinstance(region[0], CodeBlock) and + len(region[0].get_ast_nodes) == 1): + # Don't create profiling regions for CodeBlocks consisting + # of a single statement. + return False + if (isinstance(region[0], IfBlock) and + "was_single_stmt" in region[0].annotations and + isinstance(region[0].if_body[0], CodeBlock)): + # We also don't put single statements consisting of + # 'If(condition) call blah()' inside profiling regions. + return False + return super()._satisfies_minimum_region_rules(region) + if children and isinstance(children, Schedule): # If we are given a Schedule, we look at its children. children = children.children @@ -486,56 +520,6 @@ def add_profiling(children: Union[List[Node], Schedule]): parent_routine = children[0].ancestor(Routine) if parent_routine and parent_routine.return_symbol: return - - node_list = [] - for child in children[:]: - # Do we want this node to be included in a profiling region? - if child.walk((Directive, Return)): - # It contains a directive or return statement so we put what we - # have so far inside a profiling region. - add_profile_region(node_list) - # A node that is not included in a profiling region marks the - # end of the current candidate region so reset the list. 
- node_list = [] - # Now we go down a level and try again without attempting to put - # profiling below directives or within Assignments - if isinstance(child, IfBlock): - add_profiling(child.if_body) - add_profiling(child.else_body) - elif not isinstance(child, (Assignment, Directive)): - add_profiling(child.children) - else: - # We can add this node to our list for the current region - node_list.append(child) - add_profile_region(node_list) - - -def add_profile_region(nodes): - ''' - Attempt to put the supplied list of nodes within a profiling region. - - :param nodes: list of sibling PSyIR nodes to enclose. - :type nodes: list of :py:class:`psyclone.psyir.nodes.Node` - - ''' - if nodes: - # Check whether we should be adding profiling inside this routine - routine_name = nodes[0].ancestor(Routine).name.lower() - if any(ignore in routine_name for ignore in PROFILING_IGNORE): - return - if len(nodes) == 1: - if isinstance(nodes[0], CodeBlock) and \ - len(nodes[0].get_ast_nodes) == 1: - # Don't create profiling regions for CodeBlocks consisting - # of a single statement - return - if isinstance(nodes[0], IfBlock) and \ - "was_single_stmt" in nodes[0].annotations and \ - isinstance(nodes[0].if_body[0], CodeBlock): - # We also don't put single statements consisting of - # 'IF(condition) CALL blah()' inside profiling regions - return - try: - ProfileTrans().apply(nodes) - except TransformationError: - pass + routine_name = parent_routine.name if parent_routine else "" + if routine_name not in PROFILING_IGNORE: + MaximalProfilingOutsideDirectivesTrans().apply(children) diff --git a/src/psyclone/psyir/transformations/__init__.py b/src/psyclone/psyir/transformations/__init__.py index 5b3a3f8386..538c49c5ad 100644 --- a/src/psyclone/psyir/transformations/__init__.py +++ b/src/psyclone/psyir/transformations/__init__.py @@ -94,6 +94,8 @@ from psyclone.psyir.transformations.loop_trans import LoopTrans from psyclone.psyir.transformations.value_range_check_trans import ( 
ValueRangeCheckTrans) +from psyclone.psyir.transformations.maximal_region_trans import ( + MaximalRegionTrans) from psyclone.psyir.transformations.omp_critical_trans import ( OMPCriticalTrans) from psyclone.psyir.transformations.omp_loop_trans import OMPLoopTrans @@ -126,6 +128,9 @@ OMPTaskloopTrans from psyclone.psyir.transformations.omp_declare_target_trans import \ OMPDeclareTargetTrans +from psyclone.psyir.transformations.maximal_omp_parallel_region_trans import ( + MaximalOMPParallelRegionTrans +) from psyclone.psyir.transformations.omp_parallel_trans import ( OMPParallelTrans, ) @@ -182,6 +187,8 @@ "ParallelRegionTrans", "OMPTaskloopTrans", "OMPDeclareTargetTrans", + "MaximalRegionTrans", "OMPCriticalTrans", + "MaximalOMPParallelRegionTrans", "OMPParallelTrans", ] diff --git a/src/psyclone/psyir/transformations/maximal_omp_parallel_region_trans.py b/src/psyclone/psyir/transformations/maximal_omp_parallel_region_trans.py new file mode 100644 index 0000000000..7902a49f0f --- /dev/null +++ b/src/psyclone/psyir/transformations/maximal_omp_parallel_region_trans.py @@ -0,0 +1,93 @@ +# ----------------------------------------------------------------------------- +# BSD 3-Clause License +# +# Copyright (c) 2017-2025, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# Authors A. B. G. Chalk, STFC Daresbury Lab + +'''This module contains the MaximalOMPParallelRegionTrans.''' + +from typing import Union + +from psyclone.psyir.nodes import ( + OMPTaskwaitDirective, + OMPBarrierDirective, + OMPSerialDirective, + OMPTaskloopDirective, + OMPDoDirective, + OMPLoopDirective, + OMPTaskDirective, + DynamicOMPTaskDirective, + Node, + Schedule +) +from psyclone.psyir.transformations.maximal_region_trans import ( + MaximalRegionTrans) +from psyclone.psyir.transformations.omp_parallel_trans import OMPParallelTrans +from psyclone.utils import transformation_documentation_wrapper + + +@transformation_documentation_wrapper +class MaximalOMPParallelRegionTrans(MaximalRegionTrans): + '''Applies OpenMP Parallel directives around the largest possible sections + of the input. 
+ + Currently, this will never place OpenMP parallel sections around + Assignments that are outside of another OpenMP directive. See #3157 and + the discussion on #3205 for more detail.''' + # The type of parallel transformation to be applied to the input region. + _transformation = OMPParallelTrans + # Tuple of statement nodes allowed inside the _transformation + _allowed_contiguous_statements = ( + OMPTaskwaitDirective, + OMPBarrierDirective, + OMPSerialDirective, + OMPTaskloopDirective, + OMPDoDirective, + OMPLoopDirective, + OMPTaskDirective, + DynamicOMPTaskDirective, + ) + _required_nodes = ( + OMPSerialDirective, + OMPTaskloopDirective, + OMPDoDirective, + OMPLoopDirective, + OMPTaskDirective, + DynamicOMPTaskDirective, + ) + + def apply(self, nodes: Union[Node, Schedule, list[Node]], **kwargs): + '''Applies the transformation to the nodes provided. + + :param nodes: can be a single node, a schedule or a list of nodes. + ''' + super().apply(nodes, **kwargs) diff --git a/src/psyclone/psyir/transformations/maximal_region_trans.py b/src/psyclone/psyir/transformations/maximal_region_trans.py new file mode 100644 index 0000000000..2642df88a3 --- /dev/null +++ b/src/psyclone/psyir/transformations/maximal_region_trans.py @@ -0,0 +1,257 @@ +# ----------------------------------------------------------------------------- +# BSD 3-Clause License +# +# Copyright (c) 2025-2026, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# Authors A. B. G. Chalk, STFC Daresbury Lab + +'''This module contains the MaximalRegionTrans.''' + +import abc +from typing import Union + +from psyclone.psyir.nodes import ( + Node, + Schedule, + Loop, + IfBlock, + WhileLoop, +) +from psyclone.psyGen import Transformation +from psyclone.psyir.transformations.region_trans import RegionTrans +from psyclone.psyir.transformations.transformation_error import \ + TransformationError +from psyclone.utils import transformation_documentation_wrapper + + +@transformation_documentation_wrapper +class MaximalRegionTrans(RegionTrans, metaclass=abc.ABCMeta): + ''' + Abstract transformation containing the functionality to apply + another transformation to the largest code segments possible + while satisfying its validation and any additionally provided + constraints. 
+ + Subclasses should override the _transformation, _required_nodes, and + _allowed_contiguous_statements members to control the functionality. + ''' + #: The type of transformation to be applied to the input region. + _transformation = None + #: Tuple of top-level statement nodes allowed inside the _transformation. + #: Loops and IfBlocks are always recursed into if they're not part of this + #: tuple, and their children will be checked to see which sections can + #: have the transformation applied. + _allowed_contiguous_statements = () + #: Tuple of nodes that there must be at least one of inside the block + #: to be transformed, else the block can be ignored (e.g. a block of + #: only barriers doesn't need to be transformed). Defaults to any Node. + _required_nodes = (Node) + + def _node_allowed(self, node: Node) -> bool: + '''Returns whether the provided node is allowed in the _transformation. + + The default implementation checks whether the node is an instance + of the _allowed_contiguous_statements tuple, but subclasses may + override this with additional functionality (e.g. to check if a + function is pure). + + :param node: the candidate node to be in the transformation region. + :returns: whether the node is allowed to be in the transformed region. + ''' + return isinstance(node, self._allowed_contiguous_statements) + + def _satisfies_minimum_region_rules(self, region: list[Node]) -> bool: + '''Returns whether the provided node list satisfies the requirements + to create a region for the _transformation. + + The default implementation checks whether a _required_node is present + in the region, but subclasses may override this with additional + functionality. + + :param region: The candidate region to have the transformation + applied. + :returns: whether the provided node list should have the + _transformation applied. 
+ ''' + for node in region: + if node.walk(self._required_nodes, + stop_type=self._required_nodes): + return True + return False + + def _can_be_in_region(self, node: Node) -> bool: + '''Returns whether the provided node can be included in a + region. Loops and if statements are recursed into to check if their + children can be. + + :param node: the candidate Node to be placed into a transformed + region. + + :returns: whether it is safe to add the node to a transformed region. + ''' + if self._node_allowed(node): + return True + + if isinstance(node, (Loop, WhileLoop)): + # Check that all contents of the loop body can be part + # of the region. + for child in node.loop_body: + if not self._can_be_in_region(child): + break + else: + return True + return False + + if isinstance(node, IfBlock): + # Check that all contents of each branch body can be part + # of the region. + allowed = True + for child in node.if_body: + allowed = (allowed and self._can_be_in_region(child)) + if node.else_body and allowed: + for child in node.else_body: + allowed = (allowed and + self._can_be_in_region(child)) + return allowed + + # All other node types we default to False. + return False + + def _compute_transformable_sections( + self, node_list: list[Node], + trans: Transformation, + ) -> list[list[Node]]: + ''' + Computes the sections of the input node_list to apply the + transformation to. + + :param node_list: The node_list passed into this Transformation. + :param trans: The transformation applied to the regions found. + :returns: The list of node_lists to apply this class' + _transformation class to. + ''' + # Find the largest sections we can surround with the transformation. + all_blocks = [] + current_block = [] + for child in node_list: + # If the child can be added to a transformed region then add it + # to the current block of nodes. + if self._can_be_in_region(child): + # Check that validation still succeeds if we add this child + # to the current block. 
+ try: + trans.validate(current_block + [child]) + current_block.append(child) + except TransformationError: + # If validation now fails, then don't add this to the + # current block and add the block to the allowed blocks + # if allowed. + if current_block: + if self._satisfies_minimum_region_rules(current_block): + all_blocks.append(current_block) + current_block = [] + else: + # Otherwise, if the current_block contains any children, + # add them to the list of regions to be transformed and reset + # the current_block. + if current_block: + if self._satisfies_minimum_region_rules(current_block): + all_blocks.append(current_block) + current_block = [] + # Need to recurse on some node types + if isinstance(child, IfBlock): + if_blocks = self._compute_transformable_sections( + child.if_body, trans + ) + all_blocks.extend(if_blocks) + if child.else_body: + else_blocks = self._compute_transformable_sections( + child.else_body, trans + ) + all_blocks.extend(else_blocks) + if isinstance(child, (Loop, WhileLoop)): + loop_blocks = self._compute_transformable_sections( + child.loop_body, trans + ) + all_blocks.extend(loop_blocks) + # If any nodes are left in the current block at the end of the + # node_list, then add them to a transformed region + if current_block: + if self._satisfies_minimum_region_rules(current_block): + all_blocks.append(current_block) + + return all_blocks + + def validate(self, nodes: Union[Node, Schedule, list[Node]], **kwargs): + '''Validates whether this transformation can be applied to the + nodes provided. + + :param nodes: can be a single node, a schedule or a list of nodes. + + :raises TransformationError: if the nodes provided don't all have the + same parent and aren't consecutive. 
+ ''' + + self.validate_options(**kwargs) + node_list = self.get_node_list(nodes) + + node_parent = node_list[0].parent + prev_position = node_list[0].position + for child in node_list[1:]: + if child.parent is not node_parent: + raise TransformationError( + f"Error in {self.name} transformation: supplied nodes " + f"are not children of the same parent.") + if prev_position+1 != child.position: + raise TransformationError( + f"Children are not consecutive children of one parent: " + f"child '{child.debug_string().rstrip()}' has position " + f"{child.position}, but previous child had position " + f"{prev_position}.") + prev_position = child.position + + def apply(self, nodes: Union[Node, Schedule, list[Node]], **kwargs): + '''Applies the transformation to the nodes provided. + + :param nodes: can be a single node, a schedule or a list of nodes. + ''' + node_list = self.get_node_list(nodes) + + # Call validate. + self.validate(nodes, **kwargs) + + par_trans = self._transformation() + + all_blocks = self._compute_transformable_sections(node_list, par_trans) + + # Apply the transformation to all of the blocks found. + for block in all_blocks: + par_trans.apply(block) diff --git a/src/psyclone/psyir/transformations/omp_minimise_sync_trans.py b/src/psyclone/psyir/transformations/omp_minimise_sync_trans.py index 0b4cb08a0a..5ce267e542 100644 --- a/src/psyclone/psyir/transformations/omp_minimise_sync_trans.py +++ b/src/psyclone/psyir/transformations/omp_minimise_sync_trans.py @@ -172,6 +172,18 @@ def validate(self, node: Routine, **kwargs) -> None: raise TypeError(f"OMPMinimiseSyncTrans expects a Routine input " f"but found '{type(node).__name__}'.") + def _eliminate_uncontained_barriers(self, routine: Routine) -> None: + ''' + Removes any OMPBarrierDirectives that are not inside an + OMPParallelRegion. + + :param routine: the routine to remove uncontained barriers from. 
+ ''' + barriers = routine.walk(OMPBarrierDirective) + for bar in barriers: + if bar.ancestor(OMPParallelDirective) is None: + bar.detach() + def _eliminate_adjacent_barriers(self, routine: Routine, bar_type: type) -> None: ''' @@ -537,6 +549,9 @@ def apply(self, node: Routine, **kwargs) -> None: # if its a OMPBarrierDirective as they are unnecessary. for parallel in node.walk(OMPParallelDirective): _eliminate_final_parallel_barrier(parallel) + # Finally eliminate any barriers leftover outside of parallel + # regions, as these are now superfluous + self._eliminate_uncontained_barriers(node) # Eliminate OMPTaskwaitDirectives for the gpu_directives if len(gpu_directives) > 0: self._eliminate_adjacent_barriers(node, OMPTaskwaitDirective) diff --git a/src/psyclone/tests/psyir/transformations/maximal_ompparallel_region_trans_test.py b/src/psyclone/tests/psyir/transformations/maximal_ompparallel_region_trans_test.py new file mode 100644 index 0000000000..8c52862ca2 --- /dev/null +++ b/src/psyclone/tests/psyir/transformations/maximal_ompparallel_region_trans_test.py @@ -0,0 +1,155 @@ +# ----------------------------------------------------------------------------- +# BSD 3-Clause License +# +# Copyright (c) 2026, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# Authors A. B. G. Chalk, STFC Daresbury Lab + +'''This module contains the tests for the MaximalOMPParallelRegionTrans.''' + +from psyclone.psyir.nodes import ( + Loop, + OMPBarrierDirective, + OMPParallelDirective, +) +from psyclone.psyir.transformations import ( + MaximalOMPParallelRegionTrans, + OMPLoopTrans +) + + +def test_maximal_ompparallel_region_trans_apply(fortran_reader): + ''' Test the apply method of the ompparallel region transformation.''' + code = """subroutine x + integer :: i + i = 1 + end subroutine x""" + psyir = fortran_reader.psyir_from_source(code) + MaximalOMPParallelRegionTrans().apply(psyir.children[0].children[:]) + assert len(psyir.walk(OMPParallelDirective)) == 0 + + # Test that we only get a single parallel region when we have Ifblocks + # and loops around some of the parallel do region. + code = """subroutine x(arr) + integer :: i, j, k + integer, dimension(:,:,:) :: arr + + !Adding omp do here. + do i = 1, 100 + do j = 1, 100 + do k = 1, 100 + arr(k,j,i) = 1 + end do + end do + end do + + if(.true.) then + !Adding omp do here. 
+ do i = 1, 100 + do j = 1, 100 + do k = 1, 100 + arr(k,j,i) = 2 + end do + end do + end do + else + !Adding omp do here. + do i = 1, 100 + do j = 1, 100 + do k = 1, 100 + arr(k,j,i) = 2 + end do + end do + end do + end if + + do i = 1, 100 + !Adding omp do here. + do j = 1, 100 + do k = 1, 100 + arr(k,j,i) = 2 + end do + end do + + end do + + end subroutine x""" + + psyir = fortran_reader.psyir_from_source(code) + ltrans = OMPLoopTrans() + loops = psyir.walk(Loop) + # Add omp do directives to the commented loops in the code fragment. + ltrans.apply(loops[0], collapse=True) + ltrans.apply(loops[3], collapse=True) + ltrans.apply(loops[6], collapse=True) + ltrans.apply(loops[10], collapse=True) + # Apply the maximal parallel region trans to the code. + MaximalOMPParallelRegionTrans().apply(psyir.children[0].children[:]) + # The routine should now have one child and it should be the only + # OMPParallelDirective + assert len(psyir.walk(OMPParallelDirective)) == 1 + assert len(psyir.children[0].children) == 1 + assert isinstance(psyir.children[0].children[0], OMPParallelDirective) + + # Check that we don't get a parallel directive around only barriers. + code = """subroutine x + integer :: i, j, k + integer, dimension(:,:,:) :: arr + + !Adding omp do here. + do i = 1, 100 + do j = 1, 100 + do k = 1, 100 + arr(k,j,i) = 1 + end do + end do + end do + + ! parallel region won't go past the assignment here. + arr = 2 + + ! Add some barriers here. + end subroutine x""" + psyir = fortran_reader.psyir_from_source(code) + loops = psyir.walk(Loop) + ltrans.apply(loops[0], collapse=True) + # Add two barriers at the end + psyir.children[0].addchild(OMPBarrierDirective()) + psyir.children[0].addchild(OMPBarrierDirective()) + # Apply the maximal parallel region trans to the code. 
+ MaximalOMPParallelRegionTrans().apply(psyir.children[0].children[:]) + # The routine should now have four children and the first should be the + # only OMPParallelDirective, and the last two still the barriers. + assert len(psyir.walk(OMPParallelDirective)) == 1 + assert len(psyir.children[0].children) == 4 + assert isinstance(psyir.children[0].children[0], OMPParallelDirective) + assert isinstance(psyir.children[0].children[2], OMPBarrierDirective) + assert isinstance(psyir.children[0].children[3], OMPBarrierDirective) diff --git a/src/psyclone/tests/psyir/transformations/maximal_region_trans_test.py b/src/psyclone/tests/psyir/transformations/maximal_region_trans_test.py new file mode 100644 index 0000000000..ad62f9c510 --- /dev/null +++ b/src/psyclone/tests/psyir/transformations/maximal_region_trans_test.py @@ -0,0 +1,344 @@ +# ----------------------------------------------------------------------------- +# BSD 3-Clause License +# +# Copyright (c) 2026, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# Authors A. B. G. Chalk, STFC Daresbury Lab + +'''This module contains the tests for the MaximalRegionTrans.''' + +import pytest + +from psyclone.psyGen import Transformation +from psyclone.psyir.nodes import ( + Assignment, + IfBlock, + Routine, + OMPParallelDirective, +) +from psyclone.psyir.transformations import ( + MaximalRegionTrans, + TransformationError, + OMPParallelTrans +) + + +class MaxParTrans(MaximalRegionTrans): + ''' Dummy class to test MaxParallelRegionTrans' functionality. ''' + # The apply function will do OMPParallelTrans around allowed regions. + _transformation = OMPParallelTrans + # We're only allowing assignment because its straightforward to test with. + _allowed_contiguous_statements = (Assignment, ) + # Should parallelise any found region that contains an assignment. + _required_nodes = (Assignment, ) + + +@pytest.mark.parametrize( + "statement,expected", + [ + ("i = 1", True), + ("call a_function()", False), + ("do i = 1, 100\nj = j + 1\nend do", True), + ("do i = 1, 100\ncall a_function()\nend do", False), + ("if (.true.) then\nj=3\nend if", True), + ("if(.true.) 
@pytest.mark.parametrize(
    "statement,expected",
    [
        # A bare assignment is expected to be accepted, a bare call is not.
        ("i = 1", True),
        ("call a_function()", False),
        # A loop holding an assignment is expected to be accepted, a loop
        # holding a call is not.
        ("do i = 1, 100\nj = j + 1\nend do", True),
        ("do i = 1, 100\ncall a_function()\nend do", False),
        # An if construct is expected to be accepted only when neither
        # branch holds a call.
        ("if (.true.) then\nj=3\nend if", True),
        ("if(.true.) then\nj=3\nelse\nj=3\nend if", True),
        ("if(.true.) then\ncall a_function()\nelse\nj=3\nendif", False),
        ("if(.true.) then\nj=3\nelse\ncall a_function()\nendif", False),
    ]
)
def test_can_be_in_region(fortran_reader, statement, expected):
    '''Check the _can_be_in_region method (called on a MaxParTrans
    instance) for a single Fortran statement.

    :param fortran_reader: fixture used to parse the Fortran source.
    :param str statement: the Fortran statement placed as the first
        executable statement of the routine.
    :param bool expected: the value _can_be_in_region should return
        for that statement.

    '''
    source = f"""
    subroutine test
        use some_module
        integer :: i, j
        {statement}
    end subroutine test
    """
    # The statement under test is the first child of the only routine.
    first_stmt = (fortran_reader.psyir_from_source(source)
                  .walk(Routine)[0].children[0])
    assert MaxParTrans()._can_be_in_region(first_stmt) == expected


def test_validate(fortran_reader):
    '''Check which node lists the validate method of MaxParTrans accepts
    and which it rejects.

    :param fortran_reader: fixture used to parse the Fortran source.

    '''
    source = """
    subroutine test
        integer :: i, j
        i = 1
        j = 1
        k = i + 1
        if(.true.) then
            k = i + j
        end if
    end subroutine test"""
    routine = fortran_reader.psyir_from_source(source).walk(Routine)[0]
    statements = routine.children
    trans = MaxParTrans()

    # The complete set of children of the routine is accepted.
    trans.validate(statements)

    # Siblings with a gap between them (positions 0 and 2) are rejected.
    gapped = [statements[0], statements[2]]
    with pytest.raises(TransformationError) as err:
        trans.validate(gapped)
    gap_message = (
        "Children are not consecutive children of one parent: child "
        "'k = i + 1' has position 2, but previous child had position 0.")
    assert gap_message in str(err.value)

    # Nodes with different parents (a routine child and a node taken from
    # the if body) are rejected.
    mixed_parents = [statements[0], statements[3].if_body.children[0]]
    with pytest.raises(TransformationError) as err:
        trans.validate(mixed_parents)
    parent_message = (
        "Error in MaxParTrans transformation: supplied nodes are not "
        "children of the same parent")
    assert parent_message in str(err.value)
def test_apply(fortran_reader):
    '''Exercise the apply method inherited from MaximalRegionTrans, using
    the MaxParTrans subclass and a locally defined OneParTrans subclass,
    on a series of small routines.

    :param fortran_reader: fixture used to parse the Fortran source.

    '''
    maxpar = MaxParTrans()

    # Case 1: apply to a Routine node whose body has a call in the middle.
    source = """
    subroutine test
        use some_module
        integer :: i, j
        i = 1
        j = 1
        call a_function()
        if(.true.) then
            i = 1
        end if
        j = 1
    end subroutine test
    """
    routine = fortran_reader.psyir_from_source(source).walk(Routine)[0]
    maxpar.apply(routine)
    # Two parallel regions are expected: the first holds "i = 1" and
    # "j = 1", the second holds the IfBlock followed by the last "j = 1".
    regions = routine.walk(OMPParallelDirective)
    assert len(regions) == 2
    leading, trailing = regions

    leading_body = leading.dir_body.children
    assert len(leading_body) == 2
    assert leading_body[0].debug_string() == "i = 1\n"
    assert leading_body[1].debug_string() == "j = 1\n"

    trailing_body = trailing.dir_body.children
    assert isinstance(trailing_body[0], IfBlock)
    assert trailing_body[1].debug_string() == "j = 1\n"

    # Case 2: apply to a list of assignments only.
    source = """subroutine x
    integer :: i, j, k, l

    i = 1
    j = 2
    k = 3
    l = 4
    end subroutine x"""
    psyir = fortran_reader.psyir_from_source(source)
    assigns = psyir.walk(Assignment)
    maxpar.apply(assigns)
    regions = psyir.walk(OMPParallelDirective)
    assert len(regions) == 1
    # Every assignment must sit directly in the body of that one region.
    assert all(assign.parent.parent is regions[0] for assign in assigns)

    # Case 3: assignments mixed with a loop, an if and a while loop, none
    # of which contains a call.
    source = """subroutine x
    integer :: i, j, k, l

    i = 1
    do j = 2, 3
      k = 1
    end do
    if (j == 2) then
      k = 4
    end if
    do while(j < 3)
      j = j + 1
    end do
    i = 4
    end subroutine x
    """
    psyir = fortran_reader.psyir_from_source(source)
    nodes = psyir.walk(Routine)[0].children[:]
    maxpar.apply(nodes)
    regions = psyir.walk(OMPParallelDirective)
    assert len(regions) == 1
    # Every top-level statement must sit in the body of that one region.
    assert all(node.parent.parent is regions[0] for node in nodes)

    # Case 4: a call between two assignments.
    source = """subroutine x
    use some_mod
    integer :: i

    i = 1
    call something()
    i = 2
    end subroutine x"""
    psyir = fortran_reader.psyir_from_source(source)
    nodes = psyir.walk(Routine)[0].children[:]
    maxpar.apply(nodes)
    regions = psyir.walk(OMPParallelDirective)
    assert len(regions) == 2
    # Each assignment gets a region of its own and the call stays outside
    # any parallel region.
    assert nodes[0].parent.parent is regions[0]
    assert not nodes[1].ancestor(OMPParallelDirective)
    assert nodes[2].parent.parent is regions[1]

    # Case 5: every top-level construct contains a call. The same source
    # is reused for case 6 below.
    nested_calls_source = """subroutine x
    use some_mod
    integer :: i, j

    if(i == 1) then
      call something()
      i = 2
    else
      i = 3
    end if

    do i = 1,5
      call something()
      j = 2
    end do

    do while(j == 3)
      call something()
      j = j + 2
    end do
    end subroutine x"""
    psyir = fortran_reader.psyir_from_source(nested_calls_source)
    nodes = psyir.walk(Routine)[0].children[:]
    maxpar.apply(nodes)
    # Four regions are expected in total, two of them inside the IfBlock:
    # one after the call in the if body and one in the else body.
    assert len(psyir.walk(OMPParallelDirective)) == 4
    if_block = nodes[0]
    assert len(if_block.walk(OMPParallelDirective)) == 2
    assert len(if_block.if_body.children) == 2
    assert isinstance(if_block.if_body.children[1], OMPParallelDirective)
    assert isinstance(if_block.else_body.children[0], OMPParallelDirective)

    class Faketrans(Transformation):
        '''Dummy transformation whose validate passes once and then
        raises until apply has been called.'''
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self._validate_count = 0

        def validate(self, node, **kwargs):
            # Only the first call since construction (or since the last
            # apply) is allowed to pass.
            if self._validate_count >= 1:
                raise TransformationError("")
            self._validate_count += 1

        def apply(self, node, **kwargs):
            # Reset the counter, then delegate to OMPParallelTrans.
            self._validate_count = 0
            OMPParallelTrans().apply(node, **kwargs)

    class OneParTrans(MaximalRegionTrans):
        '''Dummy MaximalRegionTrans that uses the Faketrans above.'''
        _transformation = Faketrans
        _allowed_contiguous_statements = (Assignment,)
        _required_nodes = (Assignment,)

    # Case 6: same source as case 5, but with the failing validation.
    psyir = fortran_reader.psyir_from_source(nested_calls_source)
    nodes = psyir.walk(Routine)[0].children[:]
    OneParTrans().apply(nodes)
    # With validation failing on all but the first try, only a single
    # OMPParallelDirective is expected.
    assert len(psyir.walk(OMPParallelDirective)) == 1
def test_validation_failure_during_compute_transformable_sections(
    fortran_reader
):
    '''Check that when the wrapped transformation rejects a node, the
    nodes that did pass validation are still transformed.

    :param fortran_reader: fixture used to parse the Fortran source.

    '''

    class Faketrans(Transformation):
        '''Dummy transformation that only validates assignments whose
        left-hand-side symbol is named "a".'''
        def validate(self, node_list: list[Assignment], **kwargs):
            # Reject the list as soon as one assignment targets another
            # symbol.
            if any(node.lhs.symbol.name != "a" for node in node_list):
                raise TransformationError("Isn't a")

        def apply(self, node: Assignment, **kwargs):
            # Delegate to OMPParallelTrans.
            OMPParallelTrans().apply(node, **kwargs)

    class OneParTrans(MaximalRegionTrans):
        '''Dummy MaximalRegionTrans that uses the Faketrans above.'''
        _transformation = Faketrans
        _allowed_contiguous_statements = (Assignment,)
        _required_nodes = (Assignment,)

    source = """subroutine test
    integer :: a
    integer :: b

    a = 1
    b = 2
    a = 3
    end subroutine test"""
    psyir = fortran_reader.psyir_from_source(source)
    routine = psyir.walk(Routine)[0]
    OneParTrans().apply(routine.children[:])

    # Both assignments to "a" end up in their own parallel region, while
    # the assignment to "b" between them is left untouched.
    assert len(psyir.walk(OMPParallelDirective)) == 2
    first, middle, last = routine.children[0:3]
    assert isinstance(first, OMPParallelDirective)
    assert isinstance(middle, Assignment)
    assert isinstance(last, OMPParallelDirective)
def test_omp_eliminate_uncontained_barriers(fortran_reader):
    '''
    Check the _eliminate_uncontained_barriers method of
    OMPMinimiseSyncTrans on a routine holding two barriers, only one of
    which is inside a parallel region.

    :param fortran_reader: fixture used to parse the Fortran source.

    '''
    source = """subroutine test

    end subroutine
    """
    routine = fortran_reader.psyir_from_source(source).walk(Routine)[0]
    # Add two barriers to the empty routine, then enclose only the second
    # in a parallel region.
    for _ in range(2):
        routine.addchild(OMPBarrierDirective())
    OMPParallelTrans().apply(routine.children[1])
    assert len(routine.walk(OMPBarrierDirective)) == 2

    # A single barrier is expected to remain afterwards.
    OMPMinimiseSyncTrans()._eliminate_uncontained_barriers(routine)
    assert len(routine.walk(OMPBarrierDirective)) == 1