From b6d030070725a10b8425f1e3e672584dd700c826 Mon Sep 17 00:00:00 2001
From: dinghwah <48604964+dinghwah@users.noreply.github.com>
Date: Wed, 5 Jul 2023 11:41:45 -0400
Subject: [PATCH] DAOS-13579 test: pool/svc.py pool destroy pool leader not
 responding dRPC (#12282)

Update svc.py to stop the new pool leader when there are no more
non-leader ranks.

Signed-off-by: Ding Ho <ding-hwa.ho@intel.com>
---
 src/tests/ftest/pool/svc.py          | 41 ++++++++++++++++++++++++++++++-----------
 src/tests/ftest/util/server_utils.py | 21 +++++++++++++++++++++
 2 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/src/tests/ftest/pool/svc.py b/src/tests/ftest/pool/svc.py
index f60a8ed7a7b..0507178f056 100644
--- a/src/tests/ftest/pool/svc.py
+++ b/src/tests/ftest/pool/svc.py
@@ -1,5 +1,5 @@
 '''
-  (C) Copyright 2018-2022 Intel Corporation.
+  (C) Copyright 2018-2023 Intel Corporation.
 
   SPDX-License-Identifier: BSD-2-Clause-Patent
 '''
@@ -134,24 +134,43 @@ def test_pool_svc(self):
             self.pool.wait_for_rebuild_to_end(interval=1)
 
             # Verify the pool leader has changed
+            self.log.info("Original pool_leader= %s", pool_leader)
+            self.log.info("Pool svc_ranks after pool_leader stopped= %s", non_leader_ranks)
             pool_leader = self.check_leader(pool_leader, True)
-            non_leader_ranks.remove(pool_leader)
+            mgmt_service_ranks = self.server_managers[0].management_service_ranks
+            self.log.info("New pool_leader= %s", pool_leader)
+            self.log.info("Management_service_ranks= %s", mgmt_service_ranks)
+            for rank in [pool_leader] + mgmt_service_ranks:
+                if rank in non_leader_ranks:
+                    non_leader_ranks.remove(rank)
+            self.log.info(
+                "After excluded new leader + management_service_ranks, non_leader_ranks= %s",
+                non_leader_ranks)
 
             if svc_params[1] == 5:
-                # Stop a pool non-leader
-                non_leader = non_leader_ranks[-1]
-                self.log.info(
-                    "Stopping a pool non-leader (%s): %s", non_leader_ranks, non_leader)
+                if non_leader_ranks:
+                    # Stop a pool non-leader if non_leader_ranks is not empty
+                    rank_to_kill = non_leader_ranks[-1]
+                    self.log.info(
+                        "Stopping a pool non-leader (%s): %s", non_leader_ranks, rank_to_kill)
+                else:
+                    # Stop the pool new_leader if non_leader_ranks is empty
+                    rank_to_kill = pool_leader
+                    self.log.info(
+                        "Pool non_leader_ranks is empty, Stopping the new pool leader: %s",
+                        rank_to_kill)
                 try:
-                    self.server_managers[-1].stop_ranks([non_leader], self.test_log)
+                    self.server_managers[-1].stop_ranks([rank_to_kill], self.test_log)
                 except TestFail as error:
                     self.log.info(error)
                     self.fail(
-                        "Error stopping a pool non-leader - "
-                        "DaosServerManager.stop_ranks([{}])".format(non_leader))
+                        "Error stopping a pool rank - "
+                        "DaosServerManager.stop_ranks([{}])".format(rank_to_kill))
                 self.pool.wait_for_rebuild_to_start(interval=1)
                 self.pool.wait_for_rebuild_to_end(interval=1)
-                # Verify the pool leader has not changed
-                self.check_leader(pool_leader, False)
+
+                if non_leader_ranks:
+                    # Verify the pool leader has not changed
+                    self.check_leader(pool_leader, False)
 
         self.log.info("Test passed!")
diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py
index f10dc8b7a94..030a6cf51b6 100644
--- a/src/tests/ftest/util/server_utils.py
+++ b/src/tests/ftest/util/server_utils.py
@@ -14,6 +14,7 @@
 
 from avocado import fail_on
 
+from ClusterShell.NodeSet import NodeSet
 from command_utils_base import CommonConfig, BasicParameter
 from command_utils import SubprocessManager
 from dmg_utils import get_dmg_command
@@ -158,6 +159,26 @@ def ranks(self):
         """
         return {rank: value["host"] for rank, value in self._expected_states.items()}
 
+    @property
+    def management_service_hosts(self):
+        """Get the hosts running the management service.
+
+        Returns:
+            NodeSet: the hosts running the management service
+
+        """
+        return NodeSet.fromlist(self.get_config_value('access_points'))
+
+    @property
+    def management_service_ranks(self):
+        """Get the ranks running the management service.
+
+        Returns:
+            list: a list of ranks (int) running the management service
+
+        """
+        return self.get_host_ranks(self.management_service_hosts)
+
     def get_params(self, test):
         """Get values for all of the command params from the yaml file.
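
Aside for reviewers, not part of the patch: the victim-selection fallback that
this change adds to test_pool_svc can be summarized in a small standalone
sketch. The helper name pick_rank_to_kill and the example rank numbers below
are hypothetical, chosen only for illustration; the real test operates on a
live DaosServerManager and pool instead of plain lists.

    # Standalone sketch (hypothetical helper) of the rank-selection
    # fallback introduced above in test_pool_svc.

    def pick_rank_to_kill(pool_leader, svc_ranks, mgmt_service_ranks):
        """Return the next rank to stop.

        The new leader and the management-service ranks are excluded
        first; if any non-leader replica remains, the last one is chosen,
        otherwise the new leader itself is stopped (the case this patch
        adds).
        """
        non_leader_ranks = list(svc_ranks)
        for rank in [pool_leader] + mgmt_service_ranks:
            if rank in non_leader_ranks:
                non_leader_ranks.remove(rank)
        if non_leader_ranks:
            return non_leader_ranks[-1]
        return pool_leader

    # Hypothetical example: five svc replicas, rank 3 became leader after
    # the first kill, and ranks 0-2 host the management service (the
    # configured access points).
    assert pick_rank_to_kill(3, [0, 1, 2, 3, 4], [0, 1, 2]) == 4  # non-leader left
    assert pick_rank_to_kill(3, [0, 1, 2, 3], [0, 1, 2]) == 3     # only leader left

The second assert is the scenario the patch fixes: once the management-service
ranks and the new leader are filtered out, no non-leader candidates remain, so
the test now stops the new leader rather than indexing into an empty list.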