Skip to content

Commit

Permalink
DAOS-13579 test: pool/svc.py pool destroy pool leader not responding …
Browse files Browse the repository at this point in the history
…dRPC (#12282)

Update svc.py to stop the new pool leader when there are no more non-leader ranks.

Signed-off-by: Ding Ho <ding-hwa.ho@intel.com>
  • Loading branch information
dinghwah committed Jul 5, 2023
1 parent 28f7c2c commit b6d0300
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 11 deletions.
41 changes: 30 additions & 11 deletions src/tests/ftest/pool/svc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
'''
(C) Copyright 2018-2022 Intel Corporation.
(C) Copyright 2018-2023 Intel Corporation.
SPDX-License-Identifier: BSD-2-Clause-Patent
'''
Expand Down Expand Up @@ -134,24 +134,43 @@ def test_pool_svc(self):
self.pool.wait_for_rebuild_to_end(interval=1)

# Verify the pool leader has changed
self.log.info("Original pool_leader= %s", pool_leader)
self.log.info("Pool svc_ranks after pool_leader stopped= %s", non_leader_ranks)
pool_leader = self.check_leader(pool_leader, True)
non_leader_ranks.remove(pool_leader)
mgmt_service_ranks = self.server_managers[0].management_service_ranks
self.log.info("New pool_leader= %s", pool_leader)
self.log.info("Management_service_ranks= %s", mgmt_service_ranks)
for rank in [pool_leader] + mgmt_service_ranks:
if rank in non_leader_ranks:
non_leader_ranks.remove(rank)
self.log.info(
"After excluded new leader + management_service_ranks, non_leader_ranks= %s",
non_leader_ranks)

if svc_params[1] == 5:
# Stop a pool non-leader
non_leader = non_leader_ranks[-1]
self.log.info(
"Stopping a pool non-leader (%s): %s", non_leader_ranks, non_leader)
if non_leader_ranks:
# Stop a pool non-leader if non_leader_ranks is not empty
rank_to_kill = non_leader_ranks[-1]
self.log.info(
"Stopping a pool non-leader (%s): %s", non_leader_ranks, rank_to_kill)
else:
# Stop the pool new_leader if non_leader_ranks is empty
rank_to_kill = pool_leader
self.log.info(
"Pool non_leader_ranks is empty, Stopping the new pool leader: %s",
rank_to_kill)
try:
self.server_managers[-1].stop_ranks([non_leader], self.test_log)
self.server_managers[-1].stop_ranks([rank_to_kill], self.test_log)
except TestFail as error:
self.log.info(error)
self.fail(
"Error stopping a pool non-leader - "
"DaosServerManager.stop_ranks([{}])".format(non_leader))
"Error stopping a pool rank - "
"DaosServerManager.stop_ranks([{}])".format(rank_to_kill))
self.pool.wait_for_rebuild_to_start(interval=1)
self.pool.wait_for_rebuild_to_end(interval=1)
# Verify the pool leader has not changed
self.check_leader(pool_leader, False)

if non_leader_ranks:
# Verify the pool leader has not changed
self.check_leader(pool_leader, False)

self.log.info("Test passed!")
21 changes: 21 additions & 0 deletions src/tests/ftest/util/server_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from avocado import fail_on

from ClusterShell.NodeSet import NodeSet
from command_utils_base import CommonConfig, BasicParameter
from command_utils import SubprocessManager
from dmg_utils import get_dmg_command
Expand Down Expand Up @@ -158,6 +159,26 @@ def ranks(self):
"""
return {rank: value["host"] for rank, value in self._expected_states.items()}

@property
def management_service_hosts(self):
    """Get the hosts running the management service.

    The hosts are read from the 'access_points' configuration value and
    collected into a single NodeSet.

    Returns:
        NodeSet: the hosts running the management service

    """
    return NodeSet.fromlist(self.get_config_value('access_points'))

@property
def management_service_ranks(self):
    """Get the ranks running the management service.

    The ranks are looked up with get_host_ranks() for the hosts reported by
    the management_service_hosts property.

    Returns:
        list: a list of ranks (int) running the management service

    """
    return self.get_host_ranks(self.management_service_hosts)

def get_params(self, test):
"""Get values for all of the command params from the yaml file.
Expand Down

0 comments on commit b6d0300

Please sign in to comment.