Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update for the procedures for insertion/hot swap of Switch Fabric Module(SFM) by using "config chassis modules shutdown/startup" commands #3283

Merged
merged 27 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
2b43852
* [saidump]
JunhongMao Sep 1, 2023
62aab2d
* [saidump]
JunhongMao Sep 1, 2023
5ecb3c1
Merge branch 'master' of github.com:JunhongMao/sonic-utilities
JunhongMao Sep 1, 2023
3504fdc
* [saidump]
JunhongMao Sep 5, 2023
2450c48
* [saidump]
JunhongMao Sep 6, 2023
9fa769e
Fixup based on the below PR comments.
JunhongMao Sep 19, 2023
8a3f93b
According to the testing group's advice, change the default ROUTE_TAB…
JunhongMao Sep 20, 2023
32f5607
https://github.com/sonic-net/sonic-buildimage/pull/16466 fixing based…
JunhongMao Sep 30, 2023
64ad586
Merge remote-tracking branch 'upstream/master'
JunhongMao Oct 3, 2023
867a6d1
To address the below review comments:
JunhongMao Oct 4, 2023
185bfef
Merge remote-tracking branch 'upstream/master'
JunhongMao Oct 4, 2023
86d3efa
Merge remote-tracking branch 'upstream/master'
JunhongMao Oct 12, 2023
1ca9676
Merge remote-tracking branch 'upstream/master'
JunhongMao Dec 12, 2023
1322095
[VOQ][saidump] Add saidump unit test scripts #3079
JunhongMao Dec 12, 2023
2ac9986
Merge remote-tracking branch 'upstream/master'
JunhongMao Apr 23, 2024
1bc2ab0
Revert "[VOQ][saidump] Add saidump unit test scripts #3079"
JunhongMao Apr 23, 2024
444a4eb
Update for the procedures for insertion/hot swap of Switch Fabric Mod…
JunhongMao Apr 23, 2024
003db44
fix upon review comments.
JunhongMao Apr 24, 2024
2ea3a06
fix upon review comments.
JunhongMao Apr 24, 2024
5d7719d
fix upon review comments.
JunhongMao Apr 24, 2024
6c35d0c
fix upon review comments
JunhongMao Apr 25, 2024
10b1cd1
Merge fabric_module_set_admin_status function into chassis_modules.py
JunhongMao Apr 27, 2024
fa6f17c
fix upon review comments
JunhongMao Apr 30, 2024
001e482
Add unit test codes.
JunhongMao Apr 30, 2024
76d0ada
Fix pre-commit check error and address review comments
JunhongMao May 1, 2024
aa87e55
Update hint information for "peer services"
JunhongMao May 21, 2024
118f2d7
Address a review comments
JunhongMao May 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 100 additions & 1 deletion config/chassis_modules.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
#!/usr/sbin/env python

import click

import time
import re
import subprocess
import utilities_common.cli as clicommon

TIMEOUT_SECS = 10


#
# 'chassis_modules' group ('config chassis_modules ...')
#
Expand All @@ -17,6 +22,81 @@ def modules():
"""Configure chassis modules"""
pass


def get_config_module_state(db, chassis_module_name):
    """Return the configured admin status of a chassis module.

    Reads the module's entry from the CHASSIS_MODULE table of ``db.cfgdb``.
    An empty/missing entry is reported as 'up'; otherwise the entry's
    'admin_status' field is returned as stored.
    """
    entry = db.cfgdb.get_entry('CHASSIS_MODULE', chassis_module_name)
    return entry['admin_status'] if entry else 'up'


#
# Name: check_config_module_state_with_timeout
# Polls the configured admin state of the module, sleeping one second between
# polls, until it equals `state` or TIMEOUT_SECS polls have failed.
# return: False: state matched (no timeout)
#         True:  timeout; ctx.fail() is called first. click's Context.fail
#                raises a UsageError, so with a real click context the
#                `return True` is only a fallback for a non-raising ctx.
#
def check_config_module_state_with_timeout(ctx, db, chassis_module_name, state):
    elapsed_secs = 0
    while True:
        if get_config_module_state(db, chassis_module_name) == state:
            return False

        # Not there yet: wait one second before polling again.
        time.sleep(1)
        elapsed_secs += 1
        if elapsed_secs >= TIMEOUT_SECS:
            ctx.fail("get_config_module_state {} timeout".format(chassis_module_name))
            return True


def get_asic_list_from_db(chassisdb, chassis_module_name):
    """Return the ASIC ids that CHASSIS_STATE_DB associates with a module.

    Scans every key matching ``CHASSIS_FABRIC_ASIC_TABLE*`` and keeps those
    whose "name" field equals ``chassis_module_name``. The ASIC id is the
    run of digits at the end of the key (e.g. ``...|asic6`` -> 6).

    Args:
        chassisdb: connector exposing ``keys(db_name, pattern)`` and
            ``get(db_name, key, field)``.
        chassis_module_name: module name to match against the "name" field.

    Returns:
        List of integer ASIC ids, in the order the keys were returned.
        Empty when nothing matches.
    """
    asic_list = []
    # `or []` tolerates a connector that reports "no keys" as None.
    asic_keys = chassisdb.keys("CHASSIS_STATE_DB", "CHASSIS_FABRIC_ASIC_TABLE*") or []
    for asic_key in asic_keys:
        if chassisdb.get("CHASSIS_STATE_DB", asic_key, "name") != chassis_module_name:
            continue
        trailing_digits = re.search(r"(\d+)$", asic_key)
        if trailing_digits is None:
            # A key without a numeric suffix cannot be mapped to an ASIC id;
            # skip it instead of failing on `None.group()`.
            continue
        asic_list.append(int(trailing_digits.group()))
    return asic_list


#
# Syntax: fabric_module_set_admin_status <chassis_module_name> <'up'/'down'>
#
def fabric_module_set_admin_status(db, chassis_module_name, state):
    """Stop or start the swss@<asic> services of a fabric module's ASICs.

    The ASIC ids are looked up in CHASSIS_STATE_DB via
    ``get_asic_list_from_db``; when none are found nothing is done.

    state == "down":
        1. stop swss@<asic>.service for every ASIC,
        2. verify with ``systemctl is-active`` that each one stopped; if any
           is still active, report it and return without touching the DB,
        3. delete the ASICs' CHASSIS_FABRIC_ASIC_TABLE entries,
        4. reset the systemd failure counter and start the services again.
    state == "up":
        start swss@<asic>.service for every ASIC.
    Any other state is ignored.

    Args:
        db: object whose ``db`` attribute is the chassis DB connector.
        chassis_module_name: name of the fabric module.
        state: "up" or "down".
    """
    # NOTE(review): run_command is handed a single command string here;
    # confirm the helper accepts a string with its current shell setting.
    chassisdb = db.db
    chassisdb.connect("CHASSIS_STATE_DB")
    asic_list = get_asic_list_from_db(chassisdb, chassis_module_name)

    if not asic_list:
        return

    if state == "down":
        for asic in asic_list:
            click.echo("Stop swss@{} and peer services".format(asic))
            clicommon.run_command('sudo systemctl stop swss@{}.service'.format(asic))

        # Check every ASIC's service, not just one of them, before the DB
        # entries are removed.
        for asic in asic_list:
            is_active = subprocess.call(["systemctl", "is-active", "--quiet", "swss@{}.service".format(asic)])

            if is_active == 0:  # zero: active, non-zero: inactive
                click.echo("Stop swss@{} and peer services failed".format(asic))
                return

        click.echo("Delete related CHASSIS_FABRIC_ASIC_TABLE entries")

        for asic in asic_list:
            chassisdb.delete("CHASSIS_STATE_DB", "CHASSIS_FABRIC_ASIC_TABLE|asic" + str(asic))

        # Start the services again in case the user only stopped them
        # (e.g. "systemctl stop swss@/syncd@") without bringing down the hardware.
        for asic in asic_list:
            # To address systemd service restart limit by resetting the count
            clicommon.run_command('sudo systemctl reset-failed swss@{}.service'.format(asic))
            click.echo("Start swss@{} and peer services".format(asic))
            clicommon.run_command('sudo systemctl start swss@{}.service'.format(asic))
    elif state == "up":
        for asic in asic_list:
            click.echo("Start swss@{} and peer services".format(asic))
            clicommon.run_command('sudo systemctl start swss@{}.service'.format(asic))
JunhongMao marked this conversation as resolved.
Show resolved Hide resolved

#
# 'shutdown' subcommand ('config chassis_modules shutdown ...')
#
Expand All @@ -33,8 +113,17 @@ def shutdown_chassis_module(db, chassis_module_name):
not chassis_module_name.startswith("FABRIC-CARD"):
ctx.fail("'module_name' has to begin with 'SUPERVISOR', 'LINE-CARD' or 'FABRIC-CARD'")

# To avoid duplicate operation
if get_config_module_state(db, chassis_module_name) == 'down':
click.echo("Module {} is already in down state".format(chassis_module_name))
return

click.echo("Shutting down chassis module {}".format(chassis_module_name))
fvs = {'admin_status': 'down'}
config_db.set_entry('CHASSIS_MODULE', chassis_module_name, fvs)
if chassis_module_name.startswith("FABRIC-CARD"):
if not check_config_module_state_with_timeout(ctx, db, chassis_module_name, 'down'):
fabric_module_set_admin_status(db, chassis_module_name, 'down')

#
# 'startup' subcommand ('config chassis_modules startup ...')
Expand All @@ -45,5 +134,15 @@ def shutdown_chassis_module(db, chassis_module_name):
def startup_chassis_module(db, chassis_module_name):
    """Chassis-module startup of module"""
    ctx = click.get_current_context()
    config_db = db.cfgdb

    # To avoid duplicate operation
    already_up = get_config_module_state(db, chassis_module_name) == 'up'
    if already_up:
        click.echo("Module {} is already set to up state".format(chassis_module_name))
        return

    click.echo("Starting up chassis module {}".format(chassis_module_name))
    # Writing None as the entry data for the module's CHASSIS_MODULE row;
    # get_config_module_state reports an empty entry as 'up'.
    config_db.set_entry('CHASSIS_MODULE', chassis_module_name, None)

    # Only fabric cards need their swss services handled.
    if not chassis_module_name.startswith("FABRIC-CARD"):
        return

    timed_out = check_config_module_state_with_timeout(ctx, db, chassis_module_name, 'up')
    if not timed_out:
        fabric_module_set_admin_status(db, chassis_module_name, 'up')
113 changes: 113 additions & 0 deletions tests/chassis_modules_test.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import tests.mock_tables.dbconnector
from utilities_common.db import Db
from .utils import get_result_and_return_code
from unittest import mock
sys.modules['clicommon'] = mock.Mock()

show_linecard0_shutdown_output="""\
LINE-CARD0 line-card 1 Empty down LC1000101
Expand All @@ -15,6 +17,15 @@
show_linecard0_startup_output="""\
LINE-CARD0 line-card 1 Empty up LC1000101
"""

show_fabriccard0_shutdown_output = """\
FABRIC-CARD0 fabric-card 17 Online down FC1000101
"""

show_fabriccard0_startup_output = """\
FABRIC-CARD0 fabric-card 17 Online up FC1000101
"""

header_lines = 2
warning_lines = 0

Expand Down Expand Up @@ -113,6 +124,11 @@
Linecard4|Asic2|PortChannel0001 2 22 Linecard4|Asic2|Ethernet29, Linecard4|Asic2|Ethernet30
"""


def mock_run_command_side_effect(*args, **kwargs):
    """Side effect for the patched run_command: ignore every argument and
    return an empty output string together with return code 0."""
    output, return_code = '', 0
    return output, return_code


class TestChassisModules(object):
@classmethod
def setup_class(cls):
Expand Down Expand Up @@ -186,6 +202,47 @@ def test_config_shutdown_module(self):
#db.cfgdb.set_entry("CHASSIS_MODULE", "LINE-CARD0", { "admin_status" : "down" })
#db.get_data("CHASSIS_MODULE", "LINE-CARD0")

def test_config_shutdown_module_fabric(self):
    """Shut down FABRIC-CARD0 (two ASICs) and check the run_command count.

    run_command is patched, so no systemctl command issued through it is
    really executed; the second shutdown is issued while the module is
    already configured down.
    """
    with mock.patch("utilities_common.cli.run_command",
                    mock.MagicMock(side_effect=mock_run_command_side_effect)) as mock_run_command:
        runner = CliRunner()
        db = Db()

        # Register asic6 and asic7 as belonging to FABRIC-CARD0.
        fabric_asics = (
            ("asic6", "0", "nokia-bdb:4:0"),
            ("asic7", "1", "nokia-bdb:4:1"),
        )
        chassisdb = db.db
        chassisdb.connect("CHASSIS_STATE_DB")
        for asic_name, id_in_module, pci_address in fabric_asics:
            table_key = "CHASSIS_FABRIC_ASIC_TABLE|" + asic_name
            chassisdb.set("CHASSIS_STATE_DB", table_key, "asic_id_in_module", id_in_module)
            chassisdb.set("CHASSIS_STATE_DB", table_key, "asic_pci_address", pci_address)
            chassisdb.set("CHASSIS_STATE_DB", table_key, "name", "FABRIC-CARD0")
        chassisdb.close("CHASSIS_STATE_DB")

        shutdown_cmd = config.config.commands["chassis"].commands["modules"].commands["shutdown"]
        status_cmd = show.cli.commands["chassis"].commands["modules"].commands["status"]

        # First shutdown.
        result = runner.invoke(shutdown_cmd, ["FABRIC-CARD0"], obj=db)
        print(result.exit_code)
        print(result.output)
        assert result.exit_code == 0

        # The status row for the card must now read "down".
        result = runner.invoke(status_cmd, ["FABRIC-CARD0"], obj=db)
        print(result.exit_code)
        print(result.output)
        result_lines = result.output.strip('\n').split('\n')
        assert result.exit_code == 0
        header_lines = 2
        result_out = " ".join((result_lines[header_lines]).split())
        assert result_out.strip('\n') == show_fabriccard0_shutdown_output.strip('\n')

        # Shutdown again with the module explicitly configured down.
        fvs = {'admin_status': 'down'}
        db.cfgdb.set_entry('CHASSIS_MODULE', "FABRIC-CARD0", fvs)
        result = runner.invoke(shutdown_cmd, ["FABRIC-CARD0"], obj=db)
        print(result.exit_code)
        print(result.output)
        assert result.exit_code == 0
        # Total run_command calls across both shutdown invocations.
        assert mock_run_command.call_count == 6

def test_config_startup_module(self):
runner = CliRunner()
db = Db()
Expand All @@ -202,6 +259,62 @@ def test_config_startup_module(self):
result_out = " ".join((result_lines[header_lines]).split())
assert result_out.strip('\n') == show_linecard0_startup_output.strip('\n')

def test_config_startup_module_fabric(self):
    """Start up FABRIC-CARD0 (two ASICs) from the down state, then again
    while it is already up, and check the run_command count each time.

    run_command is patched, so no systemctl command issued through it is
    really executed.
    """
    with mock.patch("utilities_common.cli.run_command",
                    mock.MagicMock(side_effect=mock_run_command_side_effect)) as mock_run_command:
        runner = CliRunner()
        db = Db()

        # Register asic6 and asic7 as belonging to FABRIC-CARD0.
        fabric_asics = (
            ("asic6", "0", "nokia-bdb:4:0"),
            ("asic7", "1", "nokia-bdb:4:1"),
        )
        chassisdb = db.db
        chassisdb.connect("CHASSIS_STATE_DB")
        for asic_name, id_in_module, pci_address in fabric_asics:
            table_key = "CHASSIS_FABRIC_ASIC_TABLE|" + asic_name
            chassisdb.set("CHASSIS_STATE_DB", table_key, "asic_id_in_module", id_in_module)
            chassisdb.set("CHASSIS_STATE_DB", table_key, "asic_pci_address", pci_address)
            chassisdb.set("CHASSIS_STATE_DB", table_key, "name", "FABRIC-CARD0")
        chassisdb.close("CHASSIS_STATE_DB")

        startup_cmd = config.config.commands["chassis"].commands["modules"].commands["startup"]
        status_cmd = show.cli.commands["chassis"].commands["modules"].commands["status"]

        # FC is down and doing startup
        fvs = {'admin_status': 'down'}
        db.cfgdb.set_entry('CHASSIS_MODULE', "FABRIC-CARD0", fvs)

        result = runner.invoke(startup_cmd, ["FABRIC-CARD0"], obj=db)
        print(result.exit_code)
        print(result.output)
        assert result.exit_code == 0

        result = runner.invoke(status_cmd, ["FABRIC-CARD0"], obj=db)
        print(result.exit_code)
        print(result.output)
        result_lines = result.output.strip('\n').split('\n')
        assert result.exit_code == 0
        result_out = " ".join((result_lines[header_lines]).split())
        assert result_out.strip('\n') == show_fabriccard0_startup_output.strip('\n')
        assert mock_run_command.call_count == 2

        # FC is up and doing startup
        fvs = {'admin_status': 'up'}
        db.cfgdb.set_entry('CHASSIS_MODULE', "FABRIC-CARD0", fvs)

        result = runner.invoke(startup_cmd, ["FABRIC-CARD0"], obj=db)
        print(result.exit_code)
        print(result.output)
        assert result.exit_code == 0

        result = runner.invoke(status_cmd, ["FABRIC-CARD0"], obj=db)
        print(result.exit_code)
        print(result.output)
        result_lines = result.output.strip('\n').split('\n')
        assert result.exit_code == 0
        result_out = " ".join((result_lines[header_lines]).split())
        assert result_out.strip('\n') == show_fabriccard0_startup_output.strip('\n')
        # The call count is unchanged by the second startup.
        assert mock_run_command.call_count == 2

def test_config_incorrect_module(self):
runner = CliRunner()
db = Db()
Expand Down
Loading