From 40794dbbd29ad11089b82b10dcba97f5572bd0f8 Mon Sep 17 00:00:00 2001
From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com>
Date: Sun, 17 Dec 2023 14:02:47 +0800
Subject: [PATCH] [Mellanox] implement sfp.reset for CMIS management (#16862)

- Why I did it
For CMIS host management module, we need a different implementation for sfp.reset. This PR is to implement it

- How I did it
For SW control modules, do reset from hw_reset
For FW control modules, do reset as the original way

- How to verify it
Manual test
sonic-mgmt platform test
---
Review notes (text in this section is not part of the commit message):

* This patch was restored from a copy whose line breaks and indentation had
  been collapsed. Python indentation was re-derived from syntax. Blank
  context lines (and the whitespace-only removed line in the third sfp.py
  hunk) were placed so that every hunk matches its header line counts;
  verify them against the target tree before applying.
* get_sfp_index_to_logical_port: the guard tested data['index'] against a
  dict keyed by int(data['index']) - 1, so it never checked the key that was
  about to be written. It now checks that key, and a rebuild replaces the
  cache contents so entries removed from CONFIG_DB do not linger.
* get_logical_port_by_sfp_index: the staleness check compared the CONFIG_DB
  index with the cache key without subtracting one, so a valid entry always
  looked stale and forced a rebuild on every call. It now compares like with
  like, tolerates a missing DB value, and always re-reads the cache after a
  rebuild.
* New-side hunk offsets of the later sfp.py hunks and the diffstat were
  adjusted for the longer hunk. The blob ids on the sfp.py "index" line still
  refer to the original change.

 .../mlnx-platform-api/sonic_platform/sfp.py   | 99 +++++++++++++++----
 .../mlnx-platform-api/sonic_platform/utils.py |  6 +-
 .../mlnx-platform-api/tests/test_sfp.py       | 21 ++++--
 3 files changed, 104 insertions(+), 22 deletions(-)

diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py
index c5bcfddaf112..d03c0fe10e79 100644
--- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py
+++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py
@@ -26,6 +26,7 @@
 import ctypes
 import subprocess
 import os
+import threading
 from sonic_py_common.logger import Logger
 from sonic_py_common.general import check_output_pipe
 from . import utils
@@ -219,6 +220,9 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         deinitialize_sdk_handle(self.sdk_handle)
 
 class NvidiaSFPCommon(SfpOptoeBase):
+    sfp_index_to_logical_port_dict = {}
+    sfp_index_to_logical_lock = threading.Lock()
+
     def __init__(self, sfp_index):
         super(NvidiaSFPCommon, self).__init__()
         self.index = sfp_index + 1
@@ -247,7 +251,58 @@ def _get_module_info(self, sdk_index):
         error_type = utils.read_int_from_file(status_error_file_path)
 
         return oper_state, error_type
-        
+
+    @classmethod
+    def get_sfp_index_to_logical_port(cls, force=False):
+        """Build the cached mapping from SFP index to logical port name.
+
+        The cache is keyed by the CONFIG_DB port 'index' minus one and is
+        only rebuilt when it is empty or when force is True.
+
+        Args:
+            force: rebuild the cache even if it is already populated.
+        """
+        if cls.sfp_index_to_logical_port_dict and not force:
+            return
+
+        config_db = utils.DbUtils.get_db_instance('CONFIG_DB')
+        port_data = config_db.get_table('PORT')
+        mapping = {}
+        for key, data in port_data.items():
+            # Guard on the key that is actually stored (index minus one);
+            # the raw data['index'] value is never a key of this dict.
+            mapping.setdefault(int(data['index']) - 1, key)
+
+        # Update in place so the class-level dict object stays shared, and
+        # drop entries that no longer exist in CONFIG_DB.
+        cls.sfp_index_to_logical_port_dict.clear()
+        cls.sfp_index_to_logical_port_dict.update(mapping)
+
+    @classmethod
+    def get_logical_port_by_sfp_index(cls, sfp_index):
+        """Return the logical port name mapped to an SFP index.
+
+        Args:
+            sfp_index: cache key, i.e. the CONFIG_DB port 'index' minus one.
+
+        Returns:
+            The logical port name, or None if no port maps to sfp_index.
+        """
+        with cls.sfp_index_to_logical_lock:
+            cls.get_sfp_index_to_logical_port()
+            logical_port_name = cls.sfp_index_to_logical_port_dict.get(sfp_index)
+            if logical_port_name:
+                config_db = utils.DbUtils.get_db_instance('CONFIG_DB')
+                current_index = config_db.get('CONFIG_DB', f'PORT|{logical_port_name}', 'index')
+                # The cache key is CONFIG_DB 'index' minus one, so convert
+                # before comparing; otherwise every lookup looks stale.
+                if current_index is not None and int(current_index) - 1 == sfp_index:
+                    return logical_port_name
+
+            # Cache miss or stale entry: rebuild once and look up again.
+            cls.get_sfp_index_to_logical_port(force=True)
+            return cls.sfp_index_to_logical_port_dict.get(sfp_index)
+
 class SFP(NvidiaSFPCommon):
     """Platform-specific SFP class"""
 
@@ -299,6 +354,17 @@ def get_presence(self):
         Returns:
             bool: True if device is present, False if not
         """
+        if DeviceDataManager.is_independent_mode():
+            if utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/control') != 0:
+                if not utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/hw_present'):
+                    return False
+                if not utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/power_good'):
+                    return False
+                if not utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/power_on'):
+                    return False
+                if utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/hw_reset') == 1:
+                    return False
+
         eeprom_raw = self._read_eeprom(0, 1, log_on_error=False)
         return eeprom_raw is not None
 
@@ -455,8 +521,17 @@ def reset(self):
 
         refer plugins/sfpreset.py
         """
-        file_path = SFP_SDK_MODULE_SYSFS_ROOT_TEMPLATE.format(self.sdk_index) + SFP_SYSFS_RESET
-        return utils.write_file(file_path, '1')
+        try:
+            if not self.is_sw_control():
+                file_path = SFP_SDK_MODULE_SYSFS_ROOT_TEMPLATE.format(self.sdk_index) + SFP_SYSFS_RESET
+                return utils.write_file(file_path, '1')
+            else:
+                file_path = SFP_SDK_MODULE_SYSFS_ROOT_TEMPLATE.format(self.sdk_index) + SFP_SYSFS_HWRESET
+                return utils.write_file(file_path, '0') and utils.write_file(file_path, '1')
+        except Exception as e:
+            print(f'Failed to reset module - {e}')
+            logger.log_error(f'Failed to reset module - {e}')
+            return False
 
 
     @classmethod
@@ -918,15 +993,15 @@ def is_sw_control(self):
             return False
 
         db = utils.DbUtils.get_db_instance('STATE_DB')
-        control_type = db.get('STATE_DB', f'TRANSCEIVER_MODULES_MGMT|{self.sdk_index}', 'control_type')
-        control_file_value = utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/control')
-
-        if control_type == 'SW_CONTROL' and control_file_value == 1:
-            return True
-        elif control_type == 'FW_CONTROL' and control_file_value == 0:
-            return False
-        else:
-            raise Exception(f'Module {self.sdk_index} is in initialization, please retry later')
+        logical_port = NvidiaSFPCommon.get_logical_port_by_sfp_index(self.sdk_index)
+        if not logical_port:
+            raise Exception(f'Module {self.sdk_index} is not present or in initialization')
+
+        initialized = db.exists('STATE_DB', f'TRANSCEIVER_STATUS|{logical_port}')
+        if not initialized:
+            raise Exception(f'Module {self.sdk_index} is not present or in initialization')
+
+        return utils.read_int_from_file(f'/sys/module/sx_core/asic0/module{self.sdk_index}/control') == 1
 
 
 class RJ45Port(NvidiaSFPCommon):
diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py
index 1135903c24bf..a7354ac7b864 100644
--- a/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py
+++ b/platform/mellanox/mlnx-platform-api/sonic_platform/utils.py
@@ -381,9 +381,9 @@ def get_db_instance(cls, db_name, **kargs):
                 cls.db_instances.data = {}
 
             if db_name not in cls.db_instances.data:
-                from swsscommon.swsscommon import SonicV2Connector
-                db = SonicV2Connector(use_unix_socket_path=True)
-                db.connect(db_name)
+                from swsscommon.swsscommon import ConfigDBConnector
+                db = ConfigDBConnector(use_unix_socket_path=True)
+                db.db_connect(db_name)
                 cls.db_instances.data[db_name] = db
             return cls.db_instances.data[db_name]
         except Exception as e:
diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py
index dccc727bfe57..d273e9bce700 100644
--- a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py
+++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py
@@ -266,9 +266,14 @@ def test_dummy_apis(self, mock_get_xcvr_api):
     @mock.patch('sonic_platform.utils.write_file')
     def test_reset(self, mock_write):
         sfp = SFP(0)
+        sfp.is_sw_control = mock.MagicMock(return_value=False)
         mock_write.return_value = True
         assert sfp.reset()
         mock_write.assert_called_with('/sys/module/sx_core/asic0/module0/reset', '1')
+        sfp.is_sw_control.return_value = True
+        assert sfp.reset()
+        sfp.is_sw_control.side_effect = Exception('')
+        assert not sfp.reset()
 
     @mock.patch('sonic_platform.sfp.SFP.read_eeprom')
     def test_get_xcvr_api(self, mock_read):
@@ -332,30 +337,32 @@ def test_get_temperature_threshold(self):
         assert sfp.get_temperature_warning_threashold() == 75.0
         assert sfp.get_temperature_critical_threashold() == 85.0
 
+    @mock.patch('sonic_platform.sfp.NvidiaSFPCommon.get_logical_port_by_sfp_index')
     @mock.patch('sonic_platform.utils.read_int_from_file')
     @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode')
     @mock.patch('sonic_platform.utils.DbUtils.get_db_instance')
-    def test_is_sw_control(self, mock_get_db, mock_mode, mock_read):
+    def test_is_sw_control(self, mock_get_db, mock_mode, mock_read, mock_get_logical):
         sfp = SFP(0)
         mock_mode.return_value = False
         assert not sfp.is_sw_control()
         mock_mode.return_value = True
+
+        mock_get_logical.return_value = None
+        with pytest.raises(Exception):
+            sfp.is_sw_control()
+        mock_get_logical.return_value = 'Ethernet0'
 
         mock_db = mock.MagicMock()
         mock_get_db.return_value = mock_db
-        mock_db.get = mock.MagicMock(return_value=None)
+        mock_db.exists = mock.MagicMock(return_value=False)
         with pytest.raises(Exception):
             sfp.is_sw_control()
 
+        mock_db.exists.return_value = True
         mock_read.return_value = 0
-        mock_db.get.return_value = 'FW_CONTROL'
         assert not sfp.is_sw_control()
         mock_read.return_value = 1
-        mock_db.get.return_value = 'SW_CONTROL'
         assert sfp.is_sw_control()
-        mock_read.return_value = 0
-        with pytest.raises(Exception):
-            sfp.is_sw_control()
 
     @mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode', mock.MagicMock(return_value=False))
     @mock.patch('sonic_platform.sfp.SFP.is_sw_control', mock.MagicMock(return_value=False))