Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[202311][Mellanox] implement platform wait in python code (#17398) #17719

Merged
merged 1 commit into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 32 additions & 69 deletions device/mellanox/x86_64-mlnx_msn2700-r0/platform_wait
Original file line number Diff line number Diff line change
@@ -1,69 +1,32 @@
#!/bin/bash

# Syslog settings shared by the log_error/log_notice/log_info helpers:
# the logger binary, the tag attached to every message, and the priorities.
declare -r SYSLOG_LOGGER="/usr/bin/logger"
declare -r SYSLOG_IDENTIFIER="platform_wait"
declare -r SYSLOG_ERROR="error"
declare -r SYSLOG_NOTICE="notice"
declare -r SYSLOG_INFO="info"

# Directory holding the hw-management files polled by wait_for_sfp.
declare -r HW_MGMT_CONFIG="/var/run/hw-management/config"

# Counter files compared by wait_for_sfp; it succeeds once both hold the same
# positive integer.
declare -r MODULE_COUNTER="${HW_MGMT_CONFIG}/module_counter"
declare -r SFP_COUNTER="${HW_MGMT_CONFIG}/sfp_counter"

# Exit statuses of this script (and return codes of wait_for_sfp).
declare -r EXIT_SUCCESS="0"
declare -r EXIT_TIMEOUT="1"

# Send a message to syslog at error priority, tagged with SYSLOG_IDENTIFIER.
# Arguments: the message words; they are joined with single spaces.
function log_error() {
    # Invoke logger directly instead of through eval: the message is passed as
    # data, so quotes or shell metacharacters in it are never re-parsed as
    # shell code. "--" lets a message start with a hyphen.
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_ERROR}" -- "$*"
}

# Send a message to syslog at notice priority, tagged with SYSLOG_IDENTIFIER.
# Arguments: the message words; they are joined with single spaces.
function log_notice() {
    # Invoke logger directly instead of through eval: the message is passed as
    # data, so quotes or shell metacharacters in it are never re-parsed as
    # shell code. "--" lets a message start with a hyphen.
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_NOTICE}" -- "$*"
}

# Send a message to syslog at info priority, tagged with SYSLOG_IDENTIFIER.
# Arguments: the message words; they are joined with single spaces.
function log_info() {
    # Invoke logger directly instead of through eval: the message is passed as
    # data, so quotes or shell metacharacters in it are never re-parsed as
    # shell code. "--" lets a message start with a hyphen.
    "${SYSLOG_LOGGER}" -t "${SYSLOG_IDENTIFIER}" -p "${SYSLOG_INFO}" -- "$*"
}

# Poll the module and SFP counter files until they agree.
#
# Once per attempt both MODULE_COUNTER and SFP_COUNTER are read. The wait
# succeeds as soon as both contain only digits, the SFP counter is greater
# than zero and the two counters are equal. Up to 300 attempts are made, with
# a 1s sleep after every unsuccessful attempt.
#
# Returns: EXIT_SUCCESS when the counters match, EXIT_TIMEOUT otherwise.
function wait_for_sfp() {
    local -r digits_only="^[0-9]+$"
    local -ir max_attempts="300"
    local -r poll_delay="1s"

    local -i attempt
    local modules="0"
    local sfps="0"

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        # stderr is captured too, so a missing file yields non-numeric text
        # that the digit check below rejects.
        modules="$(cat ${MODULE_COUNTER} 2>&1)"
        sfps="$(cat ${SFP_COUNTER} 2>&1)"

        if [[ "${modules}" =~ ${digits_only} && "${sfps}" =~ ${digits_only} \
              && "${sfps}" -gt "0" && "${modules}" -eq "${sfps}" ]]; then
            return "${EXIT_SUCCESS}"
        fi

        sleep "${poll_delay}"
    done

    return "${EXIT_TIMEOUT}"
}

# Main sequence: announce the wait, run it, and exit with its outcome.
log_info "Wait for SFP interfaces to be ready"

wait_for_sfp || {
    # $? here is still the return code of wait_for_sfp.
    EXIT_CODE="$?"
    log_error "SFP interfaces are not ready: timeout"
    exit "${EXIT_CODE}"
}

log_info "SFP interfaces are ready"
exit "${EXIT_SUCCESS}"
#!/usr/bin/python3

#
# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
# Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import sys
from sonic_platform.device_data import DeviceDataManager
from sonic_py_common.logger import Logger


# Block until DeviceDataManager reports the platform as ready, logging the
# outcome under the 'platform_wait' identifier and exiting with 0 on success
# or -1 on timeout.
logger = Logger(log_identifier='platform_wait')
logger.log_notice('Nvidia: Wait for PMON dependencies to be ready')

if not DeviceDataManager.wait_platform_ready():
    logger.log_error('Nvidia: PMON dependencies are not ready: timeout')
    sys.exit(-1)

logger.log_notice('Nvidia: PMON dependencies are ready')
sys.exit(0)
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import glob
import os
import time

from . import utils

Expand Down Expand Up @@ -167,8 +168,11 @@ def is_psu_hotswapable(cls):
@classmethod
@utils.read_only_cache()
def get_sfp_count(cls):
    """Return the number of SFPs declared in the platform's platform.json.

    The count is the length of the ``chassis`` -> ``sfps`` list in the
    ``platform.json`` file located in the platform directory reported by
    ``device_info.get_path_to_platform_dir()``.

    Returns:
        int: number of entries in ``platform_data['chassis']['sfps']``.
    """
    # NOTE(review): utils.read_only_cache() presumably memoizes the result so
    # platform.json is parsed only once - confirm against utils.
    from sonic_py_common import device_info
    platform_path = device_info.get_path_to_platform_dir()
    platform_json_path = os.path.join(platform_path, 'platform.json')
    platform_data = utils.load_json_file(platform_json_path)
    return len(platform_data['chassis']['sfps'])

@classmethod
def get_linecard_sfp_count(cls, lc_index):
Expand Down Expand Up @@ -244,3 +248,23 @@ def is_independent_mode(cls):
sai_profile_file = os.path.join(hwsku_dir, 'sai.profile')
data = utils.read_key_value_file(sai_profile_file, delimeter='=')
return data.get('SAI_INDEPENDENT_MODULE_MODE') == '1'

@classmethod
def wait_platform_ready(cls):
    """Wait until the Nvidia platform services (SDK, hw-management) are ready.

    Readiness means that, for every SFP index reported by
    ``get_sfp_count()``, each expected sysfs node exists under
    ``/sys/module/sx_core/asic0/module<index>/``. In independent module
    mode an extended set of nodes is required; otherwise the
    ``asics_init_done`` flag file must additionally read 1.

    The conditions are polled through ``utils.wait_until_conditions`` with
    a timeout of 300 and an interval of 1.

    Returns:
        bool: True if all conditions were met, False on timeout.
    """
    conditions = []
    sysfs_nodes = ['power_mode', 'power_mode_policy', 'present', 'reset', 'status', 'statuserror']
    if cls.is_independent_mode():
        sysfs_nodes.extend(['control', 'frequency', 'frequency_support', 'hw_present', 'hw_reset',
                            'power_good', 'power_limit', 'power_on', 'temperature/input'])
    else:
        conditions.append(lambda: utils.read_int_from_file('/var/run/hw-management/config/asics_init_done') == 1)
    sfp_count = cls.get_sfp_count()
    for sfp_index in range(sfp_count):
        for sysfs_node in sysfs_nodes:
            sysfs_path = f'/sys/module/sx_core/asic0/module{sfp_index}/{sysfs_node}'
            # Bind the path as a default argument: the lambdas run only after
            # both loops have finished, so a plain closure over the loop
            # variables would make every condition probe the last module's
            # last node.
            conditions.append(lambda path=sysfs_path: os.path.exists(path))
    return utils.wait_until_conditions(conditions, 300, 1)
24 changes: 24 additions & 0 deletions platform/mellanox/mlnx-platform-api/sonic_platform/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,30 @@ def wait_until(predict, timeout, interval=1, *args, **kwargs):
return False


def wait_until_conditions(conditions, timeout, interval=1):
    """Poll a set of conditions until every one of them has returned true.

    Each round calls the conditions that have not yet succeeded; a condition
    that returns a truthy value is not called again in later rounds. Between
    rounds the function sleeps for ``interval`` and deducts it from the
    remaining time budget.

    Args:
        conditions (list): callables taking no arguments and returning True|False
        timeout (int): total wait time in seconds
        interval (int, optional): pause between polling rounds. Defaults to 1.

    Returns:
        bool: True once all conditions have succeeded, False if the time
        budget runs out first (or is not positive to begin with).
    """
    remaining = timeout
    pending = conditions
    while remaining > 0:
        # Keep only the checks that still fail; each is evaluated once per round.
        pending = [check for check in pending if not check()]
        if not pending:
            return True
        time.sleep(interval)
        remaining -= interval
    return False


class TimerEvent:
def __init__(self, interval, cb, repeat):
self.interval = interval
Expand Down
24 changes: 22 additions & 2 deletions platform/mellanox/mlnx-platform-api/tests/test_device_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,26 @@ def test_is_independent_mode(self, mock_read):
mock_read.return_value = {'SAI_INDEPENDENT_MODULE_MODE': '1'}
assert DeviceDataManager.is_independent_mode()

@mock.patch('sonic_py_common.device_info.get_path_to_platform_dir', mock.MagicMock(return_value='/tmp'))
@mock.patch('sonic_platform.device_data.utils.load_json_file')
def test_get_sfp_count(self, mock_load_json):
    """get_sfp_count reports the length of chassis.sfps from the loaded JSON."""
    fake_sfps = [1, 2, 3]
    mock_load_json.return_value = {'chassis': {'sfps': fake_sfps}}
    assert DeviceDataManager.get_sfp_count() == 3



@mock.patch('sonic_platform.device_data.time.sleep', mock.MagicMock())
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_sfp_count', mock.MagicMock(return_value=3))
@mock.patch('sonic_platform.device_data.utils.read_int_from_file', mock.MagicMock(return_value=1))
@mock.patch('sonic_platform.device_data.os.path.exists')
@mock.patch('sonic_platform.device_data.DeviceDataManager.is_independent_mode')
def test_wait_platform_ready(self, mock_is_indep, mock_exists):
    """wait_platform_ready succeeds when nodes exist and times out when they do not."""
    # Every sysfs node exists: ready in independent mode and in the default mode.
    mock_exists.return_value = True
    for independent_mode in (True, False):
        mock_is_indep.return_value = independent_mode
        assert DeviceDataManager.wait_platform_ready()

    # No sysfs node exists (still in the default mode): the wait must time out.
    mock_exists.return_value = False
    assert not DeviceDataManager.wait_platform_ready()
7 changes: 7 additions & 0 deletions platform/mellanox/mlnx-platform-api/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,13 @@ def test_read_key_value_file(self):
mock_os_open = mock.mock_open(read_data='a=b')
with mock.patch('sonic_platform.utils.open', mock_os_open):
assert utils.read_key_value_file('some_file', delimeter='=') == {'a':'b'}

@mock.patch('sonic_platform.utils.time.sleep', mock.MagicMock())
def test_wait_until_conditions(self):
    """wait_until_conditions returns True when satisfied and False on timeout."""
    always_true = [lambda: True]
    assert utils.wait_until_conditions(always_true, 1)

    always_false = [lambda: False]
    assert not utils.wait_until_conditions(always_false, 1)

def test_timer(self):
timer = utils.Timer()
Expand Down
Loading