Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Mellanox] Fix retry logic on discovery of MST device #100

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 1 addition & 18 deletions files/scripts/syncd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,6 @@

. /usr/local/bin/syncd_common.sh

declare -r UNKN_MST="unknown"

# Print the path of the MST device node matching /dev/mst/*_pci_cr0.
# Globals:  UNKN_MST (read) - sentinel printed when no usable device is found
# Outputs:  the device path on stdout when the pattern resolves to exactly one
#           character device; otherwise the value of UNKN_MST
function GetMstDevice() {
    # Expand the glob in the shell instead of parsing `ls` output: no extra
    # process, and no `ls` error text ends up in the candidate value.
    # When nothing matches, the array holds the literal pattern (or is empty
    # under nullglob); both cases fall through to the sentinel below.
    local -a _MST_DEVICES=(/dev/mst/*_pci_cr0)

    # Several matches are treated as "unknown", as before: the previous
    # multi-line `ls` result never passed the -c test either.
    if [[ "${#_MST_DEVICES[@]}" -eq 1 && -c "${_MST_DEVICES[0]}" ]]; then
        echo "${_MST_DEVICES[0]}"
    else
        echo "${UNKN_MST}"
    fi
}

function startplatform() {

# platform specific tasks
Expand All @@ -36,12 +24,7 @@ function startplatform() {
debug "Starting Firmware update procedure"
/usr/bin/mst start --with_i2cdev

local -r _MST_DEVICE="$(GetMstDevice)"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose we can remove the GetMstDevice function from syncd.sh.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

if [[ "${_MST_DEVICE}" != "${UNKN_MST}" ]]; then
/usr/bin/flint -d $_MST_DEVICE --clear_semaphore
fi

/usr/bin/mlnx-fw-upgrade.sh -v
/usr/bin/mlnx-fw-upgrade.sh -c -v
if [[ "$?" -ne "${EXIT_SUCCESS}" ]]; then
debug "Failed to upgrade fw. " "$?" "Restart syncd"
exit 1
Expand Down
31 changes: 25 additions & 6 deletions platform/mellanox/mlnx-fw-upgrade.j2
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ function PrintHelp() {
echo " -s, --syslog Use syslog logger (enabled when -u|--upgrade)"
echo " -v, --verbose Verbose mode (enabled when -u|--upgrade)"
echo " -d, --dry-run Compare the FW versions without installation. Return code "0" means the FW is up-to-date, return code "10" means an upgrade is required, otherwise an error is detected."
echo " -c, --clear-semaphore Clear hw resources before updating firmware"
echo " -h, --help Print help"
echo
echo "Examples:"
Expand All @@ -103,6 +104,9 @@ function ParseArguments() {
-d|--dry-run)
DRY_RUN="${YES_PARAM}"
;;
-c|--clear-semaphore)
CLEAR_SEMAPHORE="${YES_PARAM}"
;;
-h|--help)
PrintHelp
exit "${EXIT_SUCCESS}"
Expand Down Expand Up @@ -210,16 +214,20 @@ function WaitForDevice() {
local -i QUERY_RETRY_COUNT_MAX="10"
local -i QUERY_RETRY_COUNT="0"
local -r DEVICE_TYPE=$(GetMstDeviceType)
local SPC_MST_DEV
local QUERY_RC=""

local SPC_MST_DEV=$(GetSPCMstDevice)

while [[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("${SPC_MST_DEV}" == "${UNKN_MST}") ]]; do
while : ; do
SPC_MST_DEV=$(GetSPCMstDevice)
${QUERY_XML} -d ${SPC_MST_DEV} -o ${QUERY_FILE}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This only adds 0.2–0.3 sec on an SPC1 device under high load, so it doesn't look like a big hit to me.

QUERY_RC="$?"
[[ ("${QUERY_RETRY_COUNT}" -lt "${QUERY_RETRY_COUNT_MAX}") && ("${QUERY_RC}" != "${EXIT_SUCCESS}") ]] || break
sleep 1s
((QUERY_RETRY_COUNT++))
SPC_MST_DEV=$(GetSPCMstDevice)
LogInfo "Retrying MST device query ${QUERY_RETRY_COUNT}"
done

if [[ "${SPC_MST_DEV}" == "${UNKN_MST}" ]]; then
if [[ "${QUERY_RC}" != "${EXIT_SUCCESS}" ]]; then
# Couldn't Detect the Spectrum ASIC. Exit failure and print the detailed information
output=$(${QUERY_CMD})
failure_msg="${output#*Fail : }"
Expand Down Expand Up @@ -265,7 +273,7 @@ function GetSPCMstDevice() {

if [[ ! -c "${_MST_DEVICE}" ]]; then
echo "${UNKN_MST}"
else
else
echo "${_MST_DEVICE}"
fi

Expand Down Expand Up @@ -482,6 +490,15 @@ function Cleanup() {
fi
}

# Run `flint --clear_semaphore` on the MST device when -c|--clear-semaphore
# was requested.
# Globals:  CLEAR_SEMAPHORE, YES_PARAM, UNKN_MST (read)
# Returns:  0 when the step is skipped (option not set, or no device found);
#           otherwise the exit status of flint
function ClearSemaphore() {
    # Nothing to do unless the caller asked for it.
    if [[ "${CLEAR_SEMAPHORE}" != "${YES_PARAM}" ]]; then
        return 0
    fi

    # Declare and assign separately so `local` does not mask the helper's
    # exit status.
    local _MST_DEVICE
    _MST_DEVICE="$(GetSPCMstDevice)"

    # Skip silently when GetSPCMstDevice reports the "unknown" sentinel.
    if [[ "${_MST_DEVICE}" != "${UNKN_MST}" ]]; then
        # Quoted so the device path is always passed as a single argument.
        /usr/bin/flint -d "${_MST_DEVICE}" --clear_semaphore
    fi
}

trap Cleanup EXIT

ParseArguments "$@"
Expand All @@ -492,6 +509,8 @@ LockStateChange

WaitForDevice

ClearSemaphore

if [ "${IMAGE_UPGRADE}" != "${YES_PARAM}" ]; then
UpgradeFW
else
Expand Down