Skip to content

Commit

Permalink
[Mellanox] Update HW-MGMT package to new version V.7.0030.1011 (#16239)
Browse files Browse the repository at this point in the history
- Why I did it
1. Update the Mellanox HW-MGMT package to the newer version V.7.0030.1011
2. Replace the SONiC PMON thermal control algorithm with the one inside the HW-MGMT package on all NVIDIA platforms
3. Support Spectrum-4 systems

- How I did it
1. Update the HW-MGMT package version number and submodule pointer
2. Remove the thermal control algorithm implementation from Mellanox platform API
3. Revise the patch to the HW-MGMT package that disables HW-MGMT from running on SimX
4. Update the downstream kernel patch list

Signed-off-by: Kebo Liu <kebol@nvidia.com>
  • Loading branch information
keboliu authored and mssonicbld committed Sep 21, 2023
1 parent 60f5610 commit 27f15d4
Show file tree
Hide file tree
Showing 195 changed files with 36,511 additions and 3,133 deletions.
76 changes: 4 additions & 72 deletions device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json
Original file line number Diff line number Diff line change
@@ -1,80 +1,12 @@
{
{
"thermal_control_algorithm": {
"run_at_boot_up": "true",
"fan_speed_when_suspend": "60"
},
"info_types": [
{
"type": "fan_info"
},
{
"type": "psu_info"
},
{
"type": "chassis_info"
}

],
"policies": [
{
"name": "any fan absence",
"conditions": [
{
"type": "fan.any.absence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any psu absence",
"conditions": [
{
"type": "psu.any.absence"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "any fan broken",
"conditions": [
{
"type": "fan.any.fault"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "all fan and psu presence",
"conditions": [
{
"type": "fan.all.presence"
},
{
"type": "psu.all.presence"
},
{
"type": "fan.all.good"
}
],
"actions": [
{
"type": "thermal.recover"
}
]
}

]
}
}
4 changes: 2 additions & 2 deletions platform/mellanox/hw-management.mk
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2016-2022 NVIDIA CORPORATION & AFFILIATES.
# Copyright (c) 2016-2023 NVIDIA CORPORATION & AFFILIATES.
# Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -16,7 +16,7 @@
#
# Mellanox HW Management

MLNX_HW_MANAGEMENT_VERSION = 7.0020.4301
MLNX_HW_MANAGEMENT_VERSION = 7.0030.1011

export MLNX_HW_MANAGEMENT_VERSION

Expand Down
Original file line number Diff line number Diff line change
@@ -1,69 +1,103 @@
From 422b64397f2f33b394d037820f0ceb4c09e3a725 Mon Sep 17 00:00:00 2001
From: Alexander Allen <arallen@nvidia.com>
Date: Fri, 21 Jan 2022 16:47:19 +0000
Subject: [PATCH 2/4] Disable hw-mgmt on SimX platforms
From eb3a76d7fbd0cbf2c370ecadd912960b094403d6 Mon Sep 17 00:00:00 2001
From: Junchao-Mellanox <junchao@nvidia.com>
Date: Wed, 23 Aug 2023 14:33:44 +0800
Subject: [PATCH] [PATCH 2/4] Disable hw-mgmt on SimX platforms

---
usr/usr/bin/hw-management-ready.sh | 11 +++++++----
usr/usr/bin/hw-management.sh | 9 +++++++++
2 files changed, 16 insertions(+), 4 deletions(-)
usr/usr/bin/hw-management-ready.sh | 3 ---
usr/usr/bin/hw-management.sh | 21 +++++++++------------
2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/usr/usr/bin/hw-management-ready.sh b/usr/usr/bin/hw-management-ready.sh
index 88672a8..7558c68 100755
index 840bb0b..2f8547f 100755
--- a/usr/usr/bin/hw-management-ready.sh
+++ b/usr/usr/bin/hw-management-ready.sh
@@ -51,17 +51,20 @@ if [ -d /var/run/hw-management ]; then
rm -fr /var/run/hw-management
fi

-case $board_type in
-VMOD0014)
+if [ -z "$(lspci -vvv | grep SimX)" ]; then
+ case $board_type in
+ VMOD0014)
if [ ! -d /sys/devices/pci0000:00/0000:00:1f.0/NVSN2201:00/mlxreg-hotplug/hwmon ]; then
timeout 180 bash -c 'until [ -d /sys/devices/pci0000:00/0000:00:1f.0/NVSN2201:00/mlxreg-hotplug/hwmon ]; do sleep 0.2; done'
fi
;;
-*)
+ *)
if [ ! -d /sys/devices/platform/mlxplat/mlxreg-hotplug/hwmon ]; then
timeout 180 bash -c 'until [ -d /sys/devices/platform/mlxplat/mlxreg-hotplug/hwmon ]; do sleep 0.2; done'
fi
;;
-esac
+ esac
+fi
+
echo "Start Chassis HW management service."
logger -t hw-management -p daemon.notice "Start Chassis HW management service."
@@ -56,9 +56,6 @@ fi
# environment, TC need to be stopped.
if [ -n "$(lspci -vvv | grep SimX)" ]; then
case $product_sku in
- HI130|HI122)
- # Let the TC continue to run
- ;;
*)
if systemctl is-enabled --quiet hw-management-tc; then
echo "Stopping and disabling hw-management-tc on SimX"
diff --git a/usr/usr/bin/hw-management.sh b/usr/usr/bin/hw-management.sh
index 1ee05b5..50d922b 100755
index d3914d1..a60dba9 100755
--- a/usr/usr/bin/hw-management.sh
+++ b/usr/usr/bin/hw-management.sh
@@ -2310,6 +2310,13 @@ do_chip_down()
/usr/bin/hw-management-thermal-events.sh change hotplug_asic down %S %p
}
@@ -545,7 +545,7 @@ function restore_i2c_bus_frequency_default()
function find_regio_sysfs_path_helper()
{
# Find hwmon{n} sysfs path for regio device
- case $board_type in
+ case $board_type in
VMOD0014)
for path in /sys/devices/pci0000:00/*/NVSN2201:*/mlxreg-io/hwmon/hwmon*; do
if [ -d "$path" ]; then
@@ -732,10 +732,10 @@ set_jtag_gpio()
gpio_tdi=$((gpiobase+jtag_tdi))
echo $gpio_tdi > /sys/class/gpio/"$export_unexport"

+check_simx()
+{
+ if [ -n "$(lspci -vvv | grep SimX)" ]; then
+ exit 0
- # In SN2201 system.
+ # In SN2201 system.
# GPIO0 for CPU request to reset the Main Board I2C Mux.
- # GPIO1 for CPU control the CPU Board MUX when doing the ISP programming.
- # GPIO13 for CPU request Main Board JTAG control signal.
+ # GPIO1 for CPU control the CPU Board MUX when doing the ISP programming.
+ # GPIO13 for CPU request Main Board JTAG control signal.
if [ "$board_type" == "VMOD0014" ]; then
mux_reset=27
jtag_mux_en=33
@@ -1294,7 +1294,7 @@ connect_msn4700_msn4600_A1()
# msn4600C with removed A2D
connect_table+=(${msn4600C_A1_base_connect_table[@]})
else
- # msn4700/msn4600 respin
+ # msn4700/msn4600 respin
connect_table+=(${msn4700_msn4600_A1_base_connect_table[@]})
fi
add_cpu_board_to_connection_table
@@ -2144,7 +2144,7 @@ create_symbolic_links()
fi
if [ ! -d $thermal_path ]; then
mkdir $thermal_path
- fi
+ fi
+}
+
__usage="
Usage: $(basename "$0") [Options]

@@ -2335,6 +2342,8 @@ Options:
force-reload Performs hw-management 'stop' and the 'start.
"
if [ ! -d $config_path ]; then
mkdir $config_path
fi
@@ -2341,7 +2341,7 @@ do_start()
check_system
set_asic_pci_id

+check_simx
+
case $ACTION in
start)
if [ -d /var/run/hw-management ]; then
- asic_control=$(< $config_path/asic_control)
+ asic_control=$(< $config_path/asic_control)
if [[ $asic_control -ne 0 ]]; then
get_asic_bus
get_asic2_bus
@@ -2379,9 +2379,9 @@ do_start()
else
ln -sf /etc/sensors3.conf $config_path/lm_sensors_config
fi
- if [ -v "lm_sensors_labels" ] && [ -f $lm_sensors_labels ]; then
+ if [ -v "lm_sensors_labels" ] && [ -f $lm_sensors_labels ]; then
ln -sf $lm_sensors_labels $config_path/lm_sensors_labels
- fi
+ fi
if [ -v "thermal_control_config" ] && [ -f $thermal_control_config ]; then
ln -sf $thermal_control_config $config_path/tc_config.json
else
@@ -2528,9 +2528,6 @@ do_chip_up_down()
check_simx()
{
case $sku in
- HI130|HI122)
- # Let the initialization go through
- ;;
*)
if [ -n "$(lspci -vvv | grep SimX)" ]; then
exit 0
--
2.20.1
1.9.1

2 changes: 1 addition & 1 deletion platform/mellanox/hw-management/hw-mgmt
Submodule hw-mgmt updated 350 files
114 changes: 110 additions & 4 deletions platform/mellanox/hw-management/hwmgmt_nonup_patches
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
# Current non-upstream patch list, should be updated by hwmgmt_kernel_patches.py script
0049-leds-mlxreg-Provide-conversion-for-hardware-LED-colo.patch
0050-leds-mlxreg-Skip-setting-LED-color-during-initializa.patch
0051-leds-mlxreg-Allow-multi-instantiation-of-same-name-L.patch
0098-1-Revert-mlxsw-Use-u16-for-local_port-field.patch
0098-2-Revert-mlxsw-i2c-Fix-chunk-size-setting.patch
0098-3-Revert-mlxsw-core_hwmon-Adjust-module-label-names.patch
0099-mlxsw-core_hwmon-Fix-variable-names-for-hwmon-attrib.patch
0100-mlxsw-core_thermal-Rename-labels-according-to-naming.patch
0101-mlxsw-core_thermal-Remove-obsolete-API-for-query-res.patch
Expand Down Expand Up @@ -57,7 +63,107 @@
0154-mlxsw-core-Export-line-card-API.patch
0155-mlxsw-minimal-Add-system-event-handler.patch
0156-mlxsw-minimal-Add-interfaces-for-line-card-initializ.patch
0163-platform-mellanox-Introduce-support-for-rack-manager.patch
0176-platform-mellanox-fix-reset_pwr_converter_fail-attri.patch
0177-Documentation-ABI-fix-description-of-fix-reset_pwr_c.patch
0178-platform-mellanox-Introduce-support-for-next-generat.patch
0167-DS-lan743x-Add-support-for-fixed-phy.patch
0168-TMP-mlxsw-minimal-Ignore-error-reading-SPAD-register.patch
0171-platform-mellanox-mlxreg-lc-Fix-cleanup-on-failure-a.patch
0172-DS-platform-mlx-platform-Add-SPI-path-for-rack-switc.patch
0174-DS-mlxsw-core_linecards-Skip-devlink-and-provisionin.patch
0181-Revert-Fix-out-of-bounds-memory-accesses-in-thermal.patch
0182-platform-mellanox-Introduce-support-of-new-Nvidia-L1.patch
0183-platform-mellanox-Split-initialization-procedure.patch
0184-platform-mellanox-Split-logic-in-init-and-exit-flow.patch
0185-platform-mellanox-Extend-all-systems-with-I2C-notifi.patch
0187-platform_data-mlxreg-Add-field-with-mapped-resource-.patch
0188-i2c-mux-Add-register-map-based-mux-driver.patch
0189-i2c-mlxcpld-Allow-driver-to-run-on-ARM64-architectur.patch
0190-i2c-mlxcpld-Modify-base-address-type.patch
0191-i2c-mlxcpld-Allow-to-configure-base-address-of-regis.patch
0192-i2c-mlxcpld-Add-support-for-extended-transaction-len.patch
0193-platform-mellanox-mlx-platform-Add-mux-selection-reg.patch
0194-platform-mellanox-mlx-platform-Move-bus-shift-assign.patch
0195-platform-mellanox-Add-support-for-dynamic-I2C-channe.patch
0196-platform-mellanox-Relocate-mlx-platform-driver.patch
0197-platform-mellanox-Add-initial-support-for-PCIe-based.patch
0198-platform-mellanox-Introduce-support-for-switches-bas.patch
0199-platform-mellanox-mlx-platform-Add-reset-and-extend-.patch
0200-dt-bindings-i2c-mellanox-i2c-mlxbf-convert-txt-to-YA.patch
0203-i2c-mlxbf-remove-IRQF_ONESHOT.patch
0206-i2c-mlxbf-add-multi-slave-functionality.patch
0207-i2c-mlxbf-support-BlueField-3-SoC.patch
0208-i2c-mlxbf-remove-device-tree-support.patch
0209-UBUNTU-SAUCE-i2c-mlxbf.c-Add-driver-version.patch
0210-platform-mellanox-Typo-fix-in-the-file-mlxbf-bootctl.patch
0211-UBUNTU-SAUCE-platform-mellanox-Updates-to-mlxbf-boot.patch
0212-platform-mellanox-mlxbf-pmc-Add-Mellanox-BlueField-P.patch
0213-platform-mellanox-mlxbf-pmc-fix-kernel-doc-notation.patch
0214-platform-mellanox-mlxbf-pmc-Fix-an-IS_ERR-vs-NULL-bu.patch
0215-UBUNTU-SAUCE-platform-mellanox-Updates-to-mlxbf-pmc.patch
0216-UBUNTU-SAUCE-mlxbf_pmc-Fix-references-to-sprintf.patch
0217-UBUNTU-SAUCE-mlxbf-pmc-Fix-error-when-reading-unprog.patch
0218-UBUNTU-SAUCE-platform-mellanox-Add-mlx-trio-driver.patch
0219-UBUNTU-SAUCE-platform-mellanox-mlxbf-tmfifo-Add-Blue.patch
0220-UBUNTU-SAUCE-pka-Add-pka-driver.patch
0221-UBUNTU-SAUCE-platform-mellanox-Add-mlxbf-livefish-dr.patch
0222-workqueue-Add-resource-managed-version-of-delayed-wo.patch
0223-devm-helpers-Fix-devm_delayed_work_autocancel-kernel.patch
0224-devm-helpers-Add-resource-managed-version-of-work-in.patch
0225-UBUNTU-SAUCE-Add-support-to-pwr-mlxbf.c-driver.patch
0226-Add-Mellanox-BlueField-Gigabit-Ethernet-driver.patch
0227-mlxbf_gige-clear-valid_polarity-upon-open.patch
0228-net-mellanox-mlxbf_gige-Replace-non-standard-interru.patch
0229-mlxbf_gige-increase-MDIO-polling-rate-to-5us.patch
0230-mlxbf_gige-remove-driver-managed-interrupt-counts.patch
0231-mlxbf_gige-remove-own-module-name-define-and-use-KBU.patch
0232-UBUNTU-SAUCE-mlxbf_gige-add-ethtool-mlxbf_gige_set_r.patch
0233-UBUNTU-SAUCE-Fix-OOB-handling-RX-packets-in-heavy-tr.patch
0234-UBUNTU-SAUCE-mlxbf_gige-add-validation-of-ACPI-table.patch
0235-UBUNTU-SAUCE-mlxbf_gige-set-driver-version-to-1.27.patch
0236-UBUNTU-SAUCE-mlxbf_gige-clear-MDIO-gateway-lock-afte.patch
0237-mlxbf_gige-compute-MDIO-period-based-on-i1clk.patch
0238-net-mlxbf_gige-Fix-an-IS_ERR-vs-NULL-bug-in-mlxbf_gi.patch
0239-UBUNTU-SAUCE-mlxbf_gige-add-MDIO-support-for-BlueFie.patch
0240-UBUNTU-SAUCE-mlxbf_gige-support-10M-100M-1G-speeds-o.patch
0241-UBUNTU-SAUCE-mlxbf_gige-add-BlueField-3-Serdes-confi.patch
0242-UBUNTU-SAUCE-mlxbf_gige-add-BlueField-3-ethtool_ops.patch
0243-UBUNTU-SAUCE-bluefield_edac-Add-SMC-support.patch
0244-UBUNTU-SAUCE-bluefield_edac-Update-license-and-copyr.patch
0245-gpio-mlxbf2-Convert-to-device-PM-ops.patch
0246-gpio-mlxbf2-Drop-wrong-use-of-ACPI_PTR.patch
0247-gpio-mlxbf2-Use-devm_platform_ioremap_resource.patch
0248-gpio-mlxbf2-Use-DEFINE_RES_MEM_NAMED-helper-macro.patch
0249-gpio-mlxbf2-Introduce-IRQ-support.patch
0250-UBUNTU-SAUCE-gpio-mlxbf2.c-support-driver-version.patch
0251-mmc-sdhci-of-dwcmshc-add-rockchip-platform-support.patch
0252-mmc-sdhci-of-dwcmshc-add-ACPI-support-for-BlueField-.patch
0253-mmc-sdhci-of-dwcmshc-fix-error-return-code-in-dwcmsh.patch
0254-mmc-sdhci-of-dwcmshc-set-MMC_CAP_WAIT_WHILE_BUSY.patch
0255-mmc-sdhci-of-dwcmshc-Re-enable-support-for-the-BlueF.patch
0256-UBUNTU-SAUCE-Support-BlueField-3-GPIO-driver.patch
0257-regmap-debugfs-Enable-writing-to-the-regmap-debugfs-.patch
0258-UBUNTU-SAUCE-mlx-bootctl-support-icm-carveout-eeprom.patch
0259-mmc-sdhci-of-dwcmshc-Enable-host-V4-support-for-Blue.patch
0260-UBUNTU-SAUCE-mlxbf-pka-Fix-kernel-crash-with-pka-TRN.patch
0261-mlxbf-ptm-power-and-thermal-management-debugfs-drive.patch
0262-UBUNTU-SAUCE-mlxbf-pmc-Fix-event-string-typo.patch
0263-UBUNTU-SAUCE-mlxbf-pmc-Support-for-BlueField-3-perfo.patch
0264-UBUNTU-SAUCE-platform-mellanox-Add-ctrl-message-and-.patch
0266-UBUNTU-SAUCE-mlxbf-pmc-Bug-fix-for-BlueField-3-count.patch
0267-UBUNTU-SAUCE-mmc-sdhci-of-dwcmshc-add-the-missing-de.patch
0268-DS-mlxsw-core_linecards-Disable-firmware-bundling-ma.patch
0269-platform-mellanox-Cosmetic-changes.patch
0270-platform-mellanox-Fix-order-in-exit-flow.patch
0271-platform-mellanox-Add-new-attributes.patch
0272-platform-mellanox-Change-register-offset-addresses.patch
0273-platform-mellanox-Add-field-upgrade-capability-regis.patch
0274-platform-mellanox-Modify-reset-causes-description.patch
0275-mlxsw-Use-u16-for-local_port-field-instead-of-u8.patch
0276-mlxsw-minimal-Change-type-for-local-port.patch
0277-mlxsw-i2c-Fix-chunk-size-setting-in-output-mailbox-b.patch
0278-platform-mellanox-mlx-platform-Modify-graceful-shutd.patch
0279-platform-mellanox-mlx-platform-Fix-signals-polarity-.patch
0280-platform-mellanox-mlxreg-hotplug-Extend-condition-fo.patch
0281-platform-mellanox-mlx-platform-Modify-health-and-pow.patch
0282-platform-mellanox-mlx-platform-add-support-of-5th-CP.patch
0283-mlxsw-core_hwmon-Align-modules-label-name-assignment.patch
0284-platform-mellanox-mlx-platform-fix-CPLD4-PN-report.patch
9002-TMP-fix-for-fan-minimum-speed.patch
Loading

0 comments on commit 27f15d4

Please sign in to comment.