Skip to content

Commit

Permalink
[Mellanox] Add new thermal sensors for SN5600 (sonic-net#12671)
Browse files Browse the repository at this point in the history
- Why I did it
The SN5600 has additional thermal sensors (PCH and SODIMM) that the platform API did not yet expose, so add them.

- How I did it
Add new thermal sensors for SN5600: PCH and SODIMM

- How to verify it
Manual test
  • Loading branch information
Junchao-Mellanox authored Nov 14, 2022
1 parent 39ebf80 commit 20d885d
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 4 deletions.
11 changes: 11 additions & 0 deletions platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@
}
},
'x86_64-nvidia_sn5600-r0': {
'thermal': {
"capability": {
"comex_amb": False,
"pch_temp": True
}
}
}
}

Expand Down Expand Up @@ -225,6 +231,11 @@ def get_gearbox_count(cls, sysfs_folder):
def get_cpu_thermal_count(cls):
    """Return the number of CPU core thermal entries published by hw-management.

    Counts the paths matching ``/run/hw-management/thermal/cpu_core[!_]``,
    i.e. ``cpu_core`` followed by exactly one character that is not an
    underscore.

    Returns:
        int: number of matching paths; 0 when none exist.
    """
    # The pattern must be absolute: a relative 'run/...' pattern is resolved
    # against the current working directory and only matches when the
    # process happens to run from '/'.
    return len(glob.glob('/run/hw-management/thermal/cpu_core[!_]'))

@classmethod
@utils.read_only_cache()
def get_sodimm_thermal_count(cls):
    """Return the number of SODIMM temperature input files found.

    Counts the paths matching
    ``/run/hw-management/thermal/sodimm*_temp_input``.

    Returns:
        int: number of matching paths; 0 when none exist.
    """
    sodimm_inputs = glob.glob('/run/hw-management/thermal/sodimm*_temp_input')
    return len(sodimm_inputs)

@classmethod
@utils.read_only_cache()
def get_minimum_table(cls):
Expand Down
16 changes: 15 additions & 1 deletion platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,18 @@
"name": "Ambient Switch Board Temp",
"temperature": "swb_amb",
"default_present": False
},
{
"name": "PCH Temp",
"temperature": "pch_temp",
"default_present": False
},
{
"name": "SODIMM {} Temp",
"temperature": "sodimm{}_temp_input",
"high_threshold": "sodimm{}_temp_max",
"high_critical_threshold": "sodimm{}_temp_crit",
"type": "indexable",
}
],
'linecard thermals': {
Expand Down Expand Up @@ -161,6 +173,8 @@ def initialize_chassis_thermals():
count = DeviceDataManager.get_gearbox_count('/run/hw-management/config')
elif 'CPU Core' in rule['name']:
count = DeviceDataManager.get_cpu_thermal_count()
elif 'SODIMM' in rule['name']:
count = DeviceDataManager.get_sodimm_thermal_count()
if count == 0:
logger.log_debug('Failed to get thermal object count for {}'.format(rule['name']))
continue
Expand Down Expand Up @@ -524,7 +538,7 @@ def monitor_asic_themal_zone(cls):
else:
cls.expect_cooling_state = None


class RemovableThermal(Thermal):
def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position, presence_cb):
super(RemovableThermal, self).__init__(name, temp_file, high_th_file, high_crit_th_file, position)
Expand Down
22 changes: 19 additions & 3 deletions platform/mellanox/mlnx-platform-api/tests/test_thermal.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class TestThermal:
@mock.patch('os.path.exists', mock.MagicMock(return_value=True))
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_gearbox_count', mock.MagicMock(return_value=2))
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_cpu_thermal_count', mock.MagicMock(return_value=2))
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_sodimm_thermal_count', mock.MagicMock(return_value=2))
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_platform_name', mock.MagicMock(return_value='x86_64-mlnx_msn2700-r0'))
def test_chassis_thermal(self):
from sonic_platform.thermal import THERMAL_NAMING_RULE
Expand All @@ -48,6 +49,7 @@ def test_chassis_thermal(self):
thermal_dict = {thermal.get_name(): thermal for thermal in thermal_list}
gearbox_thermal_rule = None
cpu_thermal_rule = None
sodimm_thermal_rule = None
for rule in THERMAL_NAMING_RULE['chassis thermals']:
thermal_type = rule.get('type', 'single')
if thermal_type == 'single':
Expand All @@ -69,9 +71,12 @@ def test_chassis_thermal(self):
gearbox_thermal_rule = rule
elif 'CPU Core' in rule['name']:
cpu_thermal_rule = rule
elif 'SODIMM' in rule['name']:
sodimm_thermal_rule = rule

gearbox_thermal_count = 0
cpu_thermal_count = 0
sodimm_thermal_count = 0
for thermal in thermal_list:
if 'Gearbox' in thermal.get_name():
start_index = gearbox_thermal_rule.get('start_index', 1)
Expand All @@ -89,21 +94,32 @@ def test_chassis_thermal(self):
assert cpu_thermal_rule['high_threshold'].format(start_index) in thermal.high_threshold
assert cpu_thermal_rule['high_critical_threshold'].format(start_index) in thermal.high_critical_threshold
cpu_thermal_count += 1
elif 'SODIMM' in thermal.get_name():
start_index = sodimm_thermal_rule.get('start_index', 1)
start_index += sodimm_thermal_count
assert thermal.get_name() == sodimm_thermal_rule['name'].format(start_index)
assert sodimm_thermal_rule['temperature'].format(start_index) in thermal.temperature
assert sodimm_thermal_rule['high_threshold'].format(start_index) in thermal.high_threshold
assert sodimm_thermal_rule['high_critical_threshold'].format(start_index) in thermal.high_critical_threshold
sodimm_thermal_count += 1

assert gearbox_thermal_count == 2
assert cpu_thermal_count == 2
assert sodimm_thermal_count == 2

@mock.patch('sonic_platform.device_data.DeviceDataManager.get_platform_name', mock.MagicMock(return_value='x86_64-nvidia_sn2201-r0'))
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_thermal_capability')
def test_chassis_thermal_includes(self, mock_capability):
    """Check that optional thermals enabled by the capability map are created.

    The thermal capability lookup is patched to return a fixed dict. For
    every chassis thermal rule that is not present by default and whose
    ``temperature`` key is truthy in that dict, a thermal with the rule's
    name must be in the chassis thermal list.
    """
    from sonic_platform.thermal import THERMAL_NAMING_RULE
    thermal_capability = {'comex_amb': False, 'cpu_amb': True, 'swb_amb': True}
    mock_capability.return_value = thermal_capability
    chassis = Chassis()
    thermal_list = chassis.get_all_thermals()
    assert thermal_list
    thermal_dict = {thermal.get_name(): thermal for thermal in thermal_list}
    for rule in THERMAL_NAMING_RULE['chassis thermals']:
        default_present = rule.get('default_present', True)
        # Only optional rules that the capability dict enables are checked;
        # optional rules that are disabled or absent from it are skipped.
        if not default_present and thermal_capability.get(rule['temperature']):
            thermal_name = rule['name']
            assert thermal_name in thermal_dict

Expand Down

0 comments on commit 20d885d

Please sign in to comment.