Retry test_psu.py::TestPsuApi::test_power if power not within toleran…

…ce (sonic-net#14788) * Retry test_power calculated power above 10% If the test fails on `abs(power - (voltage*current)) < power*0.1` then retry the test. Test is repeated for a maximum of three times until it passes, else it is failed. This change is introduced to account for the stringent tolerance level of 10% and the errors that might be caused due to time differences reading each of the parameters. * Retry test_power.py::TestPsuApi::test_power if power above tolerance If the test fails on `abs(power - (voltage*current)) < power*0.1` then retry the test. The test is repeated for a maximum of three times until it passes. This change is introduced to account for the stringent tolerance level of 10% and the errors that might be caused due to time differences in reading each of the parameters. * Add check to detect occurrence of a failure before power calculation * Resolve pre-commit check issue * Refactor test_power function to improve readability * Use wait_until function for retry
sreejithsreekumaran · Nov 15, 2024 · 33b9f71 · 33b9f71
1 parent 1c865b6
commit 33b9f71
Show file tree

Hide file tree

Showing 2 changed files with 54 additions and 48 deletions.
diff --git a/tests/platform_tests/api/platform_api_test_base.py b/tests/platform_tests/api/platform_api_test_base.py
@@ -30,3 +30,6 @@ def assert_expectations(self):
             # TODO: When we move to Python 3.3+, we can use self.failed_expectations.clear() instead
             del self.failed_expectations[:]
             pytest_assert(False, err_msg)
+
+    def get_len_failed_expectations(self):
+        return len(self.failed_expectations)
diff --git a/tests/platform_tests/api/test_psu.py b/tests/platform_tests/api/test_psu.py
@@ -6,7 +6,7 @@
 from tests.common.utilities import skip_release
 from tests.platform_tests.cli.util import get_skip_mod_list
 from .platform_api_test_base import PlatformApiTestBase
-from tests.common.utilities import skip_release_for_platform
+from tests.common.utilities import skip_release_for_platform, wait_until
 
 
 ###################################################
@@ -89,6 +89,16 @@ def skip_absent_psu(self, psu_num, platform_api_conn):
             return True
         return False
 
+    def get_psu_parameter(self, psu_info, psu_parameter, get_data, message):
+        data = None
+        is_supported = self.get_psu_facts(psu_info["duthost"], psu_info["psu_id"], True, psu_parameter)
+        if is_supported:
+            data = get_data(psu_info["api"], psu_info["psu_id"])
+            if self.expect(data is not None, "Failed to retrieve {} of PSU {}".format(message, psu_info["psu_id"])):
+                self.expect(isinstance(data, float), "PSU {} {} appears incorrect".format(psu_info["psu_id"], message))
+
+        return data
+
     #
     # Functions to test methods inherited from DeviceBase class
     #
@@ -204,68 +214,61 @@ def test_power(self, duthosts, enum_rand_one_per_hwsku_hostname, localhost, plat
         ''' PSU power test '''
         duthost = duthosts[enum_rand_one_per_hwsku_hostname]
         skip_release_for_platform(duthost, ["202012", "201911", "201811"], ["arista"])
+        voltage = current = power = None
+        psu_info = {
+            "duthost": duthost,
+            "api": platform_api_conn,
+            "psu_id": None
+        }
+
+        def check_psu_power(failure_count):
+            nonlocal voltage
+            nonlocal current
+            nonlocal power
+            voltage = self.get_psu_parameter(psu_info, "voltage", psu.get_voltage, "voltage")
+            current = self.get_psu_parameter(psu_info, "current", psu.get_current, "current")
+            power = self.get_psu_parameter(psu_info, "power", psu.get_power, "power")
+
+            failure_occured = self.get_len_failed_expectations() > failure_count
+            if current and voltage and power:
+                is_within_tolerance = abs(power - (voltage*current)) < power*0.1
+                if not failure_occured and not is_within_tolerance:
+                    return False
+
+                self.expect(is_within_tolerance, "PSU {} reading does not make sense \
+                    (power:{}, voltage:{}, current:{})".format(psu_id, power, voltage, current))
+
+            return True
 
         for psu_id in range(self.num_psus):
+            failure_count = self.get_len_failed_expectations()
+            psu_info['psu_id'] = psu_id
             name = psu.get_name(platform_api_conn, psu_id)
             if name in self.psu_skip_list:
                 logger.info("skipping check for {}".format(name))
             else:
-                voltage = None
-                voltage_supported = self.get_psu_facts(duthost, psu_id, True, "voltage")
-                if voltage_supported:
-                    voltage = psu.get_voltage(platform_api_conn, psu_id)
-                    if self.expect(voltage is not None, "Failed to retrieve voltage of PSU {}".format(psu_id)):
-                        self.expect(isinstance(voltage, float), "PSU {} voltage appears incorrect".format(psu_id))
-                current = None
-                current_supported = self.get_psu_facts(duthost, psu_id, True, "current")
-                if current_supported:
-                    current = psu.get_current(platform_api_conn, psu_id)
-                    if self.expect(current is not None, "Failed to retrieve current of PSU {}".format(psu_id)):
-                        self.expect(isinstance(current, float), "PSU {} current appears incorrect".format(psu_id))
-                power = None
-                power_supported = self.get_psu_facts(duthost, psu_id, True, "power")
-                if power_supported:
-                    power = psu.get_power(platform_api_conn, psu_id)
-                    if self.expect(power is not None, "Failed to retrieve power of PSU {}".format(psu_id)):
-                        self.expect(isinstance(power, float), "PSU {} power appears incorrect".format(psu_id))
-                max_supp_power = None
-                max_power_supported = self.get_psu_facts(duthost, psu_id, True, "max_power")
-                if max_power_supported:
-                    max_supp_power = psu.get_maximum_supplied_power(platform_api_conn, psu_id)
-                    if self.expect(max_supp_power is not None,
-                                   "Failed to retrieve maximum supplied power power of PSU {}".format(psu_id)):
-                        self.expect(isinstance(max_supp_power, float),
-                                    "PSU {} maximum supplied power appears incorrect".format(psu_id))
-
-                if current is not None and voltage is not None and power is not None:
-                    self.expect(abs(power - (voltage*current)) < power*0.1, "PSU {} reading does not make sense \
-                        (power:{}, voltage:{}, current:{})".format(psu_id, power, voltage, current))
+                check_result = wait_until(30, 10, 0, check_psu_power, failure_count)
+                self.expect(check_result, "PSU {} reading does not make sense \
+                (power:{}, voltage:{}, current:{})".format(psu_id, power, voltage, current))
+
+                self.get_psu_parameter(psu_info, "max_power", psu.get_maximum_supplied_power,
+                                       "maximum supplied power")
 
                 powergood_status = psu.get_powergood_status(platform_api_conn, psu_id)
                 if self.expect(powergood_status is not None,
                                "Failed to retrieve operational status of PSU {}".format(psu_id)):
                     self.expect(powergood_status is True, "PSU {} is not operational".format(psu_id))
 
-                high_threshold = None
-                voltage_high_threshold_supported = self.get_psu_facts(duthost, psu_id, True, "voltage_high_threshold")
-                if voltage_high_threshold_supported:
-                    high_threshold = psu.get_voltage_high_threshold(platform_api_conn, psu_id)
-                    if self.expect(high_threshold is not None,
-                                   "Failed to retrieve the high voltage threshold of PSU {}".format(psu_id)):
-                        self.expect(isinstance(high_threshold, float),
-                                    "PSU {} voltage high threshold appears incorrect".format(psu_id))
-                low_threshold = None
-                voltage_low_threshold_supported = self.get_psu_facts(duthost, psu_id, True, "voltage_low_threshold")
-                if voltage_low_threshold_supported:
-                    low_threshold = psu.get_voltage_low_threshold(platform_api_conn, psu_id)
-                    if self.expect(low_threshold is not None,
-                                   "Failed to retrieve the low voltage threshold of PSU {}".format(psu_id)):
-                        self.expect(isinstance(low_threshold, float),
-                                    "PSU {} voltage low threshold appears incorrect".format(psu_id))
-                if high_threshold is not None and low_threshold is not None:
+                high_threshold = self.get_psu_parameter(psu_info, "voltage_high_threshold",
+                                                        psu.get_voltage_high_threshold, "high voltage threshold")
+                low_threshold = self.get_psu_parameter(psu_info, "voltage_low_threshold",
+                                                       psu.get_voltage_low_threshold, "low voltage threshold")
+
+                if high_threshold and low_threshold:
                     self.expect(voltage < high_threshold and voltage > low_threshold,
                                 "Voltage {} of PSU {} is not in between {} and {}"
                                 .format(voltage, psu_id, low_threshold, high_threshold))
+
         self.assert_expectations()
 
     def test_temperature(self, duthosts, enum_rand_one_per_hwsku_hostname, localhost, platform_api_conn):