diff --git a/test/functional/tests/stats/test_consistency_between_outputs.py b/test/functional/tests/stats/test_consistency_between_outputs.py
index 61fffd025..36cd05ad0 100644
--- a/test/functional/tests/stats/test_consistency_between_outputs.py
+++ b/test/functional/tests/stats/test_consistency_between_outputs.py
@@ -1,10 +1,13 @@
 #
 # Copyright(c) 2020-2021 Intel Corporation
+# Copyright(c) 2024 Huawei Technologies
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
 import random
 import re
+from collections import OrderedDict
+
 import pytest
 
 from api.cas.cache_config import CacheMode, CacheLineSize, CacheModeTrait
@@ -12,77 +15,74 @@
 from core.test_run import TestRun
 from storage_devices.disk import DiskType, DiskTypeSet, DiskTypeLowerThan
 from test_tools.dd import Dd
-from test_tools.disk_utils import Filesystem
 from test_utils.size import Size, Unit
 
-iterations = 64
-cache_size = Size(8, Unit.GibiByte)
+iterations = 4
+cache_size = Size(4, Unit.GibiByte)
 
 
-@pytest.mark.parametrizex("cache_line_size", CacheLineSize)
 @pytest.mark.parametrizex("cache_mode", CacheMode.with_any_trait(
-    CacheModeTrait.InsertRead | CacheModeTrait.InsertWrite))
-@pytest.mark.parametrizex("test_object", ["cache", "core"])
+    CacheModeTrait.InsertRead | CacheModeTrait.InsertWrite
+))
 @pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
 @pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
-def test_output_consistency(cache_line_size, cache_mode, test_object):
+def test_output_consistency(cache_mode):
     """
-    title: Test consistency between different cache and core statistics' outputs.
+    title: Test consistency between different cache and core statistics' output formats.
     description: |
         Check if OpenCAS's statistics for cache and core are consistent
        regardless of the output format.
     pass_criteria:
-      - Statistics in CSV format matches statistics in table format.
+      - Statistics in CSV format match statistics in table format.
     """
-    with TestRun.step("Prepare cache and core."):
-        cache_dev = TestRun.disks['cache']
+    cache_line_size = random.choice(list(CacheLineSize))
+
+    with TestRun.step("Prepare cache and core devices"):
+        cache_dev = TestRun.disks["cache"]
         cache_dev.create_partitions([cache_size])
         cache_part = cache_dev.partitions[0]
-        core_dev = TestRun.disks['core']
+        core_dev = TestRun.disks["core"]
         core_dev.create_partitions([cache_size * 4])
         core_part = core_dev.partitions[0]
         blocks_in_cache = int(cache_size / cache_line_size.value)
 
-    with TestRun.step("Start cache and add core with a filesystem."):
+    with TestRun.step("Start cache and add core"):
         cache = start_cache(cache_part, cache_mode, cache_line_size, force=True)
-        core_part.create_filesystem(Filesystem.xfs)
         exp_obj = cache.add_core(core_part)
 
-    with TestRun.step("Select object to test."):
-        if test_object == "cache":
-            tested_object = cache
-            flush = tested_object.flush_cache
-        elif test_object == "core":
-            tested_object = exp_obj
-            flush = tested_object.flush_core
-        else:
-            TestRun.LOGGER.error("Wrong type of device to read statistics from.")
-
     for _ in TestRun.iteration(range(iterations), f"Run configuration {iterations} times"):
-        with TestRun.step(f"Reset stats and run workload on the {test_object}."):
-            tested_object.reset_counters()
+        with TestRun.step("Reset stats and run workload"):
+            cache.reset_counters()
             # Run workload on a random portion of the tested object's capacity,
             # not too small, but not more than half the size
-            random_count = random.randint(blocks_in_cache / 32, blocks_in_cache / 2)
+            random_count = random.randint(blocks_in_cache // 32, blocks_in_cache // 2)
             TestRun.LOGGER.info(f"Run workload on {(random_count / blocks_in_cache * 100):.2f}% "
-                                f"of {test_object}'s capacity.")
+                                "of cache's capacity.")
             dd_builder(cache_mode, cache_line_size, random_count, exp_obj).run()
 
-        with TestRun.step(f"Flush {test_object} and get statistics from different outputs."):
-            flush()
-            csv_stats = get_stats_from_csv(
-                cache.cache_id, tested_object.core_id if test_object == "core" else None
+        with TestRun.step("Get statistics from different outputs"):
+            cache_csv_output = print_statistics(cache.cache_id, output_format=OutputFormat.csv)
+            cache_table_output = print_statistics(cache.cache_id, output_format=OutputFormat.table)
+            cache_csv_stats = get_stats_from_csv(cache_csv_output)
+            cache_table_stats = get_stats_from_table(cache_table_output)
+
+            core_csv_output = print_statistics(
+                exp_obj.cache_id, exp_obj.core_id, output_format=OutputFormat.csv
             )
-            table_stats = get_stats_from_table(
-                cache.cache_id, tested_object.core_id if test_object == "core" else None
+            core_table_output = print_statistics(
+                exp_obj.cache_id, exp_obj.core_id, output_format=OutputFormat.table
             )
+            core_csv_stats = get_stats_from_csv(core_csv_output)
+            core_table_stats = get_stats_from_table(core_table_output)
 
-        with TestRun.step("Compare statistics between outputs."):
-            if csv_stats != table_stats:
-                TestRun.LOGGER.error(f"Inconsistent outputs:\n{csv_stats}\n\n{table_stats}")
+        with TestRun.step("Compare statistics between outputs"):
+            TestRun.LOGGER.info("Cache stats comparison")
+            compare_csv_and_table(cache_csv_stats, cache_table_stats)
+            TestRun.LOGGER.info("Core stats comparison")
+            compare_csv_and_table(core_csv_stats, core_table_stats)
 
 
-def get_stats_from_csv(cache_id: int, core_id: int = None):
+def get_stats_from_csv(output):
     """
     'casadm -P' csv output has two lines:
     1st - statistics names with units
@@ -90,18 +90,16 @@ def get_stats_from_csv(cache_id: int, core_id: int = None):
     This function returns dictionary with statistics names with units as keys
     and statistics values as values.
     """
-    output = print_statistics(cache_id, core_id, output_format=OutputFormat.csv)
-
     output = output.stdout.splitlines()
 
     keys = output[0].split(",")
     values = output[1].split(",")
 
     # return the keys and the values as a dictionary
-    return dict(zip(keys, values))
+    return OrderedDict(zip(keys, values))
 
 
-def get_stats_from_table(cache_id: int, core_id: int = None):
+def get_stats_from_table(output):
     """
     'casadm -P' table output has a few sections:
     1st - config section with two columns
@@ -109,7 +107,6 @@ def get_stats_from_table(cache_id: int, core_id: int = None):
     This function returns dictionary with statistics names with units as keys
     and statistics values as values.
     """
-    output = print_statistics(cache_id, core_id, output_format=OutputFormat.table)
     output = output.stdout.splitlines()
 
     output_parts = []
@@ -123,20 +120,20 @@ def get_stats_from_table(cache_id: int, core_id: int = None):
 
     # the first part is config section
     conf_section = output_parts.pop(0)
-    keys, values = (parse_core_conf_section(conf_section) if core_id
-                    else parse_cache_conf_section(conf_section))
+    id_row = _find_id_row(conf_section)
+    column_width = _check_first_column_width(id_row)
+    stat_dict = parse_conf_section(conf_section, column_width)
 
     # parse each remaining section
     for section in output_parts:
         # the remaining parts are table sections
-        part_of_keys, part_of_values = parse_tables_section(section)
+        part_of_stat_dict = parse_tables_section(section)
 
-        # receive keys and values lists from every section
-        keys.extend(part_of_keys)
-        values.extend(part_of_values)
+        # receive keys and values from every section
+        stat_dict.update(part_of_stat_dict)
 
     # return the keys and the values as a dictionary
-    return dict(zip(keys, values))
+    return stat_dict
 
 
 def parse_conf_section(table_as_list: list, column_width: int):
@@ -145,45 +142,34 @@ def parse_conf_section(table_as_list: list, column_width: int):
     of the first section in the statistics output in table format.
     The first section in the 'casadm -P' output have two columns.
     """
-    keys = []
-    values = []
+    stat_dict = OrderedDict()
+
     # reformat table
     table_as_list = separate_values_to_two_lines(table_as_list, column_width)
 
+    # 'Dirty for' in csv has one entry with and one without unit, we want to match that
+    # and set this to False after the first entry is processed
+    process_dirty_for = True
+
     # split table lines to statistic name and its value
     # and save them to keys and values tables
     for line in table_as_list:
-        splitted_line = []
-
+        key, value = line[:column_width], line[column_width:]
+        is_dirty_for = key.startswith("Dirty for")
         # move unit from value to statistic name if needed
-        sqr_brackets_counter = line.count("[")
-        if sqr_brackets_counter:
-            addition = line[line.index("["):line.index("]") + 1]
-            splitted_line.insert(0, line[:column_width] + addition)
-            splitted_line.insert(1, line[column_width:].replace(addition, ""))
-        else:
-            splitted_line.insert(0, line[:column_width])
-            splitted_line.insert(1, line[column_width:])
+        if "[" in value and (not is_dirty_for or process_dirty_for):
+            unit = line[line.index("["):line.index("]") + 1]
+            key = key + unit
+            value = value.replace(unit, "")
+            if is_dirty_for:
+                process_dirty_for = False
 
         # remove whitespaces
-        # save each statistic name (with unit) to keys
-        keys.append(re.sub(r'\s+', ' ', splitted_line[0]).strip())
-        # save each statistic value to values
-        values.append(re.sub(r'\s+', ' ', splitted_line[1]).strip())
+        key = re.sub(r"\s+", " ", key).strip()
+        value = re.sub(r"\s+", " ", value).strip()
+        stat_dict[key] = value
 
-    return keys, values
-
-
-def parse_cache_conf_section(table_as_list: list):
-    id_row = _find_id_row(table_as_list)
-    column_width = _check_first_column_width(id_row)
-    return parse_conf_section(table_as_list, column_width)
-
-
-def parse_core_conf_section(table_as_list: list):
-    id_row = _find_id_row(table_as_list)
-    column_width = _check_first_column_width(id_row)
-    return parse_conf_section(table_as_list, column_width)
+    return stat_dict
 
 
 def _find_id_row(table_as_list: list):
@@ -209,7 +195,7 @@ def separate_values_to_two_lines(table_as_list: list, column_width: int):
     replace this line with two lines, each containing value in one unit.
     """
     for i, line in enumerate(table_as_list):
-        has_two_units = line.count(" / ")
+        has_two_units = " / " in line
         if has_two_units:
             table_as_list.remove(line)
             value_parts = line[column_width:].split(" / ")
@@ -228,8 +214,7 @@ def parse_tables_section(table_as_list: list):
     3rd: % - percentage values
     4th: Units - full units for values stored in 2nd column
     """
-    keys = []
-    values = []
+    stats_dict = OrderedDict()
 
     # remove table header - 3 lines, it is useless
     table_as_list = table_as_list[3:]
@@ -241,21 +226,22 @@ def parse_tables_section(table_as_list: list):
 
     # split lines to columns and remove whitespaces
     for line in table_as_list:
-        splitted_line = re.split(r'│|\|', line)
-        for i in range(len(splitted_line)):
-            splitted_line[i] = splitted_line[i].strip()
+        split_line = re.split(r"[│|]", line)
+        split_line = [part.strip() for part in split_line]
 
         # save keys and values in order:
         # key: statistic name and unit
         # value: value in full unit
-        keys.append(f'{splitted_line[1]} [{splitted_line[4]}]')
-        values.append(splitted_line[2])
+        key = f"{split_line[1]} [{split_line[4]}]"
+        value = split_line[2]
+        stats_dict[key] = value
         # key: statistic name and percent sign
        # value: value as percentage
-        keys.append(f'{splitted_line[1]} [%]')
-        values.append(splitted_line[3])
+        key = f"{split_line[1]} [%]"
+        value = split_line[3]
+        stats_dict[key] = value
 
-    return keys, values
+    return stats_dict
 
 
 def is_table_separator(line: str):
@@ -265,6 +251,29 @@ def is_table_separator(line: str):
     return ('+' or '╪' or '╧') in line
 
 
+def compare_csv_and_table(csv_stats, table_stats):
+    if csv_stats != table_stats:
+        wrong_keys = []
+        dirty_for_similar = True
+        for key in csv_stats:
+            if csv_stats[key] != table_stats[key]:
+                if not key.startswith("Dirty for") or not dirty_for_similar:
+                    wrong_keys.append(key)
+                    continue
+                if "[s]" not in key:
+                    continue
+                # 'Dirty for' values might differ by 1 [s]
+                dirty_for_similar = int(csv_stats[key]) - int(table_stats[key]) in {-1, 1}
+                if not dirty_for_similar:
+                    wrong_keys.append(key)
+
+        if len(csv_stats) != len(table_stats) or wrong_keys:
+            TestRun.LOGGER.error(
+                f"Inconsistent outputs:\n{csv_stats}\n\n{table_stats}"
+                + (f"\nWrong keys: {', '.join(wrong_keys)}" if wrong_keys else "")
+            )
+
+
 def dd_builder(cache_mode, cache_line_size, count, device):
     dd = (Dd()
           .block_size(cache_line_size.value)