From 3023cccd2216a8fc06aeec018e8a4cc2a024ed0a Mon Sep 17 00:00:00 2001
From: Daniel Madej
Date: Mon, 30 Sep 2024 16:11:54 +0200
Subject: [PATCH] Update for test_parallel_core_flushing

Check all cores before ending test on fail
Print flushing progress in log
Check occupancy and dirty for each core
Minor fix and refactor

Signed-off-by: Daniel Madej
---
 .../test_parallel_core_flushing.py            | 72 ++++++++++++-------
 1 file changed, 46 insertions(+), 26 deletions(-)

diff --git a/test/functional/tests/lazy_writes/test_parallel_core_flushing.py b/test/functional/tests/lazy_writes/test_parallel_core_flushing.py
index 197d5d307..e8d9ecee7 100644
--- a/test/functional/tests/lazy_writes/test_parallel_core_flushing.py
+++ b/test/functional/tests/lazy_writes/test_parallel_core_flushing.py
@@ -1,5 +1,6 @@
 #
 # Copyright(c) 2022 Intel Corporation
+# Copyright(c) 2024 Huawei Technologies
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
@@ -25,9 +26,11 @@ def test_parallel_core_flushing():
         description: Test checks whether all cores attached to one cache instance are flushed
                      in parallel after executing flush cache command.
         pass_criteria:
-          - all cores should flushed in parallel
+          - all cores should be flushed in parallel
           - checksums for cores and core devices should be identical
     """
+    fail = False
+
     with TestRun.step("Prepare devices."):
         cache_disk = TestRun.disks['cache']
         cache_disk.create_partitions([Size(8, Unit.GibiByte)])
@@ -44,64 +47,81 @@ def test_parallel_core_flushing():
         cache.set_cleaning_policy(CleaningPolicy.nop)
         cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)
 
-    with TestRun.step("Run IO on each CAS device."):
+    with TestRun.step("Run IO on each cached volume."):
         for core in cores:
             Dd().output(core.path)\
                 .input("/dev/urandom")\
                 .block_size(Size(1, Unit.Blocks4096))\
                 .run()
 
-    with TestRun.step("Check if occupancy of all CAS devices increased "
-                      "and there is dirty data on them."):
-        proper_stats = (0.9 * core_partition_size).set_unit(Unit.Blocks4096)
-        occupancy = core.get_occupancy().set_unit(Unit.Blocks4096)
-        dirty = core.get_dirty_blocks().set_unit(Unit.Blocks4096)
+    with TestRun.step("Check if occupancy for all cores increased "
+                      "and there are dirty data on them."):
+        proper_stats = ((0.9 * core_partition_size)
+                        .align_down(Unit.Blocks4096.value)
+                        .set_unit(Unit.Blocks4096))
         for core in cores:
+            occupancy = core.get_occupancy().set_unit(Unit.Blocks4096)
+            dirty = core.get_dirty_blocks().set_unit(Unit.Blocks4096)
             if occupancy > proper_stats and dirty > proper_stats:
                 TestRun.LOGGER.info(f"Stats are as expected for core {core.core_id}.")
             else:
-                TestRun.fail(f"Stats are not as expected for core {core.core_id}\n"
-                             f"Occupancy: {occupancy}\n"
-                             f"Dirty: {dirty}\n"
-                             f"Proper stats: {proper_stats}")
-
-    with TestRun.step("In background run flush cache command."):
+                TestRun.LOGGER.error(f"Stats are not as expected for core {core.core_id}\n"
+                                     f"Occupancy: {occupancy}\n"
+                                     f"Dirty: {dirty}\n"
+                                     f"Required at least: {proper_stats}")
+                fail = True
+        if fail:
+            TestRun.fail("Cannot achieve proper cache state for test")
+
+    with TestRun.step("Run flush cache command in background."):
         pid = TestRun.executor.run_in_background(cli.flush_cache_cmd(str(cache.cache_id)))
 
-    with TestRun.step("Check whether all CAS devices are in 'Flushing' state and wait for finish."):
+    with TestRun.step("Check whether all cores are in 'Flushing' state and wait for finish."):
         for core in cores:
             wait_for_flushing(cache, core, timedelta(seconds=10))
 
         percentages = [0] * len(cores)
+        log_threshold = 10
+        TestRun.LOGGER.info('Flushing progress:')
         while TestRun.executor.check_if_process_exists(pid):
             current_values = get_progress(cache, cores)
+            if any(p >= log_threshold for p in current_values):
+                TestRun.LOGGER.info(f'{current_values}')
+                log_threshold = log_threshold + 10
 
             for old, new, core in zip(percentages, current_values, cores):
                 if old > new:
-                    TestRun.fail(f"Core {core.id}: progress decreased from {old}% to {new}%")
+                    TestRun.LOGGER.error(
+                        f"Core {core.core_id}: progress decreased from {old}% to {new}%"
+                    )
+                    fail = True
+            if fail:
+                TestRun.fail("Flushing progress error")
 
             percentages = current_values
 
-    with TestRun.step("Check if amount of dirty data on each CAS device equals 0."):
+    with TestRun.step("Check if amount of dirty data for each core equals 0."):
         for core in cores:
             dirty_blocks = core.get_dirty_blocks()
             if dirty_blocks != Size.zero():
-                TestRun.fail(f"Core {core.core_id} contain dirty data. "
-                             f"Dirty blocks: {dirty_blocks}")
-
-    with TestRun.step("Count checksums of each CAS device and check if it is identical "
-                      "with checksum of core device associated with it."):
-
-        TestRun.LOGGER.info("Calculate md5 for each core.")
+                TestRun.LOGGER.error(
+                    f"Core {core.core_id} contains dirty blocks: {dirty_blocks}"
+                )
+                fail = True
+        if fail:
+            TestRun.fail("Dirty data not flushed completely")
+
+    with TestRun.step("Calculate md5 for each cached volume."):
         core_md5s = [File(core.path).md5sum() for core in cores]
 
-        TestRun.LOGGER.info("Calculate md5 for each core device.")
+    with TestRun.step("Calculate md5 for each backend device."):
         dev_md5s = [File(dev.path).md5sum() for dev in core_devices]
 
-        TestRun.LOGGER.info("Compare md5 sums for cores and core devices.")
+    with TestRun.step("Compare md5 sums for cached volumes and corresponding backend devices."):
         for core_md5, dev_md5, core in zip(core_md5s, dev_md5s, cores):
             if core_md5 != dev_md5:
-                TestRun.fail(f"MD5 sums of core {core.core_id} and core device do not match!")
+                TestRun.LOGGER.error(f"MD5 sums of cached volume {core.path} and core device "
+                                     f"{core.core_device.path} do not match!")
 
 
 def get_progress(cache, cores):