Update for test_parallel_core_flushing

Add oflag=direct
Stop cache before calculating backend md5 sums
Check all cores before ending test on fail
Print flushing progress in log
Check occupancy and dirty data for each core
Minor fix and refactor

Signed-off-by: Daniel Madej <daniel.madej@huawei.com>
This commit is contained in:
Daniel Madej 2024-09-30 16:11:54 +02:00
parent bc6165a185
commit 829dbf9ba5

View File

@ -1,5 +1,6 @@
# #
# Copyright(c) 2022 Intel Corporation # Copyright(c) 2022 Intel Corporation
# Copyright(c) 2024 Huawei Technologies
# SPDX-License-Identifier: BSD-3-Clause # SPDX-License-Identifier: BSD-3-Clause
# #
@ -25,12 +26,14 @@ def test_parallel_core_flushing():
description: Test checks whether all cores attached to one cache instance are flushed description: Test checks whether all cores attached to one cache instance are flushed
in parallel after executing flush cache command. in parallel after executing flush cache command.
pass_criteria: pass_criteria:
- all cores should flushed in parallel - all cores should be flushed in parallel
- checksums for cores and core devices should be identical - checksums for cores and core devices should be identical
""" """
fail = False
with TestRun.step("Prepare devices."): with TestRun.step("Prepare devices."):
cache_disk = TestRun.disks['cache'] cache_disk = TestRun.disks['cache']
cache_disk.create_partitions([Size(8, Unit.GibiByte)]) cache_disk.create_partitions([Size(9, Unit.GibiByte)])
cache_dev = cache_disk.partitions[0] cache_dev = cache_disk.partitions[0]
core_disk = TestRun.disks['core'] core_disk = TestRun.disks['core']
@ -44,64 +47,86 @@ def test_parallel_core_flushing():
cache.set_cleaning_policy(CleaningPolicy.nop) cache.set_cleaning_policy(CleaningPolicy.nop)
cache.set_seq_cutoff_policy(SeqCutOffPolicy.never) cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)
with TestRun.step("Run IO on each CAS device."): with TestRun.step("Run IO on each cached volume."):
for core in cores: for core in cores:
Dd().output(core.path)\ dd = (Dd()
.input("/dev/urandom")\ .output(core.path)
.block_size(Size(1, Unit.Blocks4096))\ .input("/dev/urandom")
.run() .block_size(Size(1, Unit.Blocks4096))
.oflag("direct"))
dd.run()
with TestRun.step("Check if occupancy of all CAS devices increased " with TestRun.step("Check if occupancy for all cores increased "
"and there is dirty data on them."): "and there are dirty data on them."):
proper_stats = (0.9 * core_partition_size).set_unit(Unit.Blocks4096) proper_stats = ((0.9 * core_partition_size)
.align_down(Unit.Blocks4096.value)
.set_unit(Unit.Blocks4096))
for core in cores:
occupancy = core.get_occupancy().set_unit(Unit.Blocks4096) occupancy = core.get_occupancy().set_unit(Unit.Blocks4096)
dirty = core.get_dirty_blocks().set_unit(Unit.Blocks4096) dirty = core.get_dirty_blocks().set_unit(Unit.Blocks4096)
for core in cores:
if occupancy > proper_stats and dirty > proper_stats: if occupancy > proper_stats and dirty > proper_stats:
TestRun.LOGGER.info(f"Stats are as expected for core {core.core_id}.") TestRun.LOGGER.info(f"Stats are as expected for core {core.core_id}.")
else: else:
TestRun.fail(f"Stats are not as expected for core {core.core_id}\n" TestRun.LOGGER.error(f"Stats are not as expected for core {core.core_id}\n"
f"Occupancy: {occupancy}\n" f"Occupancy: {occupancy}\n"
f"Dirty: {dirty}\n" f"Dirty: {dirty}\n"
f"Proper stats: {proper_stats}") f"Required at least: {proper_stats}")
fail = True
if fail:
TestRun.fail("Cannot achieve proper cache state for test")
with TestRun.step("In background run flush cache command."): with TestRun.step("Run flush cache command in background."):
pid = TestRun.executor.run_in_background(cli.flush_cache_cmd(str(cache.cache_id))) pid = TestRun.executor.run_in_background(cli.flush_cache_cmd(str(cache.cache_id)))
with TestRun.step("Check whether all CAS devices are in 'Flushing' state and wait for finish."): with TestRun.step("Check whether all cores are in 'Flushing' state and wait for finish."):
for core in cores: for core in cores:
wait_for_flushing(cache, core, timedelta(seconds=10)) wait_for_flushing(cache, core, timedelta(seconds=10))
percentages = [0] * len(cores) percentages = [0] * len(cores)
log_threshold = 10
TestRun.LOGGER.info('Flushing progress:')
while TestRun.executor.check_if_process_exists(pid): while TestRun.executor.check_if_process_exists(pid):
current_values = get_progress(cache, cores) current_values = get_progress(cache, cores)
if any(p >= log_threshold for p in current_values):
TestRun.LOGGER.info(f'{current_values}')
log_threshold = log_threshold + 10
for old, new, core in zip(percentages, current_values, cores): for old, new, core in zip(percentages, current_values, cores):
if old > new: if old > new:
TestRun.fail(f"Core {core.id}: progress decreased from {old}% to {new}%") TestRun.LOGGER.error(
f"Core {core.core_id}: progress decreased from {old}% to {new}%"
)
fail = True
if fail:
TestRun.fail("Flushing progress error")
percentages = current_values percentages = current_values
with TestRun.step("Check if amount of dirty data on each CAS device equals 0."): with TestRun.step("Check if amount of dirty data for each core equals 0."):
for core in cores: for core in cores:
dirty_blocks = core.get_dirty_blocks() dirty_blocks = core.get_dirty_blocks()
if dirty_blocks != Size.zero(): if dirty_blocks != Size.zero():
TestRun.fail(f"Core {core.core_id} contain dirty data. " TestRun.LOGGER.error(
f"Dirty blocks: {dirty_blocks}") f"Core {core.core_id} contains dirty blocks: {dirty_blocks}"
)
fail = True
if fail:
TestRun.fail("Dirty data not flushed completely")
with TestRun.step("Count checksums of each CAS device and check if it is identical " with TestRun.step("Calculate md5 for each cached volume."):
"with checksum of core device associated with it."):
TestRun.LOGGER.info("Calculate md5 for each core.")
core_md5s = [File(core.path).md5sum() for core in cores] core_md5s = [File(core.path).md5sum() for core in cores]
TestRun.LOGGER.info("Calculate md5 for each core device.") with TestRun.step("Stop cache without flushing data."):
cache.stop(no_data_flush=True)
with TestRun.step("Calculate md5 for each backend device."):
dev_md5s = [File(dev.path).md5sum() for dev in core_devices] dev_md5s = [File(dev.path).md5sum() for dev in core_devices]
TestRun.LOGGER.info("Compare md5 sums for cores and core devices.") with TestRun.step("Compare md5 sums for cached volumes and corresponding backend devices."):
for core_md5, dev_md5, core in zip(core_md5s, dev_md5s, cores): for core_md5, dev_md5, core in zip(core_md5s, dev_md5s, cores):
if core_md5 != dev_md5: if core_md5 != dev_md5:
TestRun.fail(f"MD5 sums of core {core.core_id} and core device do not match!") TestRun.LOGGER.error(f"MD5 sums of cached volume {core.path} and core device "
f"{core.core_device.path} do not match!")
def get_progress(cache, cores): def get_progress(cache, cores):