open-cas-linux/test/functional/tests/cache_ops/test_concurrent_flushes.py

#
# Copyright(c) 2020-2021 Intel Corporation
# Copyright(c) 2024 Huawei Technologies Co., Ltd.
# SPDX-License-Identifier: BSD-3-Clause
#

from time import sleep

import pytest

from api.cas import casadm, casadm_parser, cli
from api.cas.cache_config import CacheMode, CleaningPolicy, CacheModeTrait, SeqCutOffPolicy
from api.cas.casadm_params import StatsFilter
from core.test_run import TestRun
from storage_devices.disk import DiskType, DiskTypeSet, DiskTypeLowerThan
from test_tools.fio.fio import Fio
from test_tools.fio.fio_param import IoEngine, ReadWrite
from test_utils.output import CmdException
from test_utils.size import Size, Unit


@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeSet([DiskType.hdd, DiskType.hdd4k]))
def test_concurrent_cores_flush(cache_mode: CacheMode):
"""
title: Flush two cores simultaneously - negative.
description: |
        Validate that an attempt to flush a core fails when another core flush
        is already in progress on the same cache.
pass_criteria:
- No system crash.
      - Flushing of the first core should finish successfully.
      - It should not be possible to run a flush command on two cores within
        the same cache simultaneously.
"""
with TestRun.step("Prepare cache and core devices"):
cache_dev = TestRun.disks["cache"]
core_dev = TestRun.disks["core"]
cache_dev.create_partitions([Size(2, Unit.GibiByte)])
core_dev.create_partitions([Size(2, Unit.GibiByte)] * 2)
cache_part = cache_dev.partitions[0]
core_part1 = core_dev.partitions[0]
core_part2 = core_dev.partitions[1]
with TestRun.step("Start cache"):
cache = casadm.start_cache(cache_part, cache_mode, force=True)
with TestRun.step("Add both core devices to cache"):
core1 = cache.add_core(core_part1)
core2 = cache.add_core(core_part2)
with TestRun.step("Disable cleaning and sequential cutoff"):
cache.set_cleaning_policy(CleaningPolicy.nop)
cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)
with TestRun.step("Run fio on both cores"):
data_per_core = cache.size / 2
fio = (
Fio()
.create_command()
.io_engine(IoEngine.libaio)
.size(data_per_core)
.block_size(Size(4, Unit.MebiByte))
.read_write(ReadWrite.write)
.direct(1)
)
for core in [core1, core2]:
fio.add_job().target(core.path)
fio.run()
with TestRun.step("Check if both cores contain dirty blocks"):
required_dirty_data = (
(data_per_core * 0.9).align_down(Unit.Blocks4096.value).set_unit(Unit.Blocks4096)
)
core1_dirty_data = core1.get_dirty_blocks()
if core1_dirty_data < required_dirty_data:
TestRun.fail(f"Core {core1.core_id} does not contain enough dirty data.\n"
f"Expected at least {required_dirty_data}, actual {core1_dirty_data}.")
core2_dirty_data_before = core2.get_dirty_blocks()
if core2_dirty_data_before < required_dirty_data:
TestRun.fail(f"Core {core2.core_id} does not contain enough dirty data.\n"
f"Expected at least {required_dirty_data}, actual "
f" {core2_dirty_data_before}.")
with TestRun.step("Start flushing the first core in background"):
output_pid = TestRun.executor.run_in_background(
cli.flush_core_cmd(str(cache.cache_id), str(core1.core_id))
)
if not TestRun.executor.check_if_process_exists(output_pid):
TestRun.fail("Failed to start core flush in background")
with TestRun.step("Wait until flush starts"):
while TestRun.executor.check_if_process_exists(output_pid):
try:
casadm_parser.get_flushing_progress(cache.cache_id, core1.core_id)
break
except CmdException:
pass
with TestRun.step(
"Wait until first core reaches 40% flush and start flush operation on the second core"
):
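        # Poll until the first core's flush is at least 40% done, then expect the
        # flush request for the second core to be rejected.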
percentage = 0
while percentage < 40:
percentage = casadm_parser.get_flushing_progress(cache.cache_id, core1.core_id)
try:
core2.flush_core()
            TestRun.fail(
                "The first core is being flushed, so the flush attempt on the "
                "second core should fail"
            )
except CmdException:
TestRun.LOGGER.info(
"The first core is flushing right now so the second core's flush "
"fails as expected"
)
with TestRun.step("Wait for the first core to finish flushing"):
try:
percentage = 0
while percentage < 100:
percentage = casadm_parser.get_flushing_progress(cache.cache_id, core1.core_id)
sleep(1)
except CmdException:
TestRun.LOGGER.info("The first core is not flushing dirty data anymore")
with TestRun.step("Check the size of dirty data on both cores"):
core1_dirty_data = core1.get_dirty_blocks()
if core1_dirty_data > Size.zero():
TestRun.LOGGER.error(
"There should not be any dirty data on the first core after completed flush.\n"
f"Dirty data: {core1_dirty_data}."
)
core2_dirty_data_after = core2.get_dirty_blocks()
if core2_dirty_data_after != core2_dirty_data_before:
TestRun.LOGGER.error(
"Dirty data on the second core after failed flush should not change."
f"Dirty data before flush: {core2_dirty_data_before}, "
f"after: {core2_dirty_data_after}"
            )


@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
def test_concurrent_caches_flush(cache_mode: CacheMode):
"""
title: Flush multiple caches simultaneously.
description: |
        CAS should successfully flush a cache even when flushes on other caches are already
        in progress.
pass_criteria:
- No system crash.
- Flush for each cache should finish successfully.
"""
caches_number = 3
with TestRun.step("Prepare cache and core devices"):
cache_dev = TestRun.disks["cache"]
core_dev = TestRun.disks["core"]
cache_dev.create_partitions([Size(2, Unit.GibiByte)] * caches_number)
core_dev.create_partitions([Size(2, Unit.GibiByte) * 2] * caches_number)
with TestRun.step(f"Start {caches_number} caches"):
caches = [
casadm.start_cache(cache_dev=part, cache_mode=cache_mode, force=True)
for part in cache_dev.partitions
]
with TestRun.step("Disable cleaning and sequential cutoff"):
for cache in caches:
cache.set_cleaning_policy(CleaningPolicy.nop)
cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)
with TestRun.step("Add cores to caches"):
cores = [cache.add_core(core_dev=core_dev.partitions[i]) for i, cache in enumerate(caches)]
with TestRun.step("Run fio on all cores"):
fio = (
Fio()
.create_command()
.io_engine(IoEngine.libaio)
.block_size(Size(4, Unit.MebiByte))
            .size(caches[0].size)
.read_write(ReadWrite.write)
.direct(1)
)
for core in cores:
            fio.add_job().target(core.path)
fio.run()
with TestRun.step("Check if each cache is full of dirty blocks"):
for cache in caches:
cache_stats = cache.get_statistics(stat_filter=[StatsFilter.usage], percentage_val=True)
if cache_stats.usage_stats.dirty < 90:
TestRun.fail(f"Cache {cache.cache_id} should contain at least 90% of dirty data, "
f"actual dirty data: {cache_stats.usage_stats.dirty}%")
with TestRun.step("Start flush operation on all caches simultaneously"):
flush_pids = [
TestRun.executor.run_in_background(cli.flush_cache_cmd(str(cache.cache_id)))
for cache in caches
]
with TestRun.step("Wait for all caches to finish flushing"):
for flush_pid in flush_pids:
while TestRun.executor.check_if_process_exists(flush_pid):
sleep(1)
with TestRun.step("Check number of dirty data on each cache"):
for cache in caches:
dirty_blocks = cache.get_dirty_blocks()
if dirty_blocks > Size.zero():
TestRun.LOGGER.error(
f"The quantity of dirty data on cache {cache.cache_id} after complete "
f"flush should be zero, is: {dirty_blocks.set_unit(Unit.Blocks4096)}"
)