open-cas-linux/test/functional/tests/fault_injection/test_fault_injection_interrupts.py
Slawomir Jankowski 17528aefdd Add interrupt cache stop interrupt test
Signed-off-by: Slawomir Jankowski <slawomir.jankowski@intel.com>
2020-02-17 15:36:31 +01:00

535 lines
23 KiB
Python

#
# Copyright(c) 2020 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#
from time import sleep
import pytest
from api.cas import casadm, casadm_parser, cli
from api.cas.cache_config import CacheMode, CleaningPolicy, CacheModeTrait
from storage_devices.disk import DiskType, DiskTypeSet, DiskTypeLowerThan
from core.test_run import TestRun
from test_tools.disk_utils import Filesystem
from test_utils import os_utils
from test_utils.os_utils import Udev, DropCachesMode
from test_utils.size import Size, Unit
# Directory where the exported object (and, later, the bare core device) is mounted.
mount_point = "/mnt/cas"
# File written by create_test_file(); its md5 sum is compared before/after each interruption.
test_file_path = mount_point + "/test_file"
# Number of start / interrupt / stop cycles executed for every parametrized configuration.
iterations_per_config = 10
# Size of the cache partition; core partitions are created twice as large and the
# test file is sized from this value as well.
cache_size = Size(16, Unit.GibiByte)
@pytest.mark.parametrize("filesystem", Filesystem)
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
def test_interrupt_core_flush(cache_mode, filesystem):
    """
        title: Test if OpenCAS works correctly after core's flushing interruption.
        description: |
          Negative test of the ability of OpenCAS to handle core flushing interruption.
        pass_criteria:
          - No system crash.
          - Flushing would be stopped after interruption.
          - Md5sum are correct during all test steps.
          - Dirty blocks quantity after interruption is lower but non-zero.
    """
    with TestRun.step("Prepare cache and core."):
        cache_part, core_part = prepare()

    iteration_title = f"Reload cache configuration {iterations_per_config} times."
    for _ in TestRun.iteration(range(iterations_per_config), iteration_title):

        with TestRun.step("Start cache."):
            cache = casadm.start_cache(cache_part, cache_mode, force=True)

        with TestRun.step("Set cleaning policy to NOP."):
            # Presumably keeps background cleaning from shrinking the dirty-block
            # count on its own -- confirm against CAS documentation.
            cache.set_cleaning_policy(CleaningPolicy.nop)

        with TestRun.step(f"Add core device with {filesystem} filesystem and mount it."):
            core_part.create_filesystem(filesystem)
            core = cache.add_core(core_part)
            core.mount(mount_point)

        with TestRun.step("Create test file in mount point of exported object."):
            test_file = create_test_file()

        with TestRun.step("Check md5 sum of test file."):
            md5_before = test_file.md5sum()

        with TestRun.step("Get number of dirty data on exported object before interruption."):
            os_utils.sync()
            os_utils.drop_caches(DropCachesMode.ALL)
            dirty_before = core.get_dirty_blocks()

        with TestRun.step("Start flushing core device."):
            flush_command = cli.flush_core_cmd(str(cache.cache_id), str(core.core_id))
            casadm_pid = TestRun.executor.run_in_background(flush_command)
            sleep(2)

        with TestRun.step("Interrupt core flushing."):
            # Keep polling until the reported flushing progress reaches 50,
            # then send SIGINT to the background flush command.
            while casadm_parser.get_flushing_progress(cache.cache_id, core.core_id) < 50:
                pass
            TestRun.executor.run(f"kill -s SIGINT {casadm_pid}")

        with TestRun.step("Check number of dirty data on exported object after interruption."):
            dirty_after = core.get_dirty_blocks()
            # An interrupted flush must have cleaned something, but not everything.
            if dirty_after >= dirty_before:
                TestRun.LOGGER.error("Quantity of dirty lines after core flush interruption "
                                     "should be lower.")
            if int(dirty_after) == 0:
                TestRun.LOGGER.error("Quantity of dirty lines after core flush interruption "
                                     "should not be zero.")

        with TestRun.step("Unmount core and stop cache."):
            core.unmount()
            cache.stop()

        with TestRun.step("Mount core device."):
            # The core partition is mounted directly (no cache) to verify the data on it.
            core_part.mount(mount_point)

        with TestRun.step("Check md5 sum of test file again."):
            md5_after = test_file.md5sum()
            if md5_before != md5_after:
                TestRun.LOGGER.error(
                    "Md5 sums before and after interrupting core flush are different.")

        with TestRun.step("Unmount core device."):
            core_part.unmount()
@pytest.mark.parametrize("filesystem", Filesystem)
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
def test_interrupt_cache_flush(cache_mode, filesystem):
    """
        title: Test if OpenCAS works correctly after cache's flushing interruption.
        description: |
          Negative test of the ability of OpenCAS to handle cache flushing interruption.
        pass_criteria:
          - No system crash.
          - Flushing would be stopped after interruption.
          - Md5sum are correct during all test steps.
          - Dirty blocks quantity after interruption is lower but non-zero.
    """
    with TestRun.step("Prepare cache and core."):
        cache_part, core_part = prepare()

    iteration_title = f"Reload cache configuration {iterations_per_config} times."
    for _ in TestRun.iteration(range(iterations_per_config), iteration_title):

        with TestRun.step("Start cache."):
            cache = casadm.start_cache(cache_part, cache_mode, force=True)

        with TestRun.step("Set cleaning policy to NOP."):
            # Presumably keeps background cleaning from shrinking the dirty-block
            # count on its own -- confirm against CAS documentation.
            cache.set_cleaning_policy(CleaningPolicy.nop)

        with TestRun.step(f"Add core device with {filesystem} filesystem and mount it."):
            core_part.create_filesystem(filesystem)
            core = cache.add_core(core_part)
            core.mount(mount_point)

        with TestRun.step("Create test file in mount point of exported object."):
            test_file = create_test_file()

        with TestRun.step("Check md5 sum of test file."):
            md5_before = test_file.md5sum()

        with TestRun.step("Get number of dirty data on exported object before interruption."):
            os_utils.sync()
            os_utils.drop_caches(DropCachesMode.ALL)
            dirty_before = cache.get_dirty_blocks()

        with TestRun.step("Start flushing cache."):
            flush_command = cli.flush_cache_cmd(str(cache.cache_id))
            casadm_pid = TestRun.executor.run_in_background(flush_command)
            sleep(2)

        with TestRun.step("Interrupt cache flushing"):
            # Keep polling until the reported flushing progress reaches 50,
            # then send SIGINT to the background flush command.
            while casadm_parser.get_flushing_progress(cache.cache_id, core.core_id) < 50:
                pass
            TestRun.executor.run(f"kill -s SIGINT {casadm_pid}")

        with TestRun.step("Check number of dirty data on exported object after interruption."):
            dirty_after = cache.get_dirty_blocks()
            # An interrupted flush must have cleaned something, but not everything.
            if dirty_after >= dirty_before:
                TestRun.LOGGER.error("Quantity of dirty lines after cache flush interruption "
                                     "should be lower.")
            if int(dirty_after) == 0:
                TestRun.LOGGER.error("Quantity of dirty lines after cache flush interruption "
                                     "should not be zero.")

        with TestRun.step("Unmount core and stop cache."):
            core.unmount()
            cache.stop()

        with TestRun.step("Mount core device."):
            # The core partition is mounted directly (no cache) to verify the data on it.
            core_part.mount(mount_point)

        with TestRun.step("Check md5 sum of test file again."):
            md5_after = test_file.md5sum()
            if md5_before != md5_after:
                TestRun.LOGGER.error(
                    "Md5 sums before and after interrupting cache flush are different.")

        with TestRun.step("Unmount core device."):
            core_part.unmount()
@pytest.mark.parametrize("filesystem", Filesystem)
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
def test_interrupt_core_remove(cache_mode, filesystem):
    """
        title: Test if OpenCAS works correctly after core's removing interruption.
        description: |
          Negative test of the ability of OpenCAS to handle core's removing interruption.
        pass_criteria:
          - No system crash.
          - Core would not be removed from cache after interruption.
          - Flushing would be stopped after interruption.
          - Md5sum are correct during all test steps.
          - Dirty blocks quantity after interruption is lower but non-zero.
    """
    with TestRun.step("Prepare cache and core."):
        # NOTE(review): this repeats the partitioning done by prepare() but, unlike
        # prepare(), does not call Udev.disable() -- confirm whether that is intended.
        cache_dev = TestRun.disks['cache']
        cache_dev.create_partitions([cache_size])
        cache_part = cache_dev.partitions[0]
        core_dev = TestRun.disks['core']
        core_dev.create_partitions([cache_size * 2])
        core_part = core_dev.partitions[0]

    for _ in TestRun.iteration(range(iterations_per_config),
                               f"Reload cache configuration {iterations_per_config} times."):

        with TestRun.step("Start cache."):
            cache = casadm.start_cache(cache_part, cache_mode, force=True)

        with TestRun.step("Set cleaning policy to NOP."):
            cache.set_cleaning_policy(CleaningPolicy.nop)

        with TestRun.step(f"Add core device with {filesystem} filesystem and mount it."):
            core_part.create_filesystem(filesystem)
            core = cache.add_core(core_part)
            core.mount(mount_point)

        with TestRun.step("Create test file in mount point of exported object."):
            test_file = create_test_file()

        with TestRun.step("Check md5 sum of test file."):
            test_file_md5sum_before = test_file.md5sum()

        with TestRun.step(
                "Get number of dirty data on exported object before core removal interruption."):
            os_utils.sync()
            os_utils.drop_caches(DropCachesMode.ALL)
            cache_dirty_blocks_before = cache.get_dirty_blocks()

        with TestRun.step("Unmount core."):
            core.unmount()

        with TestRun.step("Start removing core device."):
            flush_pid = TestRun.executor.run_in_background(
                cli.remove_core_cmd(str(cache.cache_id), str(core.core_id)))
            sleep(2)

        with TestRun.step("Interrupt core removing"):
            # Poll until the reported flushing progress reaches 50, then send SIGINT
            # to the background remove-core command.
            percentage = casadm_parser.get_flushing_progress(cache.cache_id, core.core_id)
            while percentage < 50:
                # Fixed: previously read the non-existent attribute `cache.cache_i`,
                # which raised AttributeError as soon as this loop body was entered.
                percentage = casadm_parser.get_flushing_progress(cache.cache_id, core.core_id)
            TestRun.executor.run(f"kill -s SIGINT {flush_pid}")

        with TestRun.step(
                "Check number of dirty data on exported object after core removal interruption."):
            cache_dirty_blocks_after = cache.get_dirty_blocks()
            # The interrupted removal must have flushed something, but not everything.
            if cache_dirty_blocks_after >= cache_dirty_blocks_before:
                TestRun.LOGGER.error("Quantity of dirty lines after core removal interruption "
                                     "should be lower.")
            if int(cache_dirty_blocks_after) == 0:
                TestRun.LOGGER.error("Quantity of dirty lines after core removal interruption "
                                     "should not be zero.")

        with TestRun.step("Remove core from cache."):
            core.remove_core()

        with TestRun.step("Stop cache."):
            cache.stop()

        with TestRun.step("Mount core device."):
            # The core partition is mounted directly (no cache) to verify the data on it.
            core_part.mount(mount_point)

        with TestRun.step("Check md5 sum of test file again."):
            if test_file_md5sum_before != test_file.md5sum():
                TestRun.LOGGER.error(
                    "Md5 sums before and after interrupting core removal are different.")

        with TestRun.step("Unmount core device."):
            core_part.unmount()
@pytest.mark.parametrize("filesystem", Filesystem)
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
def test_interrupt_cache_mode_switch_immediately(cache_mode, filesystem):
    """
        title: Test if OpenCAS works correctly after cache mode switching immediate interruption.
        description: |
          Negative test of the ability of OpenCAS to handle cache mode switching
          immediate interruption.
        pass_criteria:
          - No system crash.
          - Cache mode will not be switched after interruption.
          - Flushing would be stopped after interruption.
          - Md5sum are correct during all test steps.
          - Dirty blocks quantity after interruption is lower but non-zero.
    """
    with TestRun.step("Prepare cache and core."):
        cache_part, core_part = prepare()

    for _ in TestRun.iteration(range(iterations_per_config),
                               f"Reload cache configuration {iterations_per_config} times."):

        with TestRun.step("Start cache."):
            cache = casadm.start_cache(cache_part, cache_mode, force=True)

        with TestRun.step("Set cleaning policy to NOP."):
            cache.set_cleaning_policy(CleaningPolicy.nop)

        with TestRun.step(f"Add core device with {filesystem} filesystem and mount it."):
            core_part.create_filesystem(filesystem)
            core = cache.add_core(core_part)
            core.mount(mount_point)

        with TestRun.step("Create test file in mount point of exported object."):
            test_file = create_test_file()

        with TestRun.step("Check md5 sum of test file."):
            test_file_md5sum_before = test_file.md5sum()

        with TestRun.step("Get number of dirty data on exported object before interruption."):
            os_utils.sync()
            os_utils.drop_caches(DropCachesMode.ALL)
            cache_dirty_blocks_before = cache.get_dirty_blocks()

        with TestRun.step("Start switching cache mode."):
            # Third argument "yes" presumably requests flushing the cache as part of
            # the mode switch -- confirm against cli.set_cache_mode_cmd.
            flush_pid = TestRun.executor.run_in_background(cli.set_cache_mode_cmd(
                CacheMode.DEFAULT.name.lower(), str(cache.cache_id), "yes"))

        with TestRun.step("Send interruption signal."):
            # No delay and no progress polling here: the signal is sent right away.
            TestRun.executor.run(f"kill -s SIGINT {flush_pid}")

        with TestRun.step("Check number of dirty data on exported object after interruption."):
            cache_dirty_blocks_after = cache.get_dirty_blocks()
            # NOTE(review): with an immediate signal the flush may not have progressed
            # at all, in which case an unchanged count would trip the first check --
            # confirm that "strictly lower" is the intended criterion here.
            if cache_dirty_blocks_after >= cache_dirty_blocks_before:
                TestRun.LOGGER.error("Quantity of dirty lines after cache mode switching "
                                     "interruption should be lower.")
            if int(cache_dirty_blocks_after) == 0:
                TestRun.LOGGER.error("Quantity of dirty lines after cache mode switching "
                                     "interruption should not be zero.")

        with TestRun.step("Check cache mode."):
            if cache.get_cache_mode() != cache_mode:
                TestRun.LOGGER.error("Cache mode should remain the same.")

        with TestRun.step("Unmount core and stop cache."):
            core.unmount()
            cache.stop()

        with TestRun.step("Mount core device."):
            # The core partition is mounted directly (no cache) to verify the data on it.
            core_part.mount(mount_point)

        with TestRun.step("Check md5 sum of test file again."):
            if test_file_md5sum_before != test_file.md5sum():
                TestRun.LOGGER.error(
                    "Md5 sums before and after interrupting mode switching are different.")

        with TestRun.step("Unmount core device."):
            core_part.unmount()
@pytest.mark.parametrize("filesystem", Filesystem)
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
def test_interrupt_cache_mode_switch_delayed(cache_mode, filesystem):
    """
        title: Test if OpenCAS works correctly after cache mode switching delayed interruption.
        description: |
          Negative test of the ability of OpenCAS to handle cache mode switching
          interruption with delay.
        pass_criteria:
          - No system crash.
          - Cache mode cannot be interrupted with delay.
          - Md5sum are correct during all test steps.
          - Dirty blocks quantity after cache mode switching is zero.
    """
    with TestRun.step("Prepare cache and core."):
        cache_part, core_part = prepare()

    for _ in TestRun.iteration(range(iterations_per_config),
                               f"Reload cache configuration {iterations_per_config} times."):

        with TestRun.step("Start cache."):
            cache = casadm.start_cache(cache_part, cache_mode, force=True)

        with TestRun.step("Set cleaning policy to NOP."):
            cache.set_cleaning_policy(CleaningPolicy.nop)

        with TestRun.step(f"Add core device with {filesystem} filesystem and mount it."):
            core_part.create_filesystem(filesystem)
            core = cache.add_core(core_part)
            core.mount(mount_point)

        with TestRun.step("Create test file in mount point of exported object."):
            test_file = create_test_file()

        with TestRun.step("Check md5 sum of test file."):
            test_file_md5sum_before = test_file.md5sum()

        with TestRun.step("Start switching cache mode."):
            # Third argument "yes" presumably requests flushing the cache as part of
            # the mode switch -- confirm against cli.set_cache_mode_cmd.
            flush_pid = TestRun.executor.run_in_background(cli.set_cache_mode_cmd(
                CacheMode.DEFAULT.name.lower(), str(cache.cache_id), "yes"))
            sleep(2)

        with TestRun.step("Send interruption signal."):
            # Poll until the reported flushing progress reaches 50, then send SIGINT
            # to the background set-cache-mode command.
            percentage = casadm_parser.get_flushing_progress(cache.cache_id, core.core_id)
            while percentage < 50:
                percentage = casadm_parser.get_flushing_progress(cache.cache_id, core.core_id)
            TestRun.executor.run(f"kill -s SIGINT {flush_pid}")

        with TestRun.step(
                "Get quantity of dirty data on exported object after sending interruption "
                "signal to cas to stop mode switching."):
            # Unlike the other tests, the operation is expected to complete here:
            # no dirty data may remain and the mode must have changed.
            if int(cache.get_dirty_blocks()) != 0:
                TestRun.LOGGER.error("Quantity of dirty lines should be zero now.")

        with TestRun.step("Check cache mode."):
            if cache.get_cache_mode() == cache_mode:
                TestRun.LOGGER.error("Cache mode should have changed.")

        with TestRun.step("Unmount core and stop cache."):
            core.unmount()
            cache.stop()

        with TestRun.step("Mount core device."):
            # The core partition is mounted directly (no cache) to verify the data on it.
            core_part.mount(mount_point)

        with TestRun.step("Check md5 sum of test file again."):
            if test_file_md5sum_before != test_file.md5sum():
                TestRun.LOGGER.error(
                    "Md5 sums before and after interrupting mode switching are different.")

        with TestRun.step("Unmount core device."):
            core_part.unmount()
@pytest.mark.parametrize("filesystem", Filesystem)
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
def test_interrupt_cache_stop(cache_mode, filesystem):
    """
        title: Test if OpenCAS works correctly after cache stopping interruption.
        description: |
          Negative test of the ability of OpenCAS to handle cache's stop interruption.
        pass_criteria:
          - No system crash.
          - Flushing would be stopped after interruption.
          - Md5sum are correct during all test steps.
          - Dirty blocks quantity after interruption is lower but non-zero.
    """
    with TestRun.step("Prepare cache and core."):
        cache_part, core_part = prepare()

    iteration_title = f"Reload cache configuration {iterations_per_config} times."
    for _ in TestRun.iteration(range(iterations_per_config), iteration_title):

        with TestRun.step("Start cache."):
            cache = casadm.start_cache(cache_part, cache_mode, force=True)

        with TestRun.step("Set cleaning policy to NOP."):
            # Presumably keeps background cleaning from shrinking the dirty-block
            # count on its own -- confirm against CAS documentation.
            cache.set_cleaning_policy(CleaningPolicy.nop)

        with TestRun.step(f"Add core device with {filesystem} filesystem and mount it."):
            core_part.create_filesystem(filesystem)
            core = cache.add_core(core_part)
            core.mount(mount_point)

        with TestRun.step("Create test file in mount point of exported object."):
            test_file = create_test_file()

        with TestRun.step("Check md5 sum of test file."):
            md5_before = test_file.md5sum()

        with TestRun.step("Get number of dirty data on exported object before interruption."):
            os_utils.sync()
            os_utils.drop_caches(DropCachesMode.ALL)
            dirty_before = cache.get_dirty_blocks()

        with TestRun.step("Unmount core."):
            core.unmount()

        with TestRun.step("Start stopping cache."):
            stop_command = cli.stop_cmd(str(cache.cache_id))
            casadm_pid = TestRun.executor.run_in_background(stop_command)
            sleep(2)

        with TestRun.step("Interrupt cache stopping."):
            # Keep polling until the reported flushing progress reaches 50,
            # then send SIGINT to the background stop command.
            while casadm_parser.get_flushing_progress(cache.cache_id, core.core_id) < 50:
                pass
            TestRun.executor.run(f"kill -s SIGINT {casadm_pid}")

        with TestRun.step("Check number of dirty data on exported object after interruption."):
            dirty_after = cache.get_dirty_blocks()
            # The interrupted stop must have flushed something, but not everything.
            if dirty_after >= dirty_before:
                TestRun.LOGGER.error("Quantity of dirty lines after cache stop interruption "
                                     "should be lower.")
            if int(dirty_after) == 0:
                TestRun.LOGGER.error("Quantity of dirty lines after cache stop interruption "
                                     "should not be zero.")

        with TestRun.step("Stop cache."):
            # Second, uninterrupted stop attempt.
            cache.stop()

        with TestRun.step("Mount core device."):
            # The core partition is mounted directly (no cache) to verify the data on it.
            core_part.mount(mount_point)

        with TestRun.step("Check md5 sum of test file again."):
            md5_after = test_file.md5sum()
            if md5_before != md5_after:
                TestRun.LOGGER.error("Md5 sums before and after interrupting"
                                     " cache stop are different.")

        with TestRun.step("Unmount core device."):
            core_part.unmount()
def prepare():
    """Partition the 'cache' and 'core' test disks and disable udev.

    One partition of ``cache_size`` is created on the cache disk and one of
    twice that size on the core disk.

    Returns:
        Tuple ``(cache_part, core_part)`` holding the first partition of the
        cache disk and of the core disk, respectively.
    """
    first_partitions = []
    # Same order as before: cache disk first, then core disk.
    for disk_name, partition_size in (('cache', cache_size), ('core', cache_size * 2)):
        disk = TestRun.disks[disk_name]
        disk.create_partitions([partition_size])
        first_partitions.append(disk.partitions[0])

    Udev.disable()

    cache_part, core_part = first_partitions
    return cache_part, core_part
def create_test_file():
    """Create the file at ``test_file_path`` and fill it from /dev/zero using dd.

    dd runs with a 512 KiB block size and a block count derived from
    ``cache_size``.

    Returns:
        The ``File`` object for the created file, refreshed after dd has run.
    """
    from test_utils.filesystem.file import File
    from test_tools.dd import Dd

    block_size = Size(512, Unit.KibiByte)
    # Ratio of the two `.value` fields, truncated to an int
    # (assumes both values are expressed in the same unit -- TODO confirm).
    block_count = int(cache_size.value / block_size.value)

    created_file = File.create_file(test_file_path)

    dd_command = (
        Dd()
        .output(test_file_path)
        .input("/dev/zero")
        .block_size(block_size)
        .count(block_count)
    )
    dd_command.run()

    # Re-read the file's state now that dd has written to it.
    created_file.refresh_item()
    return created_file