From ab99c08504c1470d862e2f36ef03ba12e551efdb Mon Sep 17 00:00:00 2001 From: Michal Mielewczyk Date: Wed, 13 Apr 2022 14:24:53 +0200 Subject: [PATCH] tests: add test_activate_corrupted Signed-off-by: Michal Mielewczyk --- .../test_fault_injection_standby.py | 147 ++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 test/functional/tests/fault_injection/test_fault_injection_standby.py diff --git a/test/functional/tests/fault_injection/test_fault_injection_standby.py b/test/functional/tests/fault_injection/test_fault_injection_standby.py new file mode 100644 index 0000000..d1bb1c3 --- /dev/null +++ b/test/functional/tests/fault_injection/test_fault_injection_standby.py @@ -0,0 +1,147 @@ +# +# Copyright(c) 2019-2022 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +# + +import pytest +from collections import namedtuple +import random + +from api.cas import casadm +from api.cas import dmesg +from api.cas.cli import casadm_bin +from core.test_run import TestRun +from storage_devices.disk import DiskType, DiskTypeSet, DiskTypeLowerThan +from test_utils.size import Size, Unit +from api.cas.cli_messages import check_stderr_msg, missing_param, disallowed_param +from api.cas.cache_config import CacheLineSize, CacheMode +from api.cas.cli import standby_activate_cmd +from api.cas.ioclass_config import IoClass +from test_tools.dd import Dd +from test_utils.os_utils import sync +from test_utils.filesystem.file import File + + +block_size = Size(1, Unit.Blocks512) + + +@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.nand, DiskType.optane])) +@pytest.mark.require_disk("core", DiskTypeLowerThan("cache")) +def test_activate_corrupted(): + """ + title: Activate cache instance on corrupted metadata + description: | + Initialize standby cache, populate it with corrupted metadata, detach and try to activate. + pass_criteria: + - Kernel panic doesn't occur + """ + with TestRun.step("Prepare devices for the cache and core."): + cache_device = TestRun.disks["cache"] + cache_device.create_partitions([Size(200, Unit.MebiByte)]) + cache_device = cache_device.partitions[0] + core_device = TestRun.disks["core"] + core_device.create_partitions([Size(500, Unit.MebiByte)]) + core_device = core_device.partitions[0] + + with TestRun.step("Prepare metadata dump"): + cache_id = 1 + cls = CacheLineSize.LINE_32KiB + md_dump = prepare_md_dump(cache_device, core_device, cls, cache_id) + + for offset in get_offsets_to_corrupt(md_dump.size, block_size): + + with TestRun.step("Prepare standby instance"): + cache = casadm.standby_init( + cache_dev=cache_device, + cache_line_size=int(cls.value.value / Unit.KibiByte.value), + cache_id=cache_id, + force=True, + ) + + with TestRun.step(f"Corrupt {block_size} on the offset {offset*block_size}"): + corrupted_md = prepare_corrupted_md(md_dump, offset, block_size) + + with TestRun.step(f"Copy corrupted metadata to the passive instance"): + Dd().input(corrupted_md.full_path).output(f"/dev/cas-cache-{cache_id}").run() + sync() + + with TestRun.step(f"Standby detach"): + cache.standby_detach() + + with TestRun.step("Try to activate cache instance"): + output = TestRun.executor.run( + standby_activate_cmd(cache_dev=cache_device.path, cache_id=str(cache_id)) + ) + + with TestRun.step("Per iteration cleanup"): + cache.stop() + corrupted_md.remove(force=True, ignore_errors=True) + + with TestRun.step("Test cleanup"): + md_dump.remove() + + +def get_offsets_to_corrupt(md_size, bs, count=100): + offsets = list(range(0, int(md_size.value), bs.value)) + offsets = random.choices(offsets, k=min(len(offsets), count)) + + # Offset is expresed as a number of blocks + return [int(o / bs.value) for o in offsets] + + +def prepare_md_dump(cache_device, core_device, cls, cache_id): + with TestRun.step("Setup WB cache instance with one core"): + cache = casadm.start_cache( + cache_dev=cache_device, + cache_line_size=cls, + cache_mode=CacheMode.WB, + cache_id=cache_id, + force=True, + ) + cache.add_core(core_device) + + with TestRun.step("Get metadata size"): + dmesg_out = TestRun.executor.run_expect_success("dmesg").stdout + md_size = dmesg.get_metadata_size(dmesg_out) + + with TestRun.step("Dump the metadata of the cache"): + dump_file_path = "/tmp/test_activate_corrupted.dump" + md_dump = File(dump_file_path) + md_dump.remove(force=True, ignore_errors=True) + + dd_count = int(md_size / Size(1, Unit.MebiByte)) + 1 + ( + Dd() + .input(cache_device.path) + .output(md_dump.full_path) + .block_size(Size(1, Unit.MebiByte)) + .count(dd_count) + .run() + ) + md_dump.refresh_item() + + with TestRun.step("Stop cache device"): + cache.stop() + + return md_dump + + +def prepare_corrupted_md(md_dump, offset_to_corrupt, bs): + invalid_dump_path = "/tmp/test_activate_corrupted.invalid_dump" + dd_count = offset_to_corrupt + 1 + + md_dump.copy(destination=invalid_dump_path, force=True) + corrupted_md = File(invalid_dump_path) + ( + Dd() + .input("/dev/urandom") + .output(corrupted_md.full_path) + .block_size(bs) + .count(dd_count) + .seek(offset_to_corrupt) + .conv("notrunc") + .run() + ) + corrupted_md.refresh_item() + + return corrupted_md