def create_test_files(test_file_size):
    """Create the source and target files used by the recovery tests.

    The source file is created at ``/tmp/source_test_file`` through
    ``fs_utils.create_random_test_file`` with the requested size; the target
    file is created at ``/tmp/target_test_file`` through ``File.create_file``.

    :param test_file_size: size passed to ``fs_utils.create_random_test_file``
    :return: tuple ``(source_file, target_file)``
    """
    source_file = fs_utils.create_random_test_file("/tmp/source_test_file", test_file_size)
    target_file = File.create_file("/tmp/target_test_file")
    return source_file, target_file


def copy_file(source, target, size, direct=None):
    """Copy ``size`` worth of data from ``source`` to ``target`` with dd.

    The copy is done in ``Size(1, Unit.Blocks4096)`` blocks; the block count is
    ``int(size.get_value(Unit.Blocks4096))``, so a fractional trailing block is
    truncated rather than copied.

    :param source: dd input path
    :param target: dd output path
    :param size: amount of data to copy (must provide ``get_value(Unit)``)
    :param direct: ``"oflag"`` to set the ``direct`` output flag, ``"iflag"`` to
        set the ``direct`` input flag, ``None`` to set neither
    :raises ValueError: if ``direct`` is not one of the values listed above
    """
    # Validate first: an unrecognised value used to be ignored silently, which
    # turned a typo in the caller into an unintended buffered copy.
    if direct not in (None, "oflag", "iflag"):
        raise ValueError(
            f"Unsupported 'direct' value: {direct!r} (expected None, 'oflag' or 'iflag')."
        )

    dd = (Dd()
          .input(source)
          .output(target)
          .block_size(Size(1, Unit.Blocks4096))
          .count(int(size.get_value(Unit.Blocks4096))))

    if direct == "oflag":
        dd.oflag("direct")
    elif direct == "iflag":
        dd.iflag("direct")
    dd.run()


def compare_files(file1, file2, should_differ=False):
    """Fail the test when the md5 checksums of two files do not match expectations.

    :param file1: first file (must provide ``md5sum()``)
    :param file2: second file (must provide ``md5sum()``)
    :param should_differ: when true the checksums are expected to differ,
        otherwise they are expected to be equal
    """
    file1_md5 = file1.md5sum()
    file2_md5 = file2.md5sum()
    checksums_differ = file1_md5 != file2_md5

    if should_differ and not checksums_differ:
        TestRun.fail("Source and target file checksums are identical.")
    elif not should_differ and checksums_differ:
        TestRun.fail("Source and target file checksums are different.")


def power_cycle_dut(wait_for_flush_begin=False, core_device=None):
    """Power cycle the DUT, optionally waiting until writes reach the core device.

    :param wait_for_flush_begin: when true, poll the core device I/O statistics
        until ``sectors_written`` grows above the value sampled on entry
    :param core_device: device whose ``get_io_stats().sectors_written`` is
        polled; required when ``wait_for_flush_begin`` is true
    :raises ValueError: if ``wait_for_flush_begin`` is true and no core device
        was given
    """
    if wait_for_flush_begin:
        if not core_device:
            # ValueError is still an Exception, so callers catching Exception keep working.
            raise ValueError("Core device is None.")
        TestRun.LOGGER.info("Waiting for flushing to begin...")
        core_writes_before = core_device.get_io_stats().sectors_written
        # Poll every 100 ms, for up to 3 minutes.
        # NOTE(review): the result of os_utils.wait is not checked - if it reports
        # a timeout through its return value, the power cycle happens regardless.
        os_utils.wait(lambda: core_writes_before < core_device.get_io_stats().sectors_written,
                      timedelta(minutes=3),
                      timedelta(milliseconds=100))
    power_control = TestRun.plugin_manager.get_plugin('power_control')
    power_control.power_cycle()
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.parametrize("cleaning_policy", CleaningPolicy)
@pytest.mark.parametrize("cache_line_size", CacheLineSize)
@pytest.mark.parametrize("filesystem", Filesystem)
@pytest.mark.require_plugin("power_control")
def test_recovery_all_options(cache_mode, cache_line_size, cleaning_policy, filesystem):
    """
        title: Test for recovery after reset with various cache options.
        description: Verify that unflushed data can be safely recovered after reset.
        pass_criteria:
          - CAS recovers successfully after reboot
          - No data corruption
    """
    with TestRun.step("Prepare cache and core devices."):
        cache_disk = TestRun.disks['cache']
        core_disk = TestRun.disks['core']
        cache_disk.create_partitions([Size(200, Unit.MebiByte)])
        core_disk.create_partitions([Size(2000, Unit.MebiByte)] * 2)
        cache_device = cache_disk.partitions[0]
        core_device = core_disk.partitions[0]
        # The by-id links are kept so the device paths can be re-resolved after
        # the power cycle.
        core_device_link = core_device.get_device_link("/dev/disk/by-id")
        cache_device_link = cache_device.get_device_link("/dev/disk/by-id")

        # Reference checksum: the pattern file is written at the mount point path
        # before anything is mounted there by this test, and its md5 is the value
        # the recovered file is compared against at the end.
        test_file = File(os.path.join(mount_point, filename))
        file_operation(test_file.full_path, pattern, ReadWrite.write)
        file_md5 = test_file.md5sum()

    with TestRun.step(f"Make {filesystem} on core device."):
        core_device.create_filesystem(filesystem)

    with TestRun.step("Mount core device."):
        core_device.mount(mount_point)
        # The core first receives a different pattern, so a matching checksum at
        # the end can only come from data written through the cache.
        file_operation(test_file.full_path, other_pattern, ReadWrite.write)
        os_utils.drop_caches(DropCachesMode.ALL)

    with TestRun.step("Unmount core device."):
        core_device.unmount()

    with TestRun.step(f"Start cache in {cache_mode.name} with given configuration."):
        cache = casadm.start_cache(cache_device, cache_mode, cache_line_size, force=True)
        cache.set_cleaning_policy(cleaning_policy)
        if cleaning_policy == CleaningPolicy.acp:
            cache.set_params_acp(FlushParametersAcp(wake_up_time=Time(seconds=1)))

    with TestRun.step("Add core."):
        core = cache.add_core(core_device)

    with TestRun.step("Mount CAS device."):
        core.mount(mount_point)
        file_operation(test_file.full_path, pattern, ReadWrite.write)

    with TestRun.step("Change cache mode to Write-Through without flush option."):
        cache.set_cache_mode(CacheMode.WT, flush=False)

    with TestRun.step("Reset platform."):
        os_utils.sync()
        core.unmount()
        TestRun.LOGGER.info(f"Number of dirty blocks in cache: {cache.get_dirty_blocks()}")
        power_cycle_dut()
        # Re-resolve device paths from the by-id links after the reboot.
        cache_device.full_path = cache_device_link.get_target()
        core_device.full_path = core_device_link.get_target()

    with TestRun.step("Try to start cache without load and force option."):
        try:
            casadm.start_cache(cache_device, cache_mode, cache_line_size)
        except Exception:
            TestRun.LOGGER.info("Cache did not start without load and force option.")
        else:
            # Reported outside the guarded body so the broad handler above can
            # never swallow the failure it is supposed to signal.
            TestRun.fail("Cache started without load or force option.")

    with TestRun.step("Load cache and stop it with flush."):
        cache = casadm.load_cache(cache_device)
        cache.stop()

    with TestRun.step("Check md5sum of tested file on core device."):
        core_device.mount(mount_point)
        cas_md5 = test_file.md5sum()
        core_device.unmount()
        if cas_md5 == file_md5:
            TestRun.LOGGER.info("Source and target file checksums are identical.")
        else:
            TestRun.fail("Source and target file checksums are different.")


def file_operation(target_path, data_pattern, io_pattern):
    """Run fio against ``target_path`` using the given data and I/O patterns.

    The job uses the libaio engine, direct I/O, 4096-byte blocks and the
    module-level ``test_file_size``.

    :param target_path: path handed to fio as the job target
    :param data_pattern: pattern passed to ``verification_with_pattern``
    :param io_pattern: value passed to ``read_write`` (e.g. ``ReadWrite.write``)
    """
    fio = (Fio().create_command()
           .target(target_path)
           .io_engine(IoEngine.libaio)
           .size(test_file_size)
           .read_write(io_pattern)
           .block_size(Size(1, Unit.Blocks4096))
           .verification_with_pattern(data_pattern)
           .direct()
           # NOTE(review): do_verify=0 presumably skips fio's own verify phase so
           # the pattern is only written - confirm against the fio documentation.
           .set_param("do_verify", 0))
    fio.run()
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.require_plugin("power_control")
def test_recovery_flush_reset_raw(cache_mode):
    """
        title: Recovery after reset during cache flushing - test on raw device.
        description: |
          Verify that unflushed data can be safely recovered, when reset was pressed during
          data flushing on raw device.
        pass_criteria:
          - CAS recovers successfully after reboot
          - No data corruption
    """
    with TestRun.step("Prepare cache and core devices."):
        cache_disk = TestRun.disks['cache']
        core_disk = TestRun.disks['core']
        cache_disk.create_partitions([Size(2, Unit.GibiByte)])
        core_disk.create_partitions([Size(16, Unit.GibiByte)] * 2)
        cache_device = cache_disk.partitions[0]
        core_device = core_disk.partitions[0]
        # The by-id links are kept so the device paths can be re-resolved after
        # the power cycle.
        core_device_link = core_device.get_device_link("/dev/disk/by-id")
        cache_device_link = cache_device.get_device_link("/dev/disk/by-id")

    with TestRun.step("Create test files."):
        source_file, target_file = create_test_files(test_file_size)

    with TestRun.step("Setup cache and add core."):
        cache = casadm.start_cache(cache_device, cache_mode)
        core = cache.add_core(core_device)
        cache.set_cleaning_policy(CleaningPolicy.nop)
        cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)

    with TestRun.step("Copy file to CAS."):
        copy_file(source=source_file.full_path, target=core.system_path, size=test_file_size,
                  direct="oflag")

    with TestRun.step("Sync and flush buffers."):
        os_utils.sync()
        output = TestRun.executor.run(f"hdparm -f {core.system_path}")
        if output.exit_code != 0:
            raise CmdException("Error during hdparm", output)

    with TestRun.step("Trigger flush."):
        # Started in the background so the reset below can hit while it is running.
        TestRun.executor.run_in_background(cli.flush_cache_cmd(f"{cache.cache_id}"))

    with TestRun.step("Hard reset DUT during data flushing."):
        power_cycle_dut(wait_for_flush_begin=True, core_device=core_device)
        cache_device.full_path = cache_device_link.get_target()
        core_device.full_path = core_device_link.get_target()

    with TestRun.step("Copy file from core and check if current md5sum is different than "
                      "before restart."):
        # The flush was interrupted, so the core alone must not yet hold the full file.
        copy_file(source=core_device_link.get_target(), target=target_file.full_path,
                  size=test_file_size, direct="iflag")
        compare_files(source_file, target_file, should_differ=True)

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_device)
        if cache.get_dirty_blocks() == Size.zero():
            TestRun.fail("There are no dirty blocks on cache device.")

    with TestRun.step("Stop cache with dirty data flush."):
        core_writes_before = core_device.get_io_stats().sectors_written
        cache.stop()
        # Stopping the cache must have produced additional writes to the core.
        if core_writes_before >= core_device.get_io_stats().sectors_written:
            TestRun.fail("No data was flushed after stopping cache started with load option.")

    with TestRun.step("Copy test file from core device to temporary location. "
                      "Compare it with the first version – they should be the same."):
        copy_file(source=core_device_link.get_target(), target=target_file.full_path,
                  size=test_file_size, direct="iflag")
        compare_files(source_file, target_file)

    with TestRun.step("Cleanup core device and remove test files."):
        target_file.remove()
        source_file.remove()


@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.parametrize("fs", [Filesystem.xfs, Filesystem.ext4])
@pytest.mark.require_plugin("power_control")
def test_recovery_flush_reset_fs(cache_mode, fs):
    """
        title: Recovery after reset during cache flushing - test on filesystem.
        description: |
          Verify that unflushed data can be safely recovered, when reset was pressed during
          data flushing on filesystem.
        pass_criteria:
          - CAS recovers successfully after reboot
          - No data corruption
    """
    with TestRun.step("Prepare cache and core devices."):
        cache_disk = TestRun.disks['cache']
        core_disk = TestRun.disks['core']
        cache_disk.create_partitions([Size(2, Unit.GibiByte)])
        core_disk.create_partitions([Size(16, Unit.GibiByte)] * 2)
        cache_device = cache_disk.partitions[0]
        core_device = core_disk.partitions[0]
        # The by-id links are kept so the device paths can be re-resolved after
        # the power cycle.
        core_device_link = core_device.get_device_link("/dev/disk/by-id")
        cache_device_link = cache_device.get_device_link("/dev/disk/by-id")

    with TestRun.step(f"Create {fs} filesystem on core."):
        core_device.create_filesystem(fs)

    with TestRun.step("Create test files."):
        source_file, target_file = create_test_files(test_file_size)

    # Udev is disabled inside the setup step below. The flag lets the ``finally``
    # clause re-enable it when any later step fails before the regular cleanup,
    # so a failing run does not leave udev disabled for the tests that follow.
    udev_disabled = False
    try:
        with TestRun.step("Setup cache and add core."):
            cache = casadm.start_cache(cache_device, cache_mode)
            Udev.disable()
            udev_disabled = True
            core = cache.add_core(core_device)
            cache.set_cleaning_policy(CleaningPolicy.nop)
            cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)

        with TestRun.step("Mount CAS device."):
            core.mount(mount_point)

        with TestRun.step("Copy file to CAS."):
            copy_file(source=source_file.full_path,
                      target=os.path.join(mount_point, "source_test_file"),
                      size=test_file_size, direct="oflag")

        with TestRun.step("Unmount CAS device."):
            core.unmount()

        with TestRun.step("Trigger flush."):
            # Started in the background so the reset below can hit while it is running.
            TestRun.executor.run_in_background(cli.flush_cache_cmd(f"{cache.cache_id}"))

        with TestRun.step("Hard reset DUT during data flushing."):
            power_cycle_dut(True, core_device)
            cache_device.full_path = cache_device_link.get_target()
            core_device.full_path = core_device_link.get_target()

        with TestRun.step("Load cache."):
            cache = casadm.load_cache(cache_device)
            if cache.get_dirty_blocks() == Size.zero():
                TestRun.fail("There are no dirty blocks on cache device.")

        with TestRun.step("Stop cache with dirty data flush."):
            core_writes_before = core_device.get_io_stats().sectors_written
            cache.stop()
            # Stopping the cache must have produced additional writes to the core.
            if core_writes_before >= core_device.get_io_stats().sectors_written:
                TestRun.fail("No data was flushed after stopping cache started with load option.")

        with TestRun.step("Mount core device."):
            core_device.mount(mount_point)

        with TestRun.step("Copy test file from core device to temporary location. "
                          "Compare it with the first version – they should be the same."):
            copy_file(source=os.path.join(mount_point, "source_test_file"),
                      target=target_file.full_path,
                      size=test_file_size, direct="iflag")
            compare_files(source_file, target_file)

        with TestRun.step("Unmount core device and remove test files."):
            core_device.unmount()
            target_file.remove()
            source_file.remove()
            # Cleared first so the ``finally`` clause does not repeat the call.
            udev_disabled = False
            Udev.enable()
    finally:
        if udev_disabled:
            Udev.enable()
def _log_dirty_blocks(cache):
    """Log the number of dirty blocks on ``cache``, expressed in 4096-byte blocks."""
    dirty_4k = cache.get_dirty_blocks().get_value(Unit.Blocks4096)
    TestRun.LOGGER.info(f"Dirty blocks on cache: {dirty_4k}")


@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB])
@pytest.mark.parametrize("filesystem", Filesystem)
@pytest.mark.parametrize("direct", [True, False])
@pytest.mark.require_plugin("power_control")
def test_recovery_unplug_cache_fs(cache_mode, cls, filesystem, direct):
    """
        title: Test for recovery after cache drive removal - test with filesystem.
        description: |
          Verify that unflushed data can be safely recovered after, when SSD drive is removed
          after write completion - test with filesystem.
        pass_criteria:
          - CAS recovers successfully after cache drive unplug
          - No data corruption
    """
    # dd flags for the write to CAS and for the read-back from the core;
    # ``None`` leaves the copy buffered.
    write_flag = "oflag" if direct else None
    read_flag = "iflag" if direct else None

    with TestRun.step("Prepare devices"):
        cache_drive = TestRun.disks['cache']
        core_drive = TestRun.disks['core']
        cache_drive.create_partitions([Size(2, Unit.GibiByte)])
        core_drive.create_partitions([Size(16, Unit.GibiByte)])
        cache_part = cache_drive.partitions[0]
        core_part = core_drive.partitions[0]

    with TestRun.step("Create test files."):
        reference_file, readback_file = create_test_files(test_file_size)

    with TestRun.step("Create filesystem on core device."):
        core_part.create_filesystem(filesystem)

    with TestRun.step("Start cache and add core."):
        cache = casadm.start_cache(cache_part, cache_mode, cls)
        exported_core = cache.add_core(core_part)

    with TestRun.step("Mount CAS device."):
        exported_core.mount(mount_point)

    with TestRun.step("Copy file to CAS."):
        copy_file(
            source=reference_file.full_path,
            target=test_file_path,
            size=test_file_size,
            direct=write_flag,
        )
        core_stats = exported_core.get_statistics()
        TestRun.LOGGER.info(str(core_stats))

    with TestRun.step("Unmount CAS device."):
        exported_core.unmount()

    with TestRun.step("Unplug cache device."):
        cache_drive.unplug()
        cache_listing = casadm.list_caches().stdout
        TestRun.LOGGER.info(f"List caches:\n{cache_listing}")
        _log_dirty_blocks(cache)

    with TestRun.step("Stop cache."):
        cache.stop()

    with TestRun.step("Plug missing cache device."):
        cache_drive.plug()

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_part)
        _log_dirty_blocks(cache)

    with TestRun.step("Stop cache with data flush."):
        cache.stop()

    with TestRun.step("Mount core device."):
        core_part.mount(mount_point)

    with TestRun.step("Copy file from core device and check md5sum."):
        copy_file(
            source=test_file_path,
            target=readback_file.full_path,
            size=test_file_size,
            direct=read_flag,
        )
        compare_files(reference_file, readback_file)

    with TestRun.step("Unmount core device and remove files."):
        core_part.unmount()
        readback_file.remove()
        reference_file.remove()


@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB])
@pytest.mark.require_plugin("power_control")
def test_recovery_unplug_cache_raw(cache_mode, cls):
    """
        title: Test for recovery after cache drive removal - test on raw device.
        description: |
          Verify that unflushed data can be safely recovered after, when SSD drive is removed
          after write completion - test on raw device.
        pass_criteria:
          - CAS recovers successfully after cache drive unplug
          - No data corruption
    """
    with TestRun.step("Prepare devices"):
        cache_drive = TestRun.disks['cache']
        core_drive = TestRun.disks['core']
        cache_drive.create_partitions([Size(2, Unit.GibiByte)])
        core_drive.create_partitions([Size(16, Unit.GibiByte)])
        cache_part = cache_drive.partitions[0]
        core_part = core_drive.partitions[0]

    with TestRun.step("Create test files."):
        reference_file, readback_file = create_test_files(test_file_size)

    with TestRun.step("Start cache and add core."):
        cache = casadm.start_cache(cache_part, cache_mode, cls)
        exported_core = cache.add_core(core_part)

    with TestRun.step("Copy file to CAS."):
        # Written straight to the exported CAS device with direct output.
        copy_file(
            source=reference_file.full_path,
            target=exported_core.system_path,
            size=test_file_size,
            direct="oflag",
        )
        core_stats = exported_core.get_statistics()
        TestRun.LOGGER.info(str(core_stats))

    with TestRun.step("Unplug cache device."):
        cache_drive.unplug()
        cache_listing = casadm.list_caches().stdout
        TestRun.LOGGER.info(f"List caches:\n{cache_listing}")
        _log_dirty_blocks(cache)

    with TestRun.step("Stop cache."):
        cache.stop()

    with TestRun.step("Plug missing cache device."):
        cache_drive.plug()

    with TestRun.step("Load cache."):
        cache = casadm.load_cache(cache_part)
        _log_dirty_blocks(cache)

    with TestRun.step("Stop cache with data flush."):
        cache.stop()

    with TestRun.step("Copy file from core device and check md5sum."):
        # Read back from the raw core partition, bypassing the page cache.
        copy_file(
            source=core_part.system_path,
            target=readback_file.full_path,
            size=test_file_size,
            direct="iflag",
        )
        compare_files(reference_file, readback_file)

    with TestRun.step("Cleanup core device and remove test files."):
        readback_file.remove()
        reference_file.remove()