# # Copyright(c) 2022 Intel Corporation # SPDX-License-Identifier: BSD-3-Clause # from datetime import timedelta import os import pytest import time from api.cas import casadm, cli from api.cas.cache_config import ( CacheMode, CleaningPolicy, CacheModeTrait, CacheLineSize, SeqCutOffPolicy, FlushParametersAlru, Time, ) from core.test_run import TestRun from storage_devices.disk import DiskType, DiskTypeSet from storage_devices.drbd import Drbd from test_tools.dd import Dd from test_tools.disk_utils import Filesystem from test_utils.filesystem.file import File from test_tools.fio.fio import Fio from test_tools.fio.fio_param import ReadWrite from test_tools.fs_utils import create_directory from test_utils.drbd import Resource, Node from type_def.size import Size, Unit from test_failover_multihost import check_drbd_installed cache_size = Size(10, Unit.GibiByte) metadata_size = Size(1024, Unit.MebiByte) core_size = Size(15, Unit.GibiByte) cache_id = 37 cache_exp_obj_path = f"/dev/cas-cache-{cache_id}" # Calculate random io size (in bytes) to insert (on avarage) all cachelines # for given cache/core device size, cacheline size and I/O block size. # # When inserting to cache randomly with block size < cacheline size it is not # enough to send single I/O per one cacheline, as some operations will hit # the same cacheline, leaving some cachelines unused. Increasing number of I/O # to (cache_capcity_B / io_block_size) would definetely fill the entire cache, # but would also overfill the cache, resulting in unnecessary eviction (assuming # core size > cache capacity). # # This function calculates just the right amount of I/O to insert exactly the right # (cache_capacity_b / cls) amount of cachelines (statistically). Due to random fluctuations # cache occupancy might be slightly smaller or a slight overfill might occur - resulting # in eviction at the end of fill process. def calc_io_size(cache_size, core_size, cache_line_size, block_size): target_occupancy = 1.0 # increase to avoid underfill due to random I/O fluctuations bs = block_size.value dev_ratio = cache_size.value / core_size.value bs_ratio = block_size.value / int(cache_line_size) size = core_size.value * (1 - (1 - target_occupancy * dev_ratio) ** (bs_ratio)) return Size(int(size) // bs * bs, Unit.Byte) def timed_async_power_cycle(): start = time.time() power_control = TestRun.plugin_manager.get_plugin("power_control") power_control.power_cycle(wait_for_connection=False) end = time.time() if end - start > 5: TestRun.LOGGER.warning( f"Power cycle request took {end - start} seconds, this could result in test failure " "due to insufficient dirty data after failover." ) @pytest.mark.os_dependent @pytest.mark.multidut(2) @pytest.mark.require_disk("cache_dev", DiskTypeSet([DiskType.optane])) @pytest.mark.require_disk("core_dev", DiskTypeSet([DiskType.nand])) @pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites)) @pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB]) @pytest.mark.parametrize("cleaning_policy", [c for c in CleaningPolicy if c != CleaningPolicy.nop]) @pytest.mark.parametrize("num_iterations", [2]) def test_failover_during_background_cleaning(cache_mode, cls, cleaning_policy, num_iterations): """ title: Failover sequence with background cleaning: description: Verify proper failover behaviour and data integrity after power failure during background cleaning running. pass_criteria: - Failover procedure success - Data integrity is maintained parametrizations: - cache mode: all cache modes with lazy writes - to make sure dirty data is produced so that metadata synchronization between hosts occurs - cacheline size: 4K, 64K - to test both sector I/O and full-cacheline I/O - cleaning policy - as different policies have separate metadata handling implementation steps: - On 2 DUTs (main and backup) prepare cache device of 10GiB size - On 2 DUTs (main and backup) prepare primary storage device of size 15GiB - On main DUT prefill primary storage device with zeroes - Start a standby cache instance on the backup DUT with parametrized cacheline size - Configure DRBD to replicate cache and core storage from main to backup node - On main DUT: - Start cache on top of cache DRBD device with parametrized cacheline size and cache mode - Set cleaning policy to NOP - Set sequential cutoff to never - Wait for DRBD synchronization - Fill cache with random 50% read/write mix workload, block size 4K - Verify cache is > 25% dirty - Switch to WO cache mode without flush - Calculate checksum of CAS exported object - Switch back to the parametrized cache mode without flush - Switch to parametrized cleaning policy - Wait for the background cleaner to start working (no wait for ACP, according to policy parameters for ALRU) - Verify cleaner is progressing by inspecting dirty statistics - Power off the main DUT - On backup DUT: - stop cache DRBD - set backup DUT as primary for core DRBD - deatch cache drive from standby cache instance - activate standby cache instance directly on the cache drive - verify dirty stats did not increase - calculate checksum of CAS exported object - Verify that the two checksums are equal - Power on the main DUT """ with TestRun.step("Make sure DRBD is installed on both nodes"): check_drbd_installed(TestRun.duts) with TestRun.step("Prepare DUTs"): prepare_devices(TestRun.duts) primary_node, secondary_node = TestRun.duts with TestRun.step("Prepare DRBD config files on both DUTs"): cache_drbd_resource, core_drbd_resource = create_drbd_configs(primary_node, secondary_node) for i in TestRun.iteration(range(num_iterations)): with TestRun.step("Prefill primary storage device with zeroes"), TestRun.use_dut( primary_node ): Dd().block_size(Size(1, Unit.MebiByte)).input("/dev/zero").output( f"{primary_node.core_dev.path}" ).oflag("direct").run() with TestRun.step("Start standby cache instance on secondary DUT"), TestRun.use_dut( secondary_node ): secondary_node.cache = casadm.standby_init( cache_dev=secondary_node.cache_dev, cache_line_size=cls, cache_id=cache_id, force=True, ) for dut in TestRun.duts: with TestRun.step(f"Create DRBD instances on {dut.ip}"), TestRun.use_dut(dut): dut.cache_drbd = Drbd(cache_drbd_resource) dut.cache_drbd.create_metadata(force=True) dut.cache_drbd_dev = dut.cache_drbd.up() dut.core_drbd = Drbd(core_drbd_resource) dut.core_drbd.create_metadata(force=True) dut.core_drbd_dev = dut.core_drbd.up() with TestRun.step( f"Set {primary_node.ip} as primary node for both DRBD instances" ), TestRun.use_dut(primary_node): primary_node.cache_drbd.set_primary() primary_node.core_drbd.set_primary() with TestRun.step( f"Start cache on top of cache DRBD device with cacheline size {cls} and {cache_mode} " "cache mode" ), TestRun.use_dut(primary_node): primary_node.cache = casadm.start_cache( primary_node.cache_drbd_dev, force=True, cache_mode=cache_mode, cache_line_size=cls, cache_id=cache_id, ) core = primary_node.cache.add_core(primary_node.core_drbd_dev) with TestRun.step("Set NOP cleaning policy"), TestRun.use_dut(primary_node): primary_node.cache.set_cleaning_policy(CleaningPolicy.nop) with TestRun.step("Disable sequential cutoff"), TestRun.use_dut(primary_node): primary_node.cache.set_seq_cutoff_policy(SeqCutOffPolicy.never) with TestRun.step("Wait for DRBD synchronization"), TestRun.use_dut(primary_node): primary_node.cache_drbd.wait_for_sync() primary_node.core_drbd.wait_for_sync() with TestRun.step( "Fill cache with random 50% read/write mix workload, block size 4K" ), TestRun.use_dut(primary_node): bs = Size(4, Unit.KibiByte) io_size = calc_io_size(cache_size, core_size, cls, bs) if CacheModeTrait.InsertRead not in CacheMode.get_traits(cache_mode): io_size = io_size * 2 fio = ( Fio() .create_command() .direct(True) .read_write(ReadWrite.randrw) .io_depth(64) .block_size(bs) .size(core_size) .io_size(io_size) .file_name(core.path) ) fio.run() with TestRun.step("Verify cache is > 25% dirty"), TestRun.use_dut(primary_node): dirty_after_initial_io = primary_node.cache.get_statistics( percentage_val=True ).usage_stats.dirty if dirty_after_initial_io < 25: if dirty_after_initial_io == 0.0: TestRun.LOGGER.exception("Expected at least 25% dirty data, got 0") else: TestRun.LOGGER.warning( f"Expected at least 25% dirty data, got {dirty_after_initial_io}" ) with TestRun.step("Switch to WO cache mode without flush"), TestRun.use_dut(primary_node): primary_node.cache.set_cache_mode(CacheMode.WO, flush=False) with TestRun.step("Calculate checksum of CAS exported object"), TestRun.use_dut( primary_node ): checksum1 = TestRun.executor.run(f"md5sum {core.path}").stdout.split()[0] with TestRun.step( f"Switch back to the {cache_mode} cache mode without flush" ), TestRun.use_dut(primary_node): primary_node.cache.set_cache_mode(cache_mode, flush=False) with TestRun.step(f"Switch to {cleaning_policy} cleaning policy"), TestRun.use_dut( primary_node ): primary_node.cache.set_cleaning_policy(cleaning_policy) if cleaning_policy == CleaningPolicy.alru: TestRun.LOGGER.info("Configure ALRU to trigger immediately\n") params = FlushParametersAlru( activity_threshold=Time(milliseconds=0), wake_up_time=Time(seconds=0), staleness_time=Time(seconds=1), ) primary_node.cache.set_params_alru(params) with TestRun.step("Wait 2s"): time.sleep(2) with TestRun.step( "Verify cleaner is progressing by inspecting dirty statistics" ), TestRun.use_dut(primary_node): dirty_after_cleaning = primary_node.cache.get_statistics( percentage_val=True ).usage_stats.dirty TestRun.LOGGER.info( f"Dirty stats change: {dirty_after_initial_io}% -> {dirty_after_cleaning}%" ) # make sure there is cleaning progress if dirty_after_cleaning >= dirty_after_initial_io: TestRun.LOGGER.exception("No cleaning progress detected") # make sure there is dirty data left to clean if dirty_after_cleaning < 20: TestRun.LOGGER.exception("Not enough dirty data") with TestRun.step(f"Power off the main DUT"), TestRun.use_dut(primary_node): timed_async_power_cycle() with TestRun.step("Stop cache DRBD on the secondary node"), TestRun.use_dut(secondary_node): secondary_node.cache_drbd.down() with TestRun.step("Set backup DUT as primary for core DRBD"), TestRun.use_dut( secondary_node ): secondary_node.core_drbd.set_primary() with TestRun.step("Deatch cache drive from standby cache instance"), TestRun.use_dut( secondary_node ): secondary_node.cache.standby_detach() with TestRun.step( "Activate standby cache instance directly on the cache drive" ), TestRun.use_dut(secondary_node): secondary_node.cache.standby_activate(secondary_node.cache_dev) with TestRun.step("Verify there is some dirty data after failover"), TestRun.use_dut( secondary_node ): dirty_after_failover = secondary_node.cache.get_statistics( percentage_val=True ).usage_stats.dirty if dirty_after_failover > dirty_after_cleaning: TestRun.LOGGER.exception("Unexpeted increase in dirty cacheline count") elif dirty_after_failover == 0: TestRun.LOGGER.exception( "No dirty data after failover. This might indicate that power cycle took too " "long or cleaning/network is too fast\n" ) else: TestRun.LOGGER.info(f"Dirty cachelines after failover: {dirty_after_failover}") with TestRun.step("Calculate checksum of CAS exported object"), TestRun.use_dut( secondary_node ): checksum2 = TestRun.executor.run(f"md5sum {core.path}").stdout.split()[0] with TestRun.step("Verify that the two checksums are equal"): if checksum1 != checksum2: TestRun.LOGGER.error( f"Checksum mismatch: primary {checksum1} secondary {checksum2}" ) with TestRun.step("Cleanup after iteration"), TestRun.use_dut(secondary_node): secondary_node.cache.stop(no_data_flush=True) Drbd.down_all() with TestRun.step("Wait for the primary DUT to be back online"), TestRun.use_dut( primary_node ): TestRun.executor.wait_for_connection() @pytest.mark.os_dependent @pytest.mark.multidut(2) @pytest.mark.require_disk("cache_dev", DiskTypeSet([DiskType.optane])) @pytest.mark.require_disk("core_dev", DiskTypeSet([DiskType.nand])) @pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites)) @pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB]) @pytest.mark.parametrize("num_iterations", [2]) def test_failover_during_dirty_flush(cache_mode, cls, num_iterations): """ title: Failover sequence with after power failure during dirty data flush description: Verify proper failover behaviour and data integrity after power failure during user-issued cleaning pass_criteria: - Failover procedure success - Data integrity is maintained parametrizations: - cache mode: all cache modes with lazy writes - to make sure dirty data is produced so that metadata synchronization between hosts occurs - cacheline size: 4K, 64K - to test both sector I/O and full-cacheline I/O steps: - On 2 DUTs (main and backup) prepare cache device of 10GiB size - On 2 DUTs (main and backup) prepare primary storage device of size 15GiB - On main DUT prefill primary storage device with zeroes - Start a standby cache instance on the backup DUT with parametrized cacheline size - Configure DRBD to replicate cache and core storage from main to backup node - On main DUT: - Start cache on top of cache DRBD device with parametrized cacheline size and cache mode - Wait for DRBD synchronization - Set cleaning policy to NOP - Set sequential cutoff to never - Fill cache with random 50% read/write mix workload, block size 4K - Verify cache is > 25% dirty - Switch to WO cache mode without flush - Calculate checksum of CAS exported object - Switch back to the parametrized cache mode without flush - Issue cache flush command - Verify flush is progressing by inspecting dirty statistics - Power off the main DUT - On backup DUT: - stop cache DRBD - set backup DUT as primary for core DRBD - deatch cache drive from standby cache instance - activate standby cache instance directly on the cache drive - verify dirty stats did not increase - calculate checksum of CAS exported object - Verify that the two checksums are equal - Power on the main DUT """ with TestRun.step("Make sure DRBD is installed on both nodes"): check_drbd_installed(TestRun.duts) with TestRun.step("Prepare DUTs"): prepare_devices(TestRun.duts) primary_node, secondary_node = TestRun.duts with TestRun.step("Prepare DRBD config files on both DUTs"): cache_drbd_resource, core_drbd_resource = create_drbd_configs(primary_node, secondary_node) for i in TestRun.iteration(range(num_iterations)): with TestRun.step("Prefill primary storage device with zeroes"), TestRun.use_dut( primary_node ): Dd().block_size(Size(1, Unit.MebiByte)).input("/dev/zero").output( f"{primary_node.core_dev.path}" ).oflag("direct").run() with TestRun.step("Start standby cache instance on secondary DUT"), TestRun.use_dut( secondary_node ): secondary_node.cache = casadm.standby_init( cache_dev=secondary_node.cache_dev, cache_line_size=cls, cache_id=cache_id, force=True, ) for dut in TestRun.duts: with TestRun.step(f"Create DRBD instances on {dut.ip}"), TestRun.use_dut(dut): dut.cache_drbd = Drbd(cache_drbd_resource) dut.cache_drbd.create_metadata(force=True) dut.cache_drbd_dev = dut.cache_drbd.up() dut.core_drbd = Drbd(core_drbd_resource) dut.core_drbd.create_metadata(force=True) dut.core_drbd_dev = dut.core_drbd.up() with TestRun.step( f"Set {primary_node.ip} as primary node for both DRBD instances" ), TestRun.use_dut(primary_node): primary_node.cache_drbd.set_primary() primary_node.core_drbd.set_primary() with TestRun.step( f"Start cache on top of cache DRBD device with cacheline size {cls} and {cache_mode} " "cache mode" ), TestRun.use_dut(primary_node): primary_node.cache = casadm.start_cache( primary_node.cache_drbd_dev, force=True, cache_mode=cache_mode, cache_line_size=cls, cache_id=cache_id, ) core = primary_node.cache.add_core(primary_node.core_drbd_dev) with TestRun.step("Set NOP cleaning policy"), TestRun.use_dut(primary_node): primary_node.cache.set_cleaning_policy(CleaningPolicy.nop) with TestRun.step("Disable sequential cutoff"), TestRun.use_dut(primary_node): primary_node.cache.set_seq_cutoff_policy(SeqCutOffPolicy.never) with TestRun.step("Wait for DRBD synchronization"), TestRun.use_dut(primary_node): primary_node.cache_drbd.wait_for_sync() primary_node.core_drbd.wait_for_sync() with TestRun.step( "Fill cache with random 50% read/write mix workload, block size 4K" ), TestRun.use_dut(primary_node): bs = Size(4, Unit.KibiByte) io_size = calc_io_size(cache_size, core_size, cls, bs) if CacheModeTrait.InsertRead not in CacheMode.get_traits(cache_mode): io_size = io_size * 2 fio = ( Fio() .create_command() .direct(True) .read_write(ReadWrite.randrw) .block_size(bs) .size(core_size) .io_size(io_size) .file_name(core.path) .io_depth(64) .rand_seed(TestRun.random_seed) .set_param("allrandrepeat", 1) .set_flags("refill_buffers") ) fio.run() with TestRun.step("Verify cache is > 25% dirty"), TestRun.use_dut(primary_node): dirty_after_initial_io = primary_node.cache.get_statistics( percentage_val=True ).usage_stats.dirty if dirty_after_initial_io < 25: if dirty_after_initial_io == 0.0: TestRun.LOGGER.exception("Expected at least 25% dirty data, got 0") else: TestRun.LOGGER.warning( f"Expected at least 25% dirty data, got {dirty_after_initial_io}" ) with TestRun.step("Switch to WO cache mode without flush"), TestRun.use_dut(primary_node): primary_node.cache.set_cache_mode(CacheMode.WO, flush=False) with TestRun.step("Calculate checksum of CAS exported object"), TestRun.use_dut( primary_node ): checksum1 = TestRun.executor.run(f"md5sum {core.path}").stdout.split()[0] with TestRun.step( f"Switch back to the {cache_mode} cache mode without flush" ), TestRun.use_dut(primary_node): primary_node.cache.set_cache_mode(cache_mode, flush=False) with TestRun.step("Issue cache flush command in background"), TestRun.use_dut(primary_node): TestRun.executor.run_in_background( cli.flush_cache_cmd(str(primary_node.cache.cache_id)) ) with TestRun.step("Wait 2s"): time.sleep(2) with TestRun.step( "Verify cleaner is progressing by inspecting dirty statistics" ), TestRun.use_dut(primary_node): dirty_after_cleaning = primary_node.cache.get_statistics( percentage_val=True ).usage_stats.dirty TestRun.LOGGER.info( f"Dirty stats change: {dirty_after_initial_io}% -> {dirty_after_cleaning}%" ) # make sure there is cleaning progress if dirty_after_cleaning >= dirty_after_initial_io: TestRun.LOGGER.exception("No cleaning progress detected") # make sure there is dirty data left to clean if dirty_after_cleaning < 20: TestRun.LOGGER.exception("Not enough dirty data") with TestRun.step(f"Power off the main DUT"), TestRun.use_dut(primary_node): timed_async_power_cycle() with TestRun.step("Stop cache DRBD on the secondary node"), TestRun.use_dut(secondary_node): secondary_node.cache_drbd.down() with TestRun.step("Set backup DUT as primary for core DRBD"), TestRun.use_dut( secondary_node ): secondary_node.core_drbd.set_primary() with TestRun.step("Deatch cache drive from standby cache instance"), TestRun.use_dut( secondary_node ): secondary_node.cache.standby_detach() with TestRun.step( "Activate standby cache instance directly on the cache drive" ), TestRun.use_dut(secondary_node): secondary_node.cache.standby_activate(secondary_node.cache_dev) with TestRun.step("Verify there is some dirty data after failover"), TestRun.use_dut( secondary_node ): dirty_after_failover = secondary_node.cache.get_statistics( percentage_val=True ).usage_stats.dirty if dirty_after_failover > dirty_after_cleaning: TestRun.LOGGER.exception("Unexpeted increase in dirty cacheline count") elif dirty_after_failover == 0: TestRun.LOGGER.exception( "No dirty data after failover. This might indicate that power cycle took too " "long or cleaning/network is too fast\n" ) else: TestRun.LOGGER.info(f"Dirty cachelines after failover: {dirty_after_failover}") with TestRun.step("Calculate checksum of CAS exported object"), TestRun.use_dut( secondary_node ): checksum2 = TestRun.executor.run(f"md5sum {core.path}").stdout.split()[0] with TestRun.step("Verify that the two checksums are equal"): if checksum1 != checksum2: TestRun.LOGGER.error( f"Checksum mismatch: primary {checksum1} secondary {checksum2}" ) with TestRun.step("Cleanup after iteration"), TestRun.use_dut(secondary_node): secondary_node.cache.stop(no_data_flush=True) Drbd.down_all() with TestRun.step("Wait for the primary DUT to be back online"), TestRun.use_dut( primary_node ): TestRun.executor.wait_for_connection() @pytest.mark.os_dependent @pytest.mark.multidut(2) @pytest.mark.require_disk("cache_dev", DiskTypeSet([DiskType.optane])) @pytest.mark.require_disk("core_dev", DiskTypeSet([DiskType.nand])) @pytest.mark.parametrize("cache_mode", CacheMode.with_any_trait(CacheModeTrait.InsertRead)) @pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB]) @pytest.mark.parametrize("cleaning_policy", CleaningPolicy) @pytest.mark.parametrize("num_iterations", [2]) def test_failover_during_io_with_eviction(cache_mode, cls, cleaning_policy, num_iterations): """ title: Failover sequence with after power failure during I/O with eviction description: Verify proper failover behaviour and data integrity after power failure during I/O handling with eviction pass_criteria: - Failover procedure success - Data integrity is maintained parametrizations: - cache mode: all cache modes that insert reads to trigger eviction during read I/O - cacheline size: 4K, 64K - to test both sector I/O and full-cacheline I/O - cleaning policy: all policies - ALRU configured to trigger immediately steps: - On 2 DUTs (main and backup) prepare cache device of 10GiB size - On 2 DUTs (main and backup) prepare primary storage device of size 15GiB - On main DUT prefill primary storage device with zeroes - Start a standby cache instance on the backup DUT with parametrized cacheline size - Configure DRBD to replicate cache and core storage from main to backup node - On main DUT: - Start WB cache on top of cache DRBD device with parametrized cacheline size - Set cleaning policy to NOP - Set sequential cutoff to never - Wait for DRBD synchronization - Fill cache with random 50% read/write mix workload, block size = parametrized cache line size - Verify cache is > 25% dirty - Verify cache ocuppancy is 100% - Switch to WO cache mode without flush - Calculate checksum of CAS exported object - Switch back to parametrized cache mode without flush - Switch to parametrized cleaning policy and cache mode - Run multi-threaded I/O, 100% random read, block_size range [4K, parametrized cache line size] with 4K increment, different random seed than the previous prefill I/O, entire primary storage LBA address range, runtime 1h - Verify cache miss statistic is being incremented - Verify pass-through I/O statistic is not being incremented - Power off the main DUT - On backup DUT: - stop cache DRBD - set backup DUT as primary for core DRBD - deatch cache drive from standby cache instance - activate standby cache instance directly on the cache drive - calculate checksum of CAS exported object - Verify that the two checksums are equal - Power on the main DUT """ with TestRun.step("Make sure DRBD is installed on both nodes"): check_drbd_installed(TestRun.duts) with TestRun.step("Prepare DUTs"): prepare_devices(TestRun.duts) primary_node, secondary_node = TestRun.duts with TestRun.step("Prepare DRBD config files on both DUTs"): cache_drbd_resource, core_drbd_resource = create_drbd_configs(primary_node, secondary_node) for i in TestRun.iteration(range(num_iterations)): with TestRun.step("Prefill primary storage device with zeroes"), TestRun.use_dut( primary_node ): Dd().block_size(Size(1, Unit.MebiByte)).input("/dev/zero").output( f"{primary_node.core_dev.path}" ).oflag("direct").run() with TestRun.step("Start standby cache instance on secondary DUT"), TestRun.use_dut( secondary_node ): secondary_node.cache = casadm.standby_init( cache_dev=secondary_node.cache_dev, cache_line_size=cls, cache_id=cache_id, force=True, ) for dut in TestRun.duts: with TestRun.step(f"Create DRBD instances on {dut.ip}"), TestRun.use_dut(dut): dut.cache_drbd = Drbd(cache_drbd_resource) dut.cache_drbd.create_metadata(force=True) dut.cache_drbd_dev = dut.cache_drbd.up() dut.core_drbd = Drbd(core_drbd_resource) dut.core_drbd.create_metadata(force=True) dut.core_drbd_dev = dut.core_drbd.up() with TestRun.step( f"Set {primary_node.ip} as primary node for both DRBD instances" ), TestRun.use_dut(primary_node): primary_node.cache_drbd.set_primary() primary_node.core_drbd.set_primary() with TestRun.step( f"Start cache on top of cache DRBD device with cacheline size {cls} and WB cache mode" ), TestRun.use_dut(primary_node): primary_node.cache = casadm.start_cache( primary_node.cache_drbd_dev, force=True, cache_mode=CacheMode.WB, cache_line_size=cls, cache_id=cache_id, ) core = primary_node.cache.add_core(primary_node.core_drbd_dev) with TestRun.step("Set NOP cleaning policy"), TestRun.use_dut(primary_node): primary_node.cache.set_cleaning_policy(CleaningPolicy.nop) with TestRun.step("Disable sequential cutoff"), TestRun.use_dut(primary_node): primary_node.cache.set_seq_cutoff_policy(SeqCutOffPolicy.never) with TestRun.step("Wait for DRBD synchronization"), TestRun.use_dut(primary_node): primary_node.cache_drbd.wait_for_sync() primary_node.core_drbd.wait_for_sync() with TestRun.step( f"Fill cache with random 50% read/write mix workload, block size {int(cls)//1024}KiB" ), TestRun.use_dut(primary_node): bs = Size(int(cls), Unit.Byte) io_size = calc_io_size(cache_size, core_size, cls, bs) fio = ( Fio() .create_command() .direct(True) .read_write(ReadWrite.randrw) .io_depth(64) .block_size(Size(int(cls), Unit.Byte)) .size(core_size) .io_size(io_size) .file_name(core.path) .rand_seed(TestRun.random_seed) .set_param("allrandrepeat", 1) .set_flags("refill_buffers") ) fio.run() with TestRun.step("Verify cache is > 25% dirty"), TestRun.use_dut(primary_node): dirty_after_initial_io = primary_node.cache.get_statistics( percentage_val=True ).usage_stats.dirty if dirty_after_initial_io < 25: TestRun.LOGGER.warning("Expected at least 25% dirty data") with TestRun.step("Verify cache ocuppancy is 100%"), TestRun.use_dut(primary_node): occupancy = primary_node.cache.get_statistics(percentage_val=True).usage_stats.occupancy if occupancy < 99: TestRun.LOGGER.warning("Expeted cache occupancy close to 100%\n") with TestRun.step("Switch to WO cache mode without flush"), TestRun.use_dut(primary_node): primary_node.cache.set_cache_mode(CacheMode.WO, flush=False) with TestRun.step("Calculate checksum of CAS exported object"), TestRun.use_dut( primary_node ): checksum1 = TestRun.executor.run(f"md5sum {core.path}").stdout.split()[0] with TestRun.step(f"Set {cache_mode} cache mode without flush"), TestRun.use_dut( primary_node ): primary_node.cache.set_cache_mode(cache_mode, flush=False) with TestRun.step(f"Switch to {cleaning_policy} cleaning policy"), TestRun.use_dut( primary_node ): primary_node.cache.set_cleaning_policy(cleaning_policy) if cleaning_policy == CleaningPolicy.alru: TestRun.LOGGER.info("Configure ALRU to trigger immediately\n") params = FlushParametersAlru( activity_threshold=Time(milliseconds=0), wake_up_time=Time(seconds=0), staleness_time=Time(seconds=1), ) primary_node.cache.set_params_alru(params) with TestRun.step("Wait 2s for cleaner to kick in"): time.sleep(2) with TestRun.step("Read stats before fio"), TestRun.use_dut(primary_node): stats_before = primary_node.cache.get_statistics() with TestRun.step("Run multi-threaded fio"), TestRun.use_dut(primary_node): start_size = Size(4, Unit.KibiByte).get_value() stop_size = int(cls) fio = ( Fio() .create_command() .direct(True) .read_write(ReadWrite.randread) .blocksize_range([(start_size, stop_size)]) .file_name(core.path) .rand_seed(TestRun.random_seed + 1) .num_jobs(16) .size(core_size) .time_based(True) .run_time(timedelta(minutes=60)) .set_param("allrandrepeat", 1) .set_flags("refill_buffers") ) fio.run_in_background() with TestRun.step("Wait 2s for I/O to take effect"): time.sleep(2) with TestRun.step("Verify cache miss statistic is being incremented"), TestRun.use_dut( primary_node ): stats_after = primary_node.cache.get_statistics() read_misses_before = ( stats_before.request_stats.read.full_misses + stats_before.request_stats.read.part_misses ) read_misses_after = ( stats_after.request_stats.read.full_misses + stats_after.request_stats.read.part_misses ) TestRun.LOGGER.info(f"Read miss change: {read_misses_before} -> {read_misses_after}") if read_misses_after <= read_misses_before: TestRun.LOGGER.exception(f"Expected read misses increase was not registered") with TestRun.step( "Verify pass-through I/O statistic is not being incremented" ), TestRun.use_dut(primary_node): pt_reads_before = stats_before.request_stats.pass_through_reads pt_reads_after = stats_after.request_stats.pass_through_reads TestRun.LOGGER.info(f"PT reads requests change: {pt_reads_before} -> {pt_reads_after}") if pt_reads_before != pt_reads_after: TestRun.LOGGER.exception(f"Unexpected increase in PT statistics") with TestRun.step(f"Power off the main DUT"), TestRun.use_dut(primary_node): timed_async_power_cycle() with TestRun.step("Stop cache DRBD on the secondary node"), TestRun.use_dut(secondary_node): secondary_node.cache_drbd.down() with TestRun.step("Set backup DUT as primary for core DRBD"), TestRun.use_dut( secondary_node ): secondary_node.core_drbd.set_primary() with TestRun.step("Deatch cache drive from standby cache instance"), TestRun.use_dut( secondary_node ): secondary_node.cache.standby_detach() with TestRun.step( "Activate standby cache instance directly on the cache drive" ), TestRun.use_dut(secondary_node): secondary_node.cache.standby_activate(secondary_node.cache_dev) with TestRun.step("Calculate checksum of CAS exported object"), TestRun.use_dut( secondary_node ): checksum2 = TestRun.executor.run(f"md5sum {core.path}").stdout.split()[0] with TestRun.step("Verify that the two checksums are equal"): if checksum1 != checksum2: TestRun.LOGGER.error( f"Checksum mismatch: primary {checksum1} secondary {checksum2}" ) with TestRun.step("Cleanup after iteration"), TestRun.use_dut(secondary_node): secondary_node.cache.stop(no_data_flush=True) Drbd.down_all() with TestRun.step("Wait for the primary DUT to be back online"), TestRun.use_dut( primary_node ): TestRun.executor.wait_for_connection() @pytest.mark.os_dependent @pytest.mark.multidut(2) @pytest.mark.require_disk("cache_dev", DiskTypeSet([DiskType.optane])) @pytest.mark.require_disk("core_dev", DiskTypeSet([DiskType.nand])) @pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB]) @pytest.mark.parametrize("cleaning_policy", [c for c in CleaningPolicy if c != CleaningPolicy.alru]) @pytest.mark.parametrize("num_iterations", [1]) def test_failover_io_long(cls, cleaning_policy, num_iterations): """ title: Failover WB I/O long Description: 4h I/O with data verification in failover setup pass_criteria: - Data integrity is maintained - Failover procedure success parametrizations: - cacheline size: 4K, 64K - to test both sector I/O and full-cacheline I/O - cleaning policy: all except ALRU, as it doesn't do any cleaning in runtime steps: - On 2 DUTs (main and backup) prepare cache device of 10GiB size - On 2 DUTs (main and backup) prepare primary storage device of size 15GiB - Start a standby cache instance on the backup DUT with parametrized cacheline size - Configure DRBD to replicate cache and core storage from main to backup node - On main DUT: - Start WB cache on top of cache DRBD device with parametrized cacheline size - Set the parametrized cleaning policy - Set sequential cutoff to never - Create XFS file system on CAS exported object - Mount file system - Preallocate fio file in PT cache mode - Wait for DRBD synchronization - Run 4h FIO with data verification: random R/W, 16 jobs, filesystem, entire primary storage LBA address range, --bssplit=4k/10:8k/25:16k/25:32k/20:64k/10:128k/5:256k/5 - Verify no data errors - Switch to WO cache mode without flush - Calculate checksum of fio test file(s) - Switch back to WB cache mode without flush - Flush page cache - Power off the main DUT - On backup DUT: - stop cache DRBD - set backup DUT as primary for core DRBD - deatch cache drive from standby cache instance - activate standby cache instance directly on the cache drive - mount file system located on CAS exported object - Calculate checksum of fio test file(s) - Verify checksums from the previous steps are equal - Power on the main DUT """ with TestRun.step("Make sure DRBD is installed on both nodes"): check_drbd_installed(TestRun.duts) with TestRun.step("Prepare DUTs"): prepare_devices(TestRun.duts) primary_node, secondary_node = TestRun.duts with TestRun.step(f"Create mount point"): mountpoint = "/tmp/standby_io_test_mount_point" for dut in TestRun.duts: with TestRun.use_dut(secondary_node): TestRun.executor.run(f"rm -rf {mountpoint}") create_directory(path=mountpoint) with TestRun.step("Prepare DRBD config files on both DUTs"): cache_drbd_resource, core_drbd_resource = create_drbd_configs(primary_node, secondary_node) for i in TestRun.iteration(range(num_iterations)): with TestRun.step("Prefill primary storage device with zeroes"), TestRun.use_dut( primary_node ): Dd().block_size(Size(1, Unit.MebiByte)).input("/dev/zero").output( f"{primary_node.core_dev.path}" ).oflag("direct").run() with TestRun.step("Start standby cache instance on secondary DUT"), TestRun.use_dut( secondary_node ): secondary_node.cache = casadm.standby_init( cache_dev=secondary_node.cache_dev, cache_line_size=cls, cache_id=cache_id, force=True, ) for dut in TestRun.duts: with TestRun.step(f"Create DRBD instances on {dut.ip}"), TestRun.use_dut(dut): dut.cache_drbd = Drbd(cache_drbd_resource) dut.cache_drbd.create_metadata(force=True) dut.cache_drbd_dev = dut.cache_drbd.up() dut.core_drbd = Drbd(core_drbd_resource) dut.core_drbd.create_metadata(force=True) dut.core_drbd_dev = dut.core_drbd.up() with TestRun.step( f"Set {primary_node.ip} as primary node for both DRBD instances" ), TestRun.use_dut(primary_node): primary_node.cache_drbd.set_primary() primary_node.core_drbd.set_primary() with TestRun.step( f"Start cache on top of cache DRBD device with cacheline size {cls} and WB cache mode" ), TestRun.use_dut(primary_node): primary_node.cache = casadm.start_cache( primary_node.cache_drbd_dev, force=True, cache_mode=CacheMode.WB, cache_line_size=cls, cache_id=cache_id, ) core = primary_node.cache.add_core(primary_node.core_drbd_dev) with TestRun.step(f"Set {cleaning_policy} cleaning policy"), TestRun.use_dut(primary_node): primary_node.cache.set_cleaning_policy(cleaning_policy) with TestRun.step("Disable sequential cutoff"), TestRun.use_dut(primary_node): primary_node.cache.set_seq_cutoff_policy(SeqCutOffPolicy.never) with TestRun.step("Create XFS file system on CAS exported object"), TestRun.use_dut( primary_node ): core.create_filesystem(Filesystem.xfs) with TestRun.step(f"Mount file system"), TestRun.use_dut(primary_node): core.mount(mountpoint) with TestRun.step("Prepare fio command"), TestRun.use_dut(primary_node): file_path = mountpoint + os.path.sep + "fio_file" fio = ( Fio() .create_command() .direct(True) .read_write(ReadWrite.randrw) .bs_split("4k/10:8k/25:16k/25:32k/20:64k/10:128k/5:256k/5") .file_name(file_path) .rand_seed(TestRun.random_seed) .num_jobs(16) .size(core_size * 0.9) # leave some room for FS metadata .io_size(Size(0, Unit.Byte)) .do_verify(True) .set_param("allrandrepeat", 1) .set_flags("refill_buffers") ) with TestRun.step("Preallocate fio file in pass-through"), TestRun.use_dut(primary_node): primary_node.cache.set_cache_mode(CacheMode.PT, flush=False) # 0 bytes of actual I/O, *not* time based - will just allocate the file fio.time_based(False).run(timedelta(hours=1)) primary_node.cache.set_cache_mode(CacheMode.WB, flush=False) with TestRun.step("Wait for DRBD synchronization"), TestRun.use_dut(primary_node): primary_node.cache_drbd.wait_for_sync() primary_node.core_drbd.wait_for_sync() with TestRun.step( "Run 4h FIO with data verification: random R/W, 16 jobs, filesystem, " "entire primary storage LBA address range, block size split " "4k/10:8k/25:16k/25:32k/20:64k/10:128k/5:256k/5" ), TestRun.use_dut(primary_node): fio.time_based(True).run_time(timedelta(hours=4)).run() with TestRun.step("Switch to WO cache mode without flush"), TestRun.use_dut(primary_node): primary_node.cache.set_cache_mode(CacheMode.WO, flush=False) with TestRun.step("Calculate checksum of fio test file(s)"), TestRun.use_dut(primary_node): checksum1 = File(file_path).md5sum() with TestRun.step(f"Switch back to the WB cache mode without flush"), TestRun.use_dut( primary_node ): primary_node.cache.set_cache_mode(CacheMode.WB, flush=False) with TestRun.step(f"Power off the main DUT"), TestRun.use_dut(primary_node): power_control = TestRun.plugin_manager.get_plugin("power_control") power_control.power_cycle(wait_for_connection=False) with TestRun.step("Stop cache DRBD on the secondary node"), TestRun.use_dut(secondary_node): secondary_node.cache_drbd.down() with TestRun.step("Set backup DUT as primary for core DRBD"), TestRun.use_dut( secondary_node ): secondary_node.core_drbd.set_primary() with TestRun.step("Deatch cache drive from standby cache instance"), TestRun.use_dut( secondary_node ): secondary_node.cache.standby_detach() with TestRun.step( "Activate standby cache instance directly on the cache drive" ), TestRun.use_dut(secondary_node): secondary_node.cache.standby_activate(secondary_node.cache_dev) with TestRun.step(f"Mount file system"), TestRun.use_dut(secondary_node): core.mount(mountpoint) with TestRun.step("Calculate checksum of CAS exported object"), TestRun.use_dut( secondary_node ): checksum2 = File(file_path).md5sum() with TestRun.step("Verify that the two checksums are equal"): if checksum1 != checksum2: TestRun.LOGGER.error( f"Checksum mismatch: primary {checksum1} secondary {checksum2}" ) with TestRun.step("Cleanup after iteration"), TestRun.use_dut(secondary_node): core.unmount() secondary_node.cache.stop(no_data_flush=True) Drbd.down_all() with TestRun.step("Wait for the primary DUT to be back online"), TestRun.use_dut( primary_node ): TestRun.executor.wait_for_connection() def check_drbd_installed(duts): for dut in duts: with TestRun.use_dut(dut): if not Drbd.is_installed(): TestRun.fail(f"DRBD is not installed on DUT {dut.ip}") def prepare_devices(duts): for dut in duts: with TestRun.use_dut(dut): TestRun.dut.hostname = TestRun.executor.run_expect_success("uname -n").stdout TestRun.disks["cache_dev"].create_partitions([cache_size] + [metadata_size] * 2) dut.cache_dev = TestRun.disks["cache_dev"].partitions[0] dut.cache_md_dev = TestRun.disks["cache_dev"].partitions[1] dut.core_md_dev = TestRun.disks["cache_dev"].partitions[2] TestRun.disks["core_dev"].create_partitions([core_size]) dut.core_dev = TestRun.disks["core_dev"].partitions[0] def create_drbd_configs(primary, secondary): cache_drbd_nodes = [ Node( primary.hostname, primary.cache_dev.path, primary.cache_md_dev.path, primary.ip, "7790" ), Node( secondary.hostname, cache_exp_obj_path, secondary.cache_md_dev.path, secondary.ip, "7790", ), ] core_drbd_nodes = [ Node(dut.hostname, dut.core_dev.path, dut.core_md_dev.path, dut.ip, "7791") for dut in [primary, secondary] ] cache_drbd_resource = Resource(name="caches", device="/dev/drbd0", nodes=cache_drbd_nodes) core_drbd_resource = Resource(name="cores", device="/dev/drbd100", nodes=core_drbd_nodes) for dut in [primary, secondary]: with TestRun.use_dut(dut): cache_drbd_resource.save() core_drbd_resource.save() return cache_drbd_resource, core_drbd_resource