failover advanced test scenarios

Signed-off-by: Adam Rutkowski <adam.j.rutkowski@intel.com>

parent e82b07746e
commit a8be9d4bf3

test/functional/tests/failover_standby/test_failover_advanced.py | 215 (new file)

@@ -0,0 +1,215 @@
#
# Copyright(c) 2022 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
#

import pytest

from api.cas.cache_config import CacheMode, CleaningPolicy, CacheModeTrait, CacheLineSize
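

# All scenarios below calculate data checksums by switching the cache to write-only
# (WO) mode without flushing, so that the verification reads return correct (dirty)
# data without altering cache contents, then switch back. A minimal sketch of that
# step; `cache`/`exported_object` stand for test-framework handles, and the method
# names used here are assumptions rather than the framework's actual API.
def _checksum_without_flush_sketch(cache, exported_object, restore_mode):
    cache.set_cache_mode(CacheMode.WO, flush=False)   # expose dirty data to reads
    checksum = exported_object.checksum()             # e.g. md5 over the whole device
    cache.set_cache_mode(restore_mode, flush=False)   # restore the parametrized mode
    return checksum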


@pytest.mark.skip(reason="not implemented")
@pytest.mark.multidut(2)
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB])
@pytest.mark.parametrize("cleaning_policy", CleaningPolicy)
def test_failover_during_background_cleaning(pyocf_ctx, cache_mode, cls, cleaning_policy):
    """
    title: Failover sequence with background cleaning
    description:
      Verify proper failover behaviour and data integrity after power failure while background
      cleaning is running.
    pass_criteria:
      - Failover procedure success
      - Data integrity is maintained
    parametrizations:
      - cache mode: all cache modes with lazy writes - to make sure dirty data is produced so
        that metadata synchronization between hosts occurs
      - cacheline size: 4K, 64K - to test both sector I/O and full-cacheline I/O
      - cleaning policy - as different policies have separate metadata handling implementations
    steps:
      - On 2 DUTs (main and backup) prepare RAID1 cache devices of 1GiB size, each comprising
        2 Optane drives
      - On 2 DUTs (main and backup) prepare primary storage device of size 1.5GiB
      - On main DUT prefill primary storage device with random data
      - Start a standby cache instance on the backup DUT with parametrized cacheline size
      - Configure DRBD to replicate cache and core storage from main to backup node
      - On main DUT:
        - Start cache on top of cache DRBD device with parametrized cacheline size and cache
          mode
        - Set cleaning policy to NOP
        - Wait for DRBD synchronization
        - Fill cache with random 50% read/write mix workload, block size 4K
        - Verify cache is > 25% dirty
        - Switch to WO cache mode without flush (checksum step sketched above)
        - Calculate checksum of CAS exported object
        - Switch back to the parametrized cache mode without flush
        - Switch to parametrized cleaning policy
        - Wait for the background cleaner to start working (no wait for ACP; according to
          policy parameters for ALRU)
        - Verify cleaner is progressing by inspecting dirty statistics
      - Power off the main DUT
      - On backup DUT (failover sequence sketched below):
        - stop cache DRBD
        - set backup DUT as primary for core DRBD
        - detach cache drive from standby cache instance
        - activate standby cache instance directly on the cache RAID drive
        - calculate checksum of CAS exported object
      - Verify that the two checksums are equal
      - Power on the main DUT
    """
    pass
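

# The "On backup DUT" failover sequence is shared by every scenario in this file;
# a minimal sketch of it, assuming hypothetical handles for the DRBD resources and
# the standby cache instance. None of these names are the actual test-framework API.
def _failover_to_backup_sketch(cache_drbd, core_drbd, standby_cache, cache_raid, exported_object):
    cache_drbd.down()                            # stop cache DRBD on the backup node
    core_drbd.set_primary()                      # set backup DUT as primary for core DRBD
    standby_cache.standby_detach()               # detach cache drive from standby instance
    standby_cache.standby_activate(cache_raid)   # activate directly on the cache RAID drive
    return exported_object.checksum()            # checksum to compare with the main DUT's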


@pytest.mark.skip(reason="not implemented")
@pytest.mark.multidut(2)
@pytest.mark.parametrize("cache_mode", CacheMode.with_traits(CacheModeTrait.LazyWrites))
@pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB])
def test_failover_during_dirty_flush(pyocf_ctx, cache_mode, cls):
    """
    title: Failover sequence after power failure during dirty data flush
    description:
      Verify proper failover behaviour and data integrity after power failure during a
      user-issued flush.
    pass_criteria:
      - Failover procedure success
      - Data integrity is maintained
    parametrizations:
      - cache mode: all cache modes with lazy writes - to make sure dirty data is produced so
        that metadata synchronization between hosts occurs
      - cacheline size: 4K, 64K - to test both sector I/O and full-cacheline I/O
    steps:
      - On 2 DUTs (main and backup) prepare RAID1 cache devices of 1GiB size, each comprising
        2 Optane drives
      - On 2 DUTs (main and backup) prepare primary storage device of size 1.5GiB
      - On main DUT prefill primary storage device with random data
      - Start a standby cache instance on the backup DUT with parametrized cacheline size
      - Configure DRBD to replicate cache and core storage from main to backup node
      - On main DUT:
        - Start cache on top of cache DRBD device with parametrized cacheline size and cache
          mode
        - Wait for DRBD synchronization
        - Set cleaning policy to NOP
        - Fill cache with random 50% read/write mix workload, block size 4K
        - Verify cache is > 25% dirty
        - Switch to WO cache mode without flush
        - Calculate checksum of CAS exported object
        - Switch back to the parametrized cache mode without flush
        - Issue cache flush command
        - Verify flush is progressing by inspecting dirty statistics (progress check sketched
          below)
      - Power off the main DUT
      - On backup DUT (failover sequence sketched above):
        - stop cache DRBD
        - set backup DUT as primary for core DRBD
        - detach cache drive from standby cache instance
        - activate standby cache instance directly on the cache RAID drive
        - calculate checksum of CAS exported object
      - Verify that the two checksums are equal
      - Power on the main DUT
    """
    pass
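

# A sketch of the "verify flush is progressing" step above: sample the dirty
# statistic twice while the flush runs and require it to drop. The statistics
# accessors below are assumptions about the test-framework API.
def _verify_flush_progress_sketch(cache, interval_s=5):
    import time  # local import to keep the sketch self-contained
    dirty_before = cache.get_statistics().usage_stats.dirty
    time.sleep(interval_s)
    dirty_after = cache.get_statistics().usage_stats.dirty
    assert dirty_after < dirty_before, "flush does not appear to be progressing"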


@pytest.mark.skip(reason="not implemented")
@pytest.mark.multidut(2)
@pytest.mark.parametrize(
    "cache_mode", [m for m in CacheMode if m != CacheMode.WO and m != CacheMode.PT]
)
@pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB])
@pytest.mark.parametrize("cleaning_policy", [c for c in CleaningPolicy if c != CleaningPolicy.alru])
def test_failover_during_io_with_eviction(pyocf_ctx, cache_mode, cls, cleaning_policy):
    """
    title: Failover sequence after power failure during I/O with eviction
    description:
      Verify proper failover behaviour and data integrity after power failure during
      I/O handling with eviction.
    pass_criteria:
      - Failover procedure success
      - Data integrity is maintained
    parametrizations:
      - cache mode: all cache modes except WO and PT - to trigger eviction via reads
      - cacheline size: 4K, 64K - to test both sector I/O and full-cacheline I/O
      - cleaning policy: all except ALRU, as it doesn't do any cleaning in runtime
    steps:
      - On 2 DUTs (main and backup) prepare RAID1 cache devices of 1GiB size, each comprising
        2 Optane drives
      - On 2 DUTs (main and backup) prepare primary storage device of size 1.5GiB
      - On main DUT prefill primary storage device with random data
      - Start a standby cache instance on the backup DUT with parametrized cacheline size
      - Configure DRBD to replicate cache and core storage from main to backup node
      - On main DUT:
        - Start WB cache on top of cache DRBD device with parametrized cacheline size
        - Set cleaning policy to NOP
        - Wait for DRBD synchronization
        - Fill cache with random 50% read/write mix workload, block size = parametrized cache
          line size
        - Verify cache is > 25% dirty
        - Verify cache occupancy is 100%
        - Switch to WO cache mode without flush
        - Calculate checksum of CAS exported object
        - Switch back to WB cache mode without flush
        - Switch to parametrized cleaning policy and cache mode
        - Run multi-threaded I/O, 100% random read, block size range [4K, parametrized cache
          line size] with 4K increment, different random seed than the previous prefill I/O,
          entire primary storage LBA address range, runtime 1h
        - Verify cache miss statistic is being incremented (statistics checks sketched below)
        - Verify pass-through I/O statistic is not being incremented
      - Power off the main DUT
      - On backup DUT (failover sequence sketched above):
        - stop cache DRBD
        - set backup DUT as primary for core DRBD
        - detach cache drive from standby cache instance
        - activate standby cache instance directly on the cache RAID drive
        - calculate checksum of CAS exported object
      - Verify that the two checksums are equal
      - Power on the main DUT
    """
    pass
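

# A sketch of the statistics checks during the eviction workload above: with the
# cache 100% occupied, random reads over the whole core should keep producing cache
# misses (each miss evicting a line), while no request may fall back to pass-through.
# The statistics field names below are assumptions about the framework's layout.
def _verify_eviction_io_stats_sketch(cache, interval_s=10):
    import time  # local import to keep the sketch self-contained
    first = cache.get_statistics()
    time.sleep(interval_s)
    second = cache.get_statistics()
    assert second.request_stats.read.misses > first.request_stats.read.misses
    assert second.request_stats.pass_through_reads == first.request_stats.pass_through_reads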


@pytest.mark.skip(reason="not implemented")
@pytest.mark.multidut(2)
@pytest.mark.parametrize("cls", [CacheLineSize.LINE_4KiB, CacheLineSize.LINE_64KiB])
@pytest.mark.parametrize("cleaning_policy", [c for c in CleaningPolicy if c != CleaningPolicy.alru])
def test_failover_io_long(pyocf_ctx, cls, cleaning_policy):
    """
    title: Failover WB I/O long
    description:
      4h I/O with data verification in failover setup.
    pass_criteria:
      - Data integrity is maintained
      - Failover procedure success
    parametrizations:
      - cacheline size: 4K, 64K - to test both sector I/O and full-cacheline I/O
      - cleaning policy: all except ALRU, as it doesn't do any cleaning in runtime
    steps:
      - On 2 DUTs (main and backup) prepare RAID1 cache devices of 1GiB size, each comprising
        2 Optane drives
      - On 2 DUTs (main and backup) prepare primary storage device of size 1.5GiB
      - Start a standby cache instance on the backup DUT with parametrized cacheline size
      - Configure DRBD to replicate cache and core storage from main to backup node
      - On main DUT:
        - Start WB cache on top of cache DRBD device with parametrized cacheline size
        - Set the parametrized cleaning policy
        - Create XFS file system on CAS exported object
        - Wait for DRBD synchronization
        - Mount file system
        - Run 4h FIO with data verification: random R/W, 16 jobs, filesystem, entire primary
          storage LBA address range, --bssplit=4k/10:8k/25:16k/25:32k/20:64k/10:128k/5:256k/5
          (invocation sketched below)
        - Verify no data errors
        - Switch to WO cache mode without flush
        - Calculate checksum of fio test file(s)
        - Switch back to WB cache mode without flush
        - Flush page cache
      - Power off the main DUT
      - On backup DUT (failover sequence sketched above):
        - stop cache DRBD
        - set backup DUT as primary for core DRBD
        - detach cache drive from standby cache instance
        - activate standby cache instance directly on the cache RAID drive
        - mount file system located on CAS exported object
        - calculate checksum of fio test file(s)
      - Verify checksums from the previous steps are equal
      - Power on the main DUT
    """
    pass
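

# The 4h verification workload above, sketched as a raw fio invocation. The real
# test would use the framework's fio wrapper; the mount point and per-job sizing
# below are illustrative assumptions, only the job parameters come from the
# docstring.
FIO_LONG_IO_SKETCH = (
    "fio --name=failover_io_long --directory=/mnt/cas"  # /mnt/cas is hypothetical
    " --time_based --runtime=4h --rw=randrw --numjobs=16"
    " --verify=crc32c --do_verify=1"
    " --bssplit=4k/10:8k/25:16k/25:32k/20:64k/10:128k/5:256k/5"
    " --size=80M"  # per job; 16 jobs cover most of the 1.5GiB core device
)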