Tests for errors on core device

Signed-off-by: Jan Musial <jan.musial@intel.com>
This commit is contained in:
Jan Musial 2019-12-17 11:01:21 +01:00
parent 79e54b81a6
commit 21f18b70db

View File

@@ -0,0 +1,149 @@
#
# Copyright(c) 2019 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#
import pytest
from test_tools.fio.fio import Fio
from test_tools.fio.fio_param import ReadWrite, IoEngine, ErrorFilter, VerifyMethod
from test_tools.device_mapper import ErrorDevice, DmTable
from core.test_run import TestRun
from api.cas import casadm
from api.cas.cache_config import (
CacheMode,
CacheLineSize,
SeqCutOffPolicy,
CleaningPolicy,
)
from storage_devices.disk import DiskTypeSet, DiskType, DiskTypeLowerThan
from test_utils.os_utils import Udev
from test_utils.size import Size, Unit
@pytest.mark.parametrize("cache_line_size", CacheLineSize)
@pytest.mark.parametrize("cache_mode", CacheMode)
@pytest.mark.parametrize("io_dir", [ReadWrite.randread, ReadWrite.randwrite])
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
def test_core_device_error(io_dir, cache_mode, cache_line_size):
    """
        title: Check if CAS behaves correctly when encountering errors on core device
        description: |
          Perform I/O on two exported objects created using error and non-error device.
          Validate that CAS stats counting is consistent with OS reporting.
          Also, check if normal I/O is uninterrupted and no data corruption (DC) occurs
          on any of the core devices.
        pass_criteria:
          - I/O error count in FIO and in cache statistics match
          - Positively passed fio verify on both core devices
    """
    with TestRun.step("Prepare error device and setup cache and cores"):
        cache, error_core, good_core = prepare_configuration(cache_mode, cache_line_size)

    # Mixed random read/write workload with pattern verification for the core
    # that is backed by a plain (non-error) partition.
    good_core_fio = (
        Fio()
        .create_command()
        .io_engine(IoEngine.libaio)
        .size(good_core.size)
        .block_size(cache_line_size)
        .target(good_core)
        .read_write(ReadWrite.randrw)
        .verify_pattern()
        .verify(VerifyMethod.pattern)
        .direct()
    )

    # Workload for the core backed by the error device. It is configured with
    # continue_on_error so that the run is not aborted by the first failed I/O
    # and the errors can be counted afterwards.
    error_core_fio = (
        Fio()
        .create_command()
        .io_engine(IoEngine.libaio)
        .size(error_core.size)
        .block_size(cache_line_size)
        .target(error_core)
        .read_write(io_dir)
        .continue_on_error(ErrorFilter.io)
        .direct()
    )
    # Pattern verification only makes sense when this job writes the data itself.
    if io_dir == ReadWrite.randwrite:
        error_core_fio.verify_pattern().verify(VerifyMethod.pattern)

    with TestRun.step("Run fio on core without errors in background"):
        fio_pid = good_core_fio.run_in_background()

    with TestRun.step("Run fio on error core and check if IO errors are present"):
        # run() returns an indexable collection of results; the first entry is used.
        fio_errors = error_core_fio.run()[0].total_errors()
        if fio_errors == 0:
            TestRun.fail("No I/O ended with error!")

    with TestRun.step("Check error statistics on error core"):
        stats = cache.get_statistics()
        core_errors_in_cache = stats.error_stats.core.total
        if fio_errors != core_errors_in_cache:
            # Both fragments must be f-strings, otherwise the placeholder in the
            # second one is emitted literally.
            TestRun.fail(
                f"Core errors in cache stats({core_errors_in_cache}) should be equal to number of"
                f" fio errors ({fio_errors})"
            )

    with TestRun.step("Wait for fio on good core"):
        TestRun.executor.wait_cmd_finish(fio_pid)

    with TestRun.step("Check error statistics on good core"):
        stats = good_core.get_statistics()
        good_core_errors = stats.error_stats.core.total
        if good_core_errors != 0:
            # Report the same counter that was just checked.
            TestRun.fail(
                "No errors should be reported for good core. "
                f"Actual result: {good_core_errors}"
            )

    with TestRun.step("Stop the cache"):
        cache.stop()

    with TestRun.step("Verify error core device contents (if writes)"):
        if io_dir == ReadWrite.randwrite:
            # Re-target the same fio job at the backing device and only verify.
            error_core_fio.target(error_core.core_device).verify_only().run()

    with TestRun.step("Verify good core device contents"):
        good_core_fio.target(good_core.core_device).verify_only().run()
def prepare_configuration(cache_mode, cache_line_size):
    """
    Build the cache/core layout used by the core-device error test.

    Creates one 70 MiB partition on the "cache" disk and two 70 MiB partitions
    on the "core" disk, wraps the first core partition in an ErrorDevice,
    starts a cache and adds both the error device and the second (plain)
    core partition as cores.

    :param cache_mode: cache mode passed to casadm.start_cache
    :param cache_line_size: cache line size passed to casadm.start_cache
    :return: tuple (cache, error_core, good_core)
    """
    cache_disk = TestRun.disks["cache"]
    core_disk = TestRun.disks["core"]

    cache_disk.create_partitions([Size(70, Unit.MebiByte)])
    core_disk.create_partitions([Size(70, Unit.MebiByte), Size(70, Unit.MebiByte)])

    error_backend = core_disk.partitions[0]
    healthy_backend = core_disk.partitions[1]

    # Error table covering LBAs from 0 up to the partition size expressed in
    # Unit.Blocks512, with 100 error zones of Size(5, Unit.Blocks512) each;
    # fill_gaps() is given the backing partition for the remaining ranges.
    error_table = DmTable.uniform_error_table(
        start_lba=0,
        stop_lba=int(error_backend.size.get_value(Unit.Blocks512)),
        num_error_zones=100,
        error_zone_size=Size(5, Unit.Blocks512),
    ).fill_gaps(error_backend)
    error_device = ErrorDevice("error", error_backend, error_table)

    cache = casadm.start_cache(
        cache_disk.partitions[0],
        cache_mode=cache_mode,
        cache_line_size=cache_line_size,
        force=True,
    )
    cache.set_seq_cutoff_policy(SeqCutOffPolicy.never)
    cache.set_cleaning_policy(CleaningPolicy.nop)

    # Udev is disabled before the cores are added, as in the original flow.
    Udev.disable()

    error_core = cache.add_core(core_dev=error_device)
    good_core = cache.add_core(core_dev=healthy_backend)

    return cache, error_core, good_core