failover tests

Signed-off-by: Adam Rutkowski <adam.j.rutkowski@intel.com>
This commit is contained in:
Adam Rutkowski 2021-10-21 10:51:00 +02:00
parent fcfbd860c4
commit 2721378942
2 changed files with 1207 additions and 177 deletions

View File

@ -0,0 +1,591 @@
#
# Copyright(c) 2022-2022 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
#
import pytest
import copy
from ctypes import c_int
from pyocf.types.cache import (
Cache,
CacheMode,
MetadataLayout,
CleaningPolicy,
)
from pyocf.types.core import Core
from pyocf.types.data import Data
from pyocf.types.io import Io, IoDir
from pyocf.types.volume import RamVolume, Volume
from pyocf.types.volume_cache import CacheVolume
from pyocf.types.volume_core import CoreVolume
from pyocf.types.volume_replicated import ReplicatedVolume
from pyocf.types.shared import (
OcfError,
OcfErrorCode,
OcfCompletion,
CacheLines,
CacheLineSize,
SeqCutOffPolicy,
)
from pyocf.utils import Size
from pyocf.rio import Rio, ReadWrite
def test_standby_stop_closes_volume(pyocf_2_ctx):
    """Check that stopping a standby cache leaves its cache volume closed."""
    backing_vol = RamVolume(Size.from_MiB(150))

    standby = Cache(
        owner=pyocf_2_ctx[1],
        cache_mode=CacheMode.WB,
        cache_line_size=CacheLineSize.LINE_4KiB,
    )
    standby.start_cache()
    standby.standby_attach(backing_vol, force=False)
    standby.stop()

    assert not backing_vol.opened
def test_standby_stop_detached(pyocf_2_ctx):
    """Check that standby detach closes the volume and the cache can still be stopped."""
    backing_vol = RamVolume(Size.from_MiB(150))

    standby = Cache(
        owner=pyocf_2_ctx[1],
        cache_mode=CacheMode.WB,
        cache_line_size=CacheLineSize.LINE_4KiB,
    )
    standby.start_cache()
    standby.standby_attach(backing_vol, force=False)
    standby.standby_detach()

    # the volume must already be closed before the cache itself is stopped
    assert not backing_vol.opened

    standby.stop()
# verify that force flag is required to attach a standby instance
# on a volume where standby instance had previously been running
def test_standby_attach_force_after_standby(pyocf_2_ctx):
    """Re-attaching standby to a volume previously used by a standby instance.

    A non-forced attach is expected to raise OCF_ERR_METADATA_FOUND; the
    same attach with force=True is expected to go through.
    """

    def started_cache():
        # every cache instance in this test shares the same configuration
        instance = Cache(
            owner=pyocf_2_ctx[1],
            cache_mode=CacheMode.WB,
            cache_line_size=CacheLineSize.LINE_4KiB,
        )
        instance.start_cache()
        return instance

    backing_vol = RamVolume(Size.from_MiB(150))

    # first life cycle: standby attach, detach, stop
    cache = started_cache()
    cache.standby_attach(backing_vol, force=False)
    cache.standby_detach()
    cache.stop()

    # second life cycle: the volume now carries metadata
    cache = started_cache()
    with pytest.raises(OcfError) as attach_err:
        cache.standby_attach(backing_vol, force=False)
    assert attach_err.value.error_code == OcfErrorCode.OCF_ERR_METADATA_FOUND

    cache.standby_attach(backing_vol, force=True)
def test_standby_attach_force_after_active(pyocf_2_ctx):
    """Standby attach to a volume previously used by an active cache instance.

    A non-forced attach is expected to raise OCF_ERR_METADATA_FOUND; the
    same attach with force=True is expected to go through.
    """

    def started_cache():
        # every cache instance in this test shares the same configuration
        instance = Cache(
            owner=pyocf_2_ctx[1],
            cache_mode=CacheMode.WB,
            cache_line_size=CacheLineSize.LINE_4KiB,
        )
        instance.start_cache()
        return instance

    backing_vol = RamVolume(Size.from_MiB(150))

    # run and cleanly stop an active instance on the volume
    cache = started_cache()
    cache.attach_device(backing_vol)
    cache.stop()
    assert not backing_vol.opened

    # standby attach on top of the active instance's metadata
    cache = started_cache()
    with pytest.raises(OcfError) as attach_err:
        cache.standby_attach(backing_vol, force=False)
    assert attach_err.value.error_code == OcfErrorCode.OCF_ERR_METADATA_FOUND

    cache.standby_attach(backing_vol, force=True)
# standby load from standby cache instance after clean shutdown
def test_standby_load_after_standby_clean_shutdown(pyocf_2_ctx):
    """Standby load from a cleanly stopped standby instance must not write to the volume."""

    def started_cache():
        instance = Cache(
            owner=pyocf_2_ctx[1],
            cache_mode=CacheMode.WB,
            cache_line_size=CacheLineSize.LINE_4KiB,
        )
        instance.start_cache()
        return instance

    backing_vol = RamVolume(Size.from_MiB(150))

    # standby instance, stopped without any error injected
    cache = started_cache()
    cache.standby_attach(backing_vol, force=False)
    cache.stop()

    # count only the writes issued by the standby load itself
    cache = started_cache()
    backing_vol.reset_stats()
    cache.standby_load(backing_vol, perform_test=False)
    write_count = backing_vol.get_stats()[IoDir.WRITE]
    assert write_count == 0

    cache.stop()
# standby load from active cache instance after clean shutdown
def test_standby_load_after_active_clean_shutdown(pyocf_2_ctx):
    """Standby load from a cleanly stopped active instance must not write to the volume.

    An active cache is attached to a RAM volume and stopped. A new cache
    instance then performs a standby load (without the volume test) and the
    volume's write counter, reset just before the load, must remain zero.
    """
    ctx = pyocf_2_ctx[1]
    mode = CacheMode.WB
    cls = CacheLineSize.LINE_4KiB

    vol = RamVolume(Size.from_MiB(150))

    # active instance, stopped without any error injected
    cache = Cache(owner=ctx, cache_mode=mode, cache_line_size=cls)
    cache.start_cache()
    cache.attach_device(vol, force=False)
    cache.stop()

    cache = Cache(owner=ctx, cache_mode=mode, cache_line_size=cls)
    cache.start_cache()

    # count only the writes issued by the standby load itself
    vol.reset_stats()
    cache.standby_load(vol, perform_test=False)
    assert vol.get_stats()[IoDir.WRITE] == 0

    # stop explicitly, like the other standby_load tests in this module,
    # instead of leaving the loaded instance running at the end of the test
    cache.stop()
# standby load from active cache instance after dirty shutdown
def test_standby_load_after_active_dirty_shutdown(pyocf_2_ctx):
    """Standby load after an active instance failed to stop must not write to the volume.

    The cache volume is taken offline before stopping the active cache, so
    the stop is expected to raise OCF_ERR_WRITE_CACHE.
    """

    def started_cache():
        instance = Cache(
            owner=pyocf_2_ctx[1],
            cache_mode=CacheMode.WB,
            cache_line_size=CacheLineSize.LINE_4KiB,
        )
        instance.start_cache()
        return instance

    backing_vol = RamVolume(Size.from_MiB(150))

    cache = started_cache()
    cache.attach_device(backing_vol, force=False)

    # stop with the volume offline
    backing_vol.offline()
    with pytest.raises(OcfError) as stop_err:
        cache.stop()
    assert stop_err.value.error_code == OcfErrorCode.OCF_ERR_WRITE_CACHE
    backing_vol.online()

    # count only the writes issued by the standby load itself
    cache = started_cache()
    backing_vol.reset_stats()
    cache.standby_load(backing_vol, perform_test=False)
    write_count = backing_vol.get_stats()[IoDir.WRITE]
    assert write_count == 0

    cache.stop()
def test_standby_load_after_standby_dirty_shutdown(pyocf_2_ctx):
    """Standby load after a standby instance was stopped with its volume offline.

    The load is done without the volume test and must not write to the volume.
    """

    def started_cache():
        instance = Cache(
            owner=pyocf_2_ctx[1],
            cache_mode=CacheMode.WB,
            cache_line_size=CacheLineSize.LINE_4KiB,
        )
        instance.start_cache()
        return instance

    backing_vol = RamVolume(Size.from_MiB(150))

    cache = started_cache()
    cache.standby_attach(backing_vol, force=False)

    # stop the standby instance while the volume is offline; no error is
    # expected from this stop
    backing_vol.offline()
    cache.stop()
    backing_vol.online()

    # count only the writes issued by the standby load itself
    cache = started_cache()
    backing_vol.reset_stats()
    cache.standby_load(backing_vol, perform_test=False)
    write_count = backing_vol.get_stats()[IoDir.WRITE]
    assert write_count == 0

    cache.stop()
def test_standby_load_after_standby_dirty_shutdown_with_vol_test(pyocf_2_ctx):
    """Standby load with default arguments after a standby stop with the volume offline.

    Unlike the sibling test, standby_load() is called without
    perform_test=False and no write counters are checked.
    """

    def started_cache():
        instance = Cache(
            owner=pyocf_2_ctx[1],
            cache_mode=CacheMode.WB,
            cache_line_size=CacheLineSize.LINE_4KiB,
        )
        instance.start_cache()
        return instance

    backing_vol = RamVolume(Size.from_MiB(150))

    cache = started_cache()
    cache.standby_attach(backing_vol, force=False)

    # stop the standby instance while the volume is offline
    backing_vol.offline()
    cache.stop()
    backing_vol.online()

    cache = started_cache()
    cache.standby_load(backing_vol)
    cache.stop()
def test_standby_activate_core_size_mismatch_after_active(pyocf_2_ctx):
    """Standby activate must fail while the core volume size differs.

    An active cache with one core is stopped, the core volume is replaced by
    one twice as large under the same uuid, and activation of a
    standby-loaded cache is expected to raise OCF_ERR_CORE_SIZE_MISMATCH
    (checked twice). After the original size is restored, activation is
    expected to succeed.
    """
    cache_dev = RamVolume(Size.from_MiB(150))
    cache = Cache(
        owner=pyocf_2_ctx[1],
        cache_mode=CacheMode.WB,
        cache_line_size=CacheLineSize.LINE_4KiB,
    )
    cache.start_cache()
    cache.attach_device(cache_dev, force=False)

    # prepare and stop cache instance with standard size core volume
    core_uuid = str(id(cache))
    base_size = Size.from_MiB(150)
    core_vol = RamVolume(base_size, uuid=core_uuid)
    core = Core(core_vol)
    cache.add_core(core)
    cache.stop()
    cache = None

    # resize core volume
    # TODO: how to avoid manually removing vol<->uuid mapping?
    del Volume._uuid_[core_vol.uuid]
    core_vol = None
    core_vol = RamVolume(2 * base_size, uuid=core_uuid)

    # standby load on the volume
    cache = Cache(
        owner=pyocf_2_ctx[1],
        cache_mode=CacheMode.WB,
        cache_line_size=CacheLineSize.LINE_4KiB,
    )
    cache.start_cache()
    cache.standby_load(cache_dev)
    cache.standby_detach()

    # two consecutive attempts to activate with size mismatch
    for _attempt in range(2):
        with pytest.raises(OcfError) as activate_err:
            cache.standby_activate(cache_dev)
        assert activate_err.value.error_code == OcfErrorCode.OCF_ERR_CORE_SIZE_MISMATCH

    # restore the initial core volume size under the same uuid
    del Volume._uuid_[core_vol.uuid]
    core_vol = RamVolume(base_size, uuid=core_uuid)

    # attempt to activate with fixed size
    cache.standby_activate(cache_dev)

    cache.stop()
def test_standby_activate_core_size_mismatch(pyocf_2_ctx):
    """Standby activate must fail while a core volume size differs from metadata.

    The content of an active cache volume (two cores added) is copied to a
    standby cache through its exported object. One core volume is then
    recreated at twice the size under the same uuid, and activation is
    expected to raise OCF_ERR_CORE_SIZE_MISMATCH (checked twice). After the
    original size is restored, activation is expected to succeed.
    """
    active_dev = RamVolume(Size.from_MiB(150), uuid="cv1")
    cache = Cache(
        owner=pyocf_2_ctx[1],
        cache_mode=CacheMode.WB,
        cache_line_size=CacheLineSize.LINE_4KiB,
    )
    cache.start_cache()
    cache.attach_device(active_dev, force=False)

    core_uuid = str(id(cache))
    base_size = Size.from_MiB(150)
    core_vol = RamVolume(base_size, uuid=core_uuid)
    second_core_vol = RamVolume(base_size)
    first_core = Core(core_vol)
    second_core = Core(second_core_vol, name="core2")
    cache.add_core(first_core)
    cache.add_core(second_core)

    # snapshot of the active cache volume, taken before the cache is stopped
    snapshot = active_dev.get_bytes()
    cache.stop()
    active_dev = None

    del Volume._uuid_[core_vol.uuid]
    core_vol = None

    standby_dev = RamVolume(Size.from_MiB(150), uuid="cv2")
    cache = Cache(
        owner=pyocf_2_ctx[1],
        cache_mode=CacheMode.WB,
        cache_line_size=CacheLineSize.LINE_4KiB,
    )
    cache.start_cache()
    cache.standby_attach(standby_dev)

    # feed the snapshot to the standby cache through its exported object
    exported = CacheVolume(cache, open=True)
    write_vol(exported, cache.get_default_queue(), snapshot)

    # core volume with the same uuid but doubled size
    core_vol = RamVolume(2 * base_size, uuid=core_uuid)

    cache.standby_detach()

    # two consecutive attempts to activate with size mismatch
    for _attempt in range(2):
        with pytest.raises(OcfError) as activate_err:
            cache.standby_activate(standby_dev)
        assert activate_err.value.error_code == OcfErrorCode.OCF_ERR_CORE_SIZE_MISMATCH

    # restore the initial core volume size under the same uuid
    del Volume._uuid_[core_vol.uuid]
    core_vol = None
    core_vol = RamVolume(base_size, uuid=core_uuid)

    # attempt to activate with fixed size
    cache.standby_activate(standby_dev)

    cache.stop()
def test_failover_passive_first(pyocf_2_ctx):
    """Failover where the standby cache is started before the active one.

    The active cache runs on a ReplicatedVolume built from its own RAM volume
    and the standby cache's exported object. After the active cache fails to
    stop with its volume offline, the standby cache is activated and the core
    checksum is compared with the one captured before the failure.

    NOTE(review): a later definition with the same name in this module
    shadows this function, so only one of the two is collected by pytest.
    """
    active_ctx, standby_ctx = pyocf_2_ctx[0], pyocf_2_ctx[1]
    cache_mode = CacheMode.WB
    line_size = CacheLineSize.LINE_4KiB

    prim_cache_backend_vol = RamVolume(Size.from_MiB(150))
    core_backend_vol = RamVolume(Size.from_MiB(1))
    sec_cache_backend_vol = RamVolume(Size.from_MiB(150))

    # passive cache directly on a ram disk
    standby = Cache(owner=standby_ctx, cache_mode=cache_mode, cache_line_size=line_size)
    standby.start_cache()
    standby.standby_attach(sec_cache_backend_vol)

    # volume replicating active cache ramdisk writes to the standby cache
    # exported object
    standby_exp_obj = CacheVolume(standby, open=True)
    replicated_vol = ReplicatedVolume(prim_cache_backend_vol, standby_exp_obj)

    # active cache
    active = Cache.start_on_device(
        replicated_vol, active_ctx, cache_mode=cache_mode, cache_line_size=line_size
    )
    core = Core(core_backend_vol)
    active.add_core(core)
    core_vol = CoreVolume(core, open=True)
    io_queue = active.get_default_queue()

    # some I/O
    workload = Rio().target(core_vol).njobs(1).readwrite(ReadWrite.WRITE)
    workload = workload.size(Size.from_MiB(1)).qd(1)
    workload.run([io_queue])

    # capture checksum before simulated active host failure
    checksum_before = core_vol.md5()

    # offline primary cache volume and stop primary cache to simulate active
    # host failure
    replicated_vol.offline()
    with pytest.raises(OcfError) as stop_err:
        active.stop()
    assert stop_err.value.error_code == OcfErrorCode.OCF_ERR_WRITE_CACHE

    # failover
    standby.standby_detach()
    standby.standby_activate(sec_cache_backend_vol, open_cores=False)

    # add core explicitly with "try_add" to workaround pyocf limitations
    core = Core(core_backend_vol)
    standby.add_core(core, try_add=True)
    core_vol = CoreVolume(core, open=True)

    assert checksum_before == core_vol.md5()
def write_vol(vol, queue, data):
    """Write *data* to *vol* from offset 0, in chunks of at most 32 MiB.

    Each chunk is submitted as a separate write I/O on *queue* and waited for
    before the next one is issued. The completion's error value is not
    inspected.
    """
    total = len(data)
    chunk_limit = int(Size.from_MiB(32))

    for start in range(0, total, chunk_limit):
        # the last chunk may be shorter than the limit
        length = min(total - start, chunk_limit)
        payload = Data.from_bytes(data, start, length)

        done = OcfCompletion([("error", c_int)])
        write_io = vol.new_io(queue, start, length, IoDir.WRITE, 0, 0)
        write_io.set_data(payload)
        write_io.callback = done.callback
        write_io.submit()
        done.wait()
def test_failover_active_first(pyocf_2_ctx):
    """Failover where the active cache runs (and fails) before standby is set up.

    After the active cache fails to stop with its volume offline, the raw
    content of its cache volume is written to a freshly attached standby
    cache through the exported object. The standby cache is then activated
    and the core checksum is compared with the one captured before the
    failure.
    """
    active_ctx, standby_ctx = pyocf_2_ctx[0], pyocf_2_ctx[1]
    cache_mode = CacheMode.WB
    line_size = CacheLineSize.LINE_4KiB

    prim_cache_backend_vol = RamVolume(Size.from_MiB(150))
    core_backend_vol = RamVolume(Size.from_MiB(1))

    # active cache
    active = Cache.start_on_device(
        prim_cache_backend_vol, active_ctx, cache_mode=cache_mode, cache_line_size=line_size
    )
    core = Core(core_backend_vol)
    active.add_core(core)
    core_vol = CoreVolume(core, open=True)
    active_queue = active.get_default_queue()

    # some I/O
    workload = Rio().target(core_vol).njobs(1).readwrite(ReadWrite.WRITE)
    workload = workload.size(Size.from_MiB(1)).qd(1)
    workload.run([active_queue])

    # capture checksum before simulated active host failure
    core_checksum = core_vol.md5()

    prim_cache_backend_vol.offline()
    with pytest.raises(OcfError) as stop_err:
        active.stop()
    assert stop_err.value.error_code == OcfErrorCode.OCF_ERR_WRITE_CACHE

    # capture a copy of active cache instance data
    cache_image = prim_cache_backend_vol.get_bytes()
    cache_checksum = prim_cache_backend_vol.md5()

    # setup standby cache
    sec_cache_backend_vol = RamVolume(Size.from_MiB(150))
    standby = Cache(owner=standby_ctx, cache_mode=cache_mode, cache_line_size=line_size)
    standby.start_cache()
    standby.standby_attach(sec_cache_backend_vol)
    writer_vol = CacheVolume(standby, open=True)
    standby_queue = standby.get_default_queue()

    # standby cache exported object volume
    standby_exp_obj = CacheVolume(standby, open=True)

    # just to be sure
    assert sec_cache_backend_vol.get_bytes() != prim_cache_backend_vol.get_bytes()

    # write content of active cache volume to passive cache exported obj
    write_vol(writer_vol, standby_queue, cache_image)

    assert cache_checksum == standby_exp_obj.md5()

    # volumes should have the same data
    assert sec_cache_backend_vol.get_bytes() == prim_cache_backend_vol.get_bytes()

    # failover
    standby.standby_detach()
    standby.standby_activate(sec_cache_backend_vol, open_cores=False)
    core = Core(core_backend_vol)
    standby.add_core(core, try_add=True)
    core_vol = CoreVolume(core, open=True)

    # check data consistency
    assert core_checksum == core_vol.md5()
def test_standby_load_writes_count(pyocf_ctx):
    """Standby load on a volume holding valid metadata must issue no writes."""
    # Prepare a volume with valid metadata by starting and stopping a cache on it
    cache_dev = RamVolume(Size.from_MiB(40))
    cache = Cache.start_on_device(cache_dev, cache_mode=CacheMode.WB)
    cache.stop()

    # count only the writes issued by the standby load itself
    cache_dev.reset_stats()

    cache = Cache(owner=pyocf_ctx)
    cache.start_cache()
    cache.standby_load(cache_dev, perform_test=False)

    write_count = cache_dev.get_stats()[IoDir.WRITE]
    assert write_count == 0
def test_failover_line_size_mismatch(pyocf_2_ctx):
    """Standby activate must fail when the cache line size does not match.

    The content of an active cache volume created with a 4KiB cache line is
    copied, through the exported object, into a standby cache started with a
    64KiB cache line. Activation is expected to raise
    OCF_ERR_START_CACHE_FAIL (checked twice). A new cache instance started
    with the 4KiB cache line is then expected to load and activate on the
    same volume.
    """
    ctx = pyocf_2_ctx[1]
    mode = CacheMode.WB
    cls = CacheLineSize.LINE_4KiB
    cls2 = CacheLineSize.LINE_64KiB

    # active cache with the original cache line size
    vol1 = RamVolume(Size.from_MiB(150), uuid="cv1")
    cache = Cache(owner=ctx, cache_mode=mode, cache_line_size=cls)
    cache.start_cache()
    cache.attach_device(vol1, force=False)

    core_vol = RamVolume(Size.from_MiB(150))
    core = Core(core_vol)
    cache.add_core(core)

    # snapshot of the active cache volume, taken before the cache is stopped
    data = vol1.get_bytes()
    cache.stop()
    vol1 = None

    # standby cache with a different cache line size
    vol2 = RamVolume(Size.from_MiB(150), uuid="cv2")
    cache = Cache(owner=ctx, cache_mode=mode, cache_line_size=cls2)
    cache.start_cache()
    cache.standby_attach(vol2)
    cache_vol = CacheVolume(cache, open=True)

    write_vol(cache_vol, cache.get_default_queue(), data)

    # the original had a bare comparison here whose result was discarded;
    # assert it so the check is actually enforced
    assert cache.get_conf()["cache_line_size"] == cls2

    cache.standby_detach()

    # first attempt to activate with cache line size mismatch
    with pytest.raises(OcfError) as ex:
        cache.standby_activate(vol2)
    assert ex.value.error_code == OcfErrorCode.OCF_ERR_START_CACHE_FAIL

    # second attempt to activate with cache line size mismatch
    with pytest.raises(OcfError) as ex:
        cache.standby_activate(vol2)
    assert ex.value.error_code == OcfErrorCode.OCF_ERR_START_CACHE_FAIL

    cache.stop()

    # new instance with the matching cache line size
    cache = Cache(owner=ctx, cache_mode=mode, cache_line_size=cls)
    cache.start_cache()
    cache.standby_load(vol2)
    cache.standby_detach()
    cache.standby_activate(vol2)

    # same fix as above: assert instead of a discarded comparison
    assert cache.get_conf()["cache_line_size"] == cls

    cache.stop()
def test_failover_passive_first(pyocf_2_ctx):
    """Failover where the standby cache is started before the active one.

    Writes to the active cache volume are replicated to the standby cache's
    exported object via a ReplicatedVolume. After the active cache fails to
    stop with its volume offline, the standby cache is activated and the core
    checksum is compared with the one captured before the failure.

    NOTE(review): this redefines an earlier function with the same name in
    this module; consider removing or renaming one of the two.
    """
    primary_ctx = pyocf_2_ctx[0]
    secondary_ctx = pyocf_2_ctx[1]
    common_cfg = {
        "cache_mode": CacheMode.WB,
        "cache_line_size": CacheLineSize.LINE_4KiB,
    }

    prim_cache_backend_vol = RamVolume(Size.from_MiB(150))
    core_backend_vol = RamVolume(Size.from_MiB(1))
    sec_cache_backend_vol = RamVolume(Size.from_MiB(150))

    # passive cache directly on a ram disk
    passive = Cache(owner=secondary_ctx, **common_cfg)
    passive.start_cache()
    passive.standby_attach(sec_cache_backend_vol)

    # volume replicating active cache ramdisk writes to the passive cache
    # exported object
    passive_exp_obj = CacheVolume(passive, open=True)
    mirrored_vol = ReplicatedVolume(prim_cache_backend_vol, passive_exp_obj)

    # active cache
    active = Cache.start_on_device(mirrored_vol, primary_ctx, **common_cfg)
    core = Core(core_backend_vol)
    active.add_core(core)
    core_vol = CoreVolume(core, open=True)
    io_queue = active.get_default_queue()

    # some I/O
    job = Rio().target(core_vol).njobs(1)
    job = job.readwrite(ReadWrite.WRITE).size(Size.from_MiB(1)).qd(1)
    job.run([io_queue])

    # capture checksum before simulated active host failure
    expected_md5 = core_vol.md5()

    # offline primary cache volume and stop primary cache to simulate active
    # host failure
    mirrored_vol.offline()
    with pytest.raises(OcfError) as stop_err:
        active.stop()
    assert stop_err.value.error_code == OcfErrorCode.OCF_ERR_WRITE_CACHE

    # failover
    passive.standby_detach()
    passive.standby_activate(sec_cache_backend_vol, open_cores=False)

    # add core explicitly with "try_add" to workaround pyocf limitations
    core = Core(core_backend_vol)
    passive.add_core(core, try_add=True)
    core_vol = CoreVolume(core, open=True)

    assert expected_md5 == core_vol.md5()

View File

@ -15,10 +15,13 @@ from pyocf.types.cache import (
AcpParams,
NhitParams,
)
from pyocf.types.ctx import OcfCtx
from pyocf.types.data import Data
from pyocf.types.core import Core
from pyocf.types.volume import ErrorDevice, RamVolume, VOLUME_POISON
from pyocf.types.volume_core import CoreVolume
from pyocf.types.volume_cache import CacheVolume
from pyocf.types.io import IoDir
from pyocf.types.ioclass import IoClassesInfo, IoClassInfo
from pyocf.utils import Size as S
@ -56,19 +59,51 @@ def ocf_read(vol, queue, offset):
return data.get_bytes()[0]
def prepare_failover(pyocf_2_ctx, cache_backend_vol, error_io_seq_no):
    """Start a standby cache and an active WB cache stacked on top of it.

    The standby cache (second context) is attached to *cache_backend_vol*.
    Its exported object is wrapped in a disarmed ErrorDevice configured with
    write sequence number *error_io_seq_no*, and the active cache (first
    context) is started on that error device.

    Returns a tuple of (active cache, standby cache, error device).
    """
    active_ctx, standby_ctx = pyocf_2_ctx[0], pyocf_2_ctx[1]

    standby = Cache(owner=standby_ctx)
    standby.start_cache()
    standby.standby_attach(cache_backend_vol)
    standby_exp_obj = CacheVolume(standby, open=True)

    injector = ErrorDevice(
        standby_exp_obj,
        error_seq_no={IoDir.WRITE: error_io_seq_no},
        armed=False,
    )
    active = Cache.start_on_device(injector, cache_mode=CacheMode.WB, owner=active_ctx)

    return active, standby, injector
def prepare_normal(pyocf_2_ctx, cache_backend_vol, error_io_seq_no):
    """Start a WB cache on a disarmed ErrorDevice wrapping *cache_backend_vol*.

    The error device is configured with write sequence number
    *error_io_seq_no*; the cache is owned by the first context.

    Returns a tuple of (cache, error device).
    """
    owner_ctx = pyocf_2_ctx[0]
    injector = ErrorDevice(
        cache_backend_vol,
        error_seq_no={IoDir.WRITE: error_io_seq_no},
        armed=False,
    )
    started = Cache.start_on_device(injector, cache_mode=CacheMode.WB, owner=owner_ctx)
    return started, injector
def mngmt_op_surprise_shutdown_test(
pyocf_ctx, mngt_func, prepare_func, consistency_check_func
pyocf_2_ctx, failover, mngt_func, prepare_func, consistency_check_func
):
error_triggered = True
error_io_seq_no = 0
while error_triggered:
# Start cache device without error injection
error_io = {IoDir.WRITE: error_io_seq_no}
device = ErrorDevice(
mngmt_op_surprise_shutdown_test_cache_size, armed=False, error_seq_no=error_io
cache_backend_vol = RamVolume(mngmt_op_surprise_shutdown_test_cache_size)
if failover:
cache, cache2, err_vol = prepare_failover(
pyocf_2_ctx, cache_backend_vol, error_io_seq_no
)
else:
cache, err_vol = prepare_normal(
pyocf_2_ctx, cache_backend_vol, error_io_seq_no
)
cache = Cache.start_on_device(device, cache_mode=CacheMode.WB)
if prepare_func:
prepare_func(cache)
@ -77,17 +112,17 @@ def mngmt_op_surprise_shutdown_test(
cache.save()
# initiate error injection starting at write no @error_io_seq_no
device.arm()
err_vol.arm()
# call tested management function
status = 0
try:
mngt_func(cache)
status = OcfErrorCode.OCF_OK
except OcfError as ex:
status = ex.error_code
# if error was injected we expect mngmt op error
error_triggered = device.error_triggered()
error_triggered = err_vol.error_triggered()
assert error_triggered == (status != 0)
if error_triggered:
assert (
@ -98,12 +133,18 @@ def mngmt_op_surprise_shutdown_test(
# stop cache with error injection still on
with pytest.raises(OcfError) as ex:
cache.stop()
cache = None
assert ex.value.error_code == OcfErrorCode.OCF_ERR_WRITE_CACHE
# disable error injection and load the cache
device.disarm()
# discard error volume
err_vol.disarm()
cache = Cache.load_from_device(device, open_cores=True)
if failover:
cache2.standby_detach()
cache2.standby_activate(cache_backend_vol, open_cores=True)
cache = cache2
else:
cache = Cache.load_from_device(err_vol, open_cores=True)
# run consistency check
if consistency_check_func is not None:
@ -118,7 +159,8 @@ def mngmt_op_surprise_shutdown_test(
# power failure during core insert
@pytest.mark.security
def test_surprise_shutdown_add_core(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_add_core(pyocf_2_ctx, failover):
core_device = RamVolume(S.from_MiB(10))
def check_core(cache, error_triggered):
@ -132,13 +174,16 @@ def test_surprise_shutdown_add_core(pyocf_ctx):
def check_func(cache, error_triggered):
check_core(cache, error_triggered)
mngmt_op_surprise_shutdown_test(pyocf_ctx, tested_func, None, check_func)
mngmt_op_surprise_shutdown_test(
pyocf_2_ctx, failover, tested_func, None, check_func
)
# power failure during core removal
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_remove_core(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_remove_core(pyocf_2_ctx, failover):
core_device = RamVolume(S.from_MiB(10))
core = Core.using_device(core_device)
@ -152,12 +197,15 @@ def test_surprise_shutdown_remove_core(pyocf_ctx):
stats = cache.get_stats()
assert stats["conf"]["core_count"] == (1 if error_triggered else 0)
mngmt_op_surprise_shutdown_test(pyocf_ctx, tested_func, prepare_func, check_func)
mngmt_op_surprise_shutdown_test(
pyocf_2_ctx, failover, tested_func, prepare_func, check_func
)
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_remove_core_with_data(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_remove_core_with_data(pyocf_2_ctx, failover):
io_offset = mngmt_op_surprise_shutdown_test_io_offset
core_device = RamVolume(S.from_MiB(10))
core = Core.using_device(core_device, name="core1")
@ -180,13 +228,16 @@ def test_surprise_shutdown_remove_core_with_data(pyocf_ctx):
vol = CoreVolume(core, open=True)
assert ocf_read(vol, cache.get_default_queue(), io_offset) == 0xAA
mngmt_op_surprise_shutdown_test(pyocf_ctx, tested_func, prepare_func, check_func)
mngmt_op_surprise_shutdown_test(
pyocf_2_ctx, failover, tested_func, prepare_func, check_func
)
# power failure during core add after previous core removed
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_swap_core(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_swap_core(pyocf_2_ctx, failover):
core_device_1 = RamVolume(S.from_MiB(10), uuid="dev1")
core_device_2 = RamVolume(S.from_MiB(10), uuid="dev2")
core1 = Core.using_device(core_device_1, name="core1")
@ -215,13 +266,16 @@ def test_surprise_shutdown_swap_core(pyocf_ctx):
core2 = cache.get_core_by_name("core2")
assert core2.device.uuid == "dev2"
mngmt_op_surprise_shutdown_test(pyocf_ctx, tested_func, prepare, check_func)
mngmt_op_surprise_shutdown_test(
pyocf_2_ctx, failover, tested_func, prepare, check_func
)
# power failure during core add after previous core removed
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_swap_core_with_data(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_swap_core_with_data(pyocf_2_ctx, failover):
core_device_1 = RamVolume(S.from_MiB(10), uuid="dev1")
core_device_2 = RamVolume(S.from_MiB(10), uuid="dev2")
core1 = Core.using_device(core_device_1, name="core1")
@ -231,7 +285,12 @@ def test_surprise_shutdown_swap_core_with_data(pyocf_ctx):
cache.add_core(core1)
vol = CoreVolume(core1, open=True)
cache.save()
ocf_write(vol, cache.get_default_queue(), 0xAA, mngmt_op_surprise_shutdown_test_io_offset)
ocf_write(
vol,
cache.get_default_queue(),
0xAA,
mngmt_op_surprise_shutdown_test_io_offset,
)
cache.remove_core(core1)
cache.save()
@ -256,39 +315,56 @@ def test_surprise_shutdown_swap_core_with_data(pyocf_ctx):
vol2 = CoreVolume(core2, open=True)
assert core2.device.uuid == "dev2"
assert (
ocf_read(vol2, cache.get_default_queue(), mngmt_op_surprise_shutdown_test_io_offset)
ocf_read(
vol2,
cache.get_default_queue(),
mngmt_op_surprise_shutdown_test_io_offset,
)
== VOLUME_POISON
)
mngmt_op_surprise_shutdown_test(pyocf_ctx, tested_func, prepare, check_func)
mngmt_op_surprise_shutdown_test(
pyocf_2_ctx, failover, tested_func, prepare, check_func
)
# make sure there are no crashes when cache start is interrupted
# 1. is this checksum mismatch actually expected and the proper way
# to avoid loading improperly initialized cache?
# 2. uuid checksum mismatch should not allow cache to load
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_start_cache(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_start_cache(pyocf_2_ctx, failover):
ctx1 = pyocf_2_ctx[0]
ctx2 = pyocf_2_ctx[1]
error_triggered = True
error_io_seq_no = 0
while error_triggered:
# Start cache device without error injection
error_io = {IoDir.WRITE: error_io_seq_no}
device = ErrorDevice(
mngmt_op_surprise_shutdown_test_cache_size, error_seq_no=error_io, armed=True
ramdisk = RamVolume(mngmt_op_surprise_shutdown_test_cache_size)
if failover:
cache2 = Cache(owner=ctx2)
cache2.start_cache()
cache2.standby_attach(ramdisk)
cache2_exp_obj_vol = CacheVolume(cache2, open=True)
err_device = ErrorDevice(
cache2_exp_obj_vol, error_seq_no=error_io, armed=True
)
else:
err_device = ErrorDevice(ramdisk, error_seq_no=error_io, armed=True)
# call tested management function
status = 0
try:
cache = Cache.start_on_device(device, cache_mode=CacheMode.WB)
cache = Cache.start_on_device(err_device, cache_mode=CacheMode.WB)
status = OcfErrorCode.OCF_OK
except OcfError as ex:
status = ex.error_code
# if error was injected we expect mngmt op error
error_triggered = device.error_triggered()
error_triggered = err_device.error_triggered()
assert error_triggered == (status != 0)
if not error_triggered:
@ -299,16 +375,27 @@ def test_surprise_shutdown_start_cache(pyocf_ctx):
break
# disable error injection and load the cache
device.disarm()
err_device.disarm()
cache = None
if failover:
try:
cache = Cache.load_from_device(device)
cache2.standby_detach()
cache2.standby_activate(ramdisk, open_cores=True)
cache = cache2
except OcfError:
cache2.stop()
cache2 = None
cache = None
else:
try:
cache = Cache.load_from_device(err_device, open_cores=True)
except OcfError:
cache = None
if cache is not None:
cache.stop()
cache = None
# advance error injection point
error_io_seq_no += 1
@ -316,7 +403,8 @@ def test_surprise_shutdown_start_cache(pyocf_ctx):
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_stop_cache(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_stop_cache(pyocf_2_ctx, failover):
core_device = RamVolume(S.from_MiB(10))
error_triggered = True
error_io_seq_no = 0
@ -324,13 +412,15 @@ def test_surprise_shutdown_stop_cache(pyocf_ctx):
while error_triggered:
# Start cache device without error injection
error_io = {IoDir.WRITE: error_io_seq_no}
device = ErrorDevice(
mngmt_op_surprise_shutdown_test_cache_size, error_seq_no=error_io, armed=False
)
ramdisk = RamVolume(mngmt_op_surprise_shutdown_test_cache_size)
if failover:
cache, cache2, device = prepare_failover(
pyocf_2_ctx, ramdisk, error_io_seq_no
)
else:
cache, device = prepare_normal(pyocf_2_ctx, ramdisk, error_io_seq_no)
# setup cache and insert some data
cache = Cache.start_on_device(device, cache_mode=CacheMode.WB)
core = Core(device=core_device)
cache.add_core(core)
vol = CoreVolume(core, open=True)
@ -350,7 +440,7 @@ def test_surprise_shutdown_stop_cache(pyocf_ctx):
if error_triggered:
assert status == OcfErrorCode.OCF_ERR_WRITE_CACHE
else:
assert status == 0
assert status == OcfErrorCode.OCF_OK
if not error_triggered:
break
@ -361,7 +451,13 @@ def test_surprise_shutdown_stop_cache(pyocf_ctx):
assert core_device.get_bytes()[io_offset] == VOLUME_POISON
if failover:
cache2.standby_detach()
cache2.standby_activate(ramdisk, open_cores=False)
cache = cache2
else:
cache = Cache.load_from_device(device, open_cores=False)
stats = cache.get_stats()
if stats["conf"]["core_count"] == 1:
assert stats["usage"]["occupancy"]["value"] == 1
@ -377,22 +473,25 @@ def test_surprise_shutdown_stop_cache(pyocf_ctx):
@pytest.mark.security
def test_surprise_shutdown_cache_reinit(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_cache_reinit(pyocf_2_ctx, failover):
core_device = RamVolume(S.from_MiB(10))
error_io = {IoDir.WRITE: 0}
error_io_seq_no = 0
io_offset = mngmt_op_surprise_shutdown_test_io_offset
error_triggered = True
while error_triggered:
# Start cache device without error injection
device = ErrorDevice(
mngmt_op_surprise_shutdown_test_cache_size, error_seq_no=error_io, armed=False
)
ramdisk = RamVolume(mngmt_op_surprise_shutdown_test_cache_size)
if failover:
cache, cache2, device = prepare_failover(
pyocf_2_ctx, ramdisk, error_io_seq_no
)
else:
cache, device = prepare_normal(pyocf_2_ctx, ramdisk, error_io_seq_no)
# start WB
cache = Cache.start_on_device(device, cache_mode=CacheMode.WB)
core = Core(device=core_device)
cache.add_core(core)
vol = CoreVolume(core, open=True)
@ -402,6 +501,7 @@ def test_surprise_shutdown_cache_reinit(pyocf_ctx):
ocf_write(vol, queue, 0xAA, io_offset)
cache.stop()
cache = None
assert core_device.get_bytes()[io_offset] == VOLUME_POISON
@ -429,8 +529,19 @@ def test_surprise_shutdown_cache_reinit(pyocf_ctx):
cache = None
status = OcfErrorCode.OCF_OK
if failover:
try:
cache = Cache.load_from_device(device)
cache2.standby_detach()
cache2.standby_activate(ramdisk, open_cores=True)
cache = cache2
except OcfError as ex:
cache2.stop()
cache2 = None
status = ex.error_code
else:
try:
cache = Cache.load_from_device(device, open_cores=True)
except OcfError as ex:
status = ex.error_code
@ -442,14 +553,17 @@ def test_surprise_shutdown_cache_reinit(pyocf_ctx):
assert stats["usage"]["occupancy"]["value"] == 0
cache.add_core(core)
vol = CoreVolume(core, open=True)
assert ocf_read(vol, cache.get_default_queue(), io_offset) == VOLUME_POISON
assert (
ocf_read(vol, cache.get_default_queue(), io_offset) == VOLUME_POISON
)
cache.stop()
cache = None
error_io[IoDir.WRITE] += 1
error_io_seq_no += 1
def _test_surprise_shutdown_mngmt_generic(pyocf_ctx, func):
def _test_surprise_shutdown_mngmt_generic(pyocf_2_ctx, failover, func):
core_device = RamVolume(S.from_MiB(10))
core = Core(device=core_device)
@ -460,87 +574,93 @@ def _test_surprise_shutdown_mngmt_generic(pyocf_ctx, func):
func(cache, core)
cache.save()
mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None)
mngmt_op_surprise_shutdown_test(pyocf_2_ctx, failover, test, prepare, None)
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_change_cache_mode(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_change_cache_mode(pyocf_2_ctx, failover):
_test_surprise_shutdown_mngmt_generic(
pyocf_ctx, lambda cache, core: cache.change_cache_mode(CacheMode.WT)
pyocf_2_ctx, failover, lambda cache, core: cache.change_cache_mode(CacheMode.WT)
)
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_set_cleaning_policy(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
@pytest.mark.parametrize("start_clp", CleaningPolicy)
@pytest.mark.parametrize("end_clp", CleaningPolicy)
def test_surprise_shutdown_set_cleaning_policy(
pyocf_2_ctx, failover, start_clp, end_clp
):
core_device = RamVolume(S.from_MiB(10))
core = Core(device=core_device)
for c1 in CleaningPolicy:
for c2 in CleaningPolicy:
def prepare(cache):
cache.add_core(core)
cache.set_cleaning_policy(c1)
cache.set_cleaning_policy(start_clp)
cache.save()
def test(cache):
cache.set_cleaning_policy(c2)
cache.set_cleaning_policy(end_clp)
cache.save()
mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None)
mngmt_op_surprise_shutdown_test(pyocf_2_ctx, failover, test, prepare, None)
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_set_seq_cut_off_policy(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
@pytest.mark.parametrize("start_scp", SeqCutOffPolicy)
@pytest.mark.parametrize("end_scp", SeqCutOffPolicy)
def test_surprise_shutdown_set_seq_cut_off_policy(pyocf_2_ctx, failover, start_scp, end_scp):
core_device = RamVolume(S.from_MiB(10))
core = Core(device=core_device)
for s1 in SeqCutOffPolicy:
for s2 in SeqCutOffPolicy:
def prepare(cache):
cache.add_core(core)
cache.set_seq_cut_off_policy(s1)
cache.set_seq_cut_off_policy(start_scp)
cache.save()
def test(cache):
cache.set_seq_cut_off_policy(s2)
cache.set_seq_cut_off_policy(end_scp)
cache.save()
mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None)
mngmt_op_surprise_shutdown_test(pyocf_2_ctx, failover, test, prepare, None)
@pytest.mark.security
@pytest.mark.long
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_set_seq_cut_off_promotion(pyocf_2_ctx, failover):
    """Surprise shutdown while setting the sequential cut-off promotion count.

    Delegates to `_test_surprise_shutdown_mngmt_generic`, passing
    `set_seq_cut_off_promotion(256)` as the management operation under test.
    Runs once with `failover` disabled and once with it enabled.
    """
    _test_surprise_shutdown_mngmt_generic(
        pyocf_2_ctx, failover, lambda cache, core: cache.set_seq_cut_off_promotion(256)
    )
@pytest.mark.security
@pytest.mark.long
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_set_seq_cut_off_threshold(pyocf_2_ctx, failover):
    """Surprise shutdown while setting the sequential cut-off threshold.

    Delegates to `_test_surprise_shutdown_mngmt_generic`, passing a threshold
    change to `S.from_MiB(2).B` as the management operation under test.
    Runs once with `failover` disabled and once with it enabled.
    """
    _test_surprise_shutdown_mngmt_generic(
        pyocf_2_ctx,
        failover,
        lambda cache, core: cache.set_seq_cut_off_threshold(S.from_MiB(2).B),
    )
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_set_cleaning_policy_param(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
@pytest.mark.parametrize("clp", [c for c in CleaningPolicy if c != CleaningPolicy.NOP])
def test_surprise_shutdown_set_cleaning_policy_param(pyocf_2_ctx, failover, clp):
core_device = RamVolume(S.from_MiB(10))
core = Core(device=core_device)
for pol in CleaningPolicy:
if pol == CleaningPolicy.NOP:
continue
if pol == CleaningPolicy.ALRU:
if clp == CleaningPolicy.ALRU:
params = AlruParams
elif pol == CleaningPolicy.ACP:
elif clp == CleaningPolicy.ACP:
params = AcpParams
else:
# add handler for new policy here
@ -550,12 +670,12 @@ def test_surprise_shutdown_set_cleaning_policy_param(pyocf_ctx):
def prepare(cache):
cache.add_core(core)
cache.set_cleaning_policy(pol)
cache.set_cleaning_policy(clp)
cache.save()
def test(cache):
val = None
if pol == CleaningPolicy.ACP:
if clp == CleaningPolicy.ACP:
if p == AcpParams.WAKE_UP_TIME:
val = 5000
elif p == AcpParams.FLUSH_MAX_BUFFERS:
@ -563,7 +683,7 @@ def test_surprise_shutdown_set_cleaning_policy_param(pyocf_ctx):
else:
# add handler for new param here
assert False
elif pol == CleaningPolicy.ALRU:
elif clp == CleaningPolicy.ALRU:
if p == AlruParams.WAKE_UP_TIME:
val = 2000
elif p == AlruParams.STALE_BUFFER_TIME:
@ -575,42 +695,45 @@ def test_surprise_shutdown_set_cleaning_policy_param(pyocf_ctx):
else:
# add handler for new param here
assert False
cache.set_cleaning_policy_param(pol, p, val)
cache.set_cleaning_policy_param(clp, p, val)
cache.save()
mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None)
mngmt_op_surprise_shutdown_test(pyocf_2_ctx, failover, test, prepare, None)
@pytest.mark.security
@pytest.mark.long
@pytest.mark.parametrize("failover", [False, True])
@pytest.mark.parametrize("start_pp", PromotionPolicy)
@pytest.mark.parametrize("end_pp", PromotionPolicy)
def test_surprise_shutdown_set_promotion_policy(
    pyocf_2_ctx, failover, start_pp, end_pp
):
    """Surprise shutdown while changing the promotion policy.

    Every (start_pp, end_pp) pair of `PromotionPolicy` members is covered
    through parametrization, each with and without `failover`.
    """
    core_device = RamVolume(S.from_MiB(10))
    core = Core(device=core_device)

    def prepare(cache):
        # initial state: core added and the starting policy saved
        cache.add_core(core)
        cache.set_promotion_policy(start_pp)
        cache.save()

    def test(cache):
        # operation under test: switch to the target policy and save
        cache.set_promotion_policy(end_pp)
        cache.save()

    # last argument is None; presumably the optional consistency-check callback
    mngmt_op_surprise_shutdown_test(pyocf_2_ctx, failover, test, prepare, None)
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_set_promotion_policy_param(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
@pytest.mark.parametrize("pp", PromotionPolicy)
def test_surprise_shutdown_set_promotion_policy_param(pyocf_2_ctx, failover, pp):
core_device = RamVolume(S.from_MiB(10))
core = Core(device=core_device)
for pp in PromotionPolicy:
if pp == PromotionPolicy.ALWAYS:
continue
return
if pp == PromotionPolicy.NHIT:
params = NhitParams
else:
@ -637,12 +760,13 @@ def test_surprise_shutdown_set_promotion_policy_param(pyocf_ctx):
cache.set_promotion_policy_param(pp, p, val)
cache.save()
mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None)
mngmt_op_surprise_shutdown_test(pyocf_2_ctx, failover, test, prepare, None)
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_set_io_class_config(pyocf_ctx):
@pytest.mark.parametrize("failover", [False, True])
def test_surprise_shutdown_set_io_class_config(pyocf_2_ctx, failover):
core_device = RamVolume(S.from_MiB(10))
core = Core(device=core_device)
@ -697,4 +821,319 @@ def test_surprise_shutdown_set_io_class_config(pyocf_ctx):
]
assert curr_ioclass == old_ioclass or curr_ioclass == new_ioclass
mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, check)
mngmt_op_surprise_shutdown_test(pyocf_2_ctx, failover, test, prepare, check)
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_standby_activate(pyocf_ctx):
    """Interrupt standby activation with an injected write error.

    The scenario below is repeated with the injection point advanced by one
    write on every pass, until a pass completes without the error firing:

    1. start active cache
    2. add core, insert data
    3. stop
    4. load standby
    5. detach
    6. activate <- with I/O error injection
    7. standby load
    8. verify consistency
    """
    io_offset = mngmt_op_surprise_shutdown_test_io_offset

    error_triggered = True
    error_io_seq_no = 0

    while error_triggered:
        # Start cache device without error injection
        error_io = {IoDir.WRITE: error_io_seq_no}
        ramdisk = RamVolume(mngmt_op_surprise_shutdown_test_cache_size)
        device = ErrorDevice(ramdisk, error_seq_no=error_io, armed=False)
        core_device = RamVolume(S.from_MiB(10))

        device.disarm()

        # Add a core device and provide a few dirty blocks
        cache = Cache.start_on_device(device, cache_mode=CacheMode.WB)
        core = Core(device=core_device)
        cache.add_core(core)
        vol = CoreVolume(core, open=True)
        ocf_write(vol, cache.get_default_queue(), 0xAA, io_offset)
        original_dirty_blocks = cache.get_stats()["usage"]["dirty"]

        cache.stop()

        # Prepare a passive instance
        cache = Cache(owner=OcfCtx.get_default())
        cache.start_cache()
        cache.standby_load(device)

        cache.standby_detach()

        device.arm()

        # If the activation fails, the cache should be rolled back into the
        # passive state, so it is stopped right away in the error path
        try:
            cache.standby_activate(device)
        except OcfError as ex:
            status = ex.error_code
            cache.stop()
        else:
            status = OcfErrorCode.OCF_OK

        # If error was injected we expect mngmt op error
        error_triggered = device.error_triggered()
        assert error_triggered == (status != OcfErrorCode.OCF_OK)

        # Activate succeeded but error injection is still enabled, so
        # stopping the now-active cache is expected to raise
        if not error_triggered:
            with pytest.raises(OcfError):
                cache.stop()

        # Disable error injection and activate cache
        device.disarm()
        cache = None

        cache = Cache(owner=OcfCtx.get_default())
        cache.start_cache()
        cache.standby_load(device)
        cache.standby_detach()
        cache.standby_activate(device, open_cores=False)

        # The core and the dirty-block statistics recorded earlier must survive
        assert cache.get_stats()["conf"]["core_count"] == 1
        assert original_dirty_blocks == cache.get_stats()["usage"]["dirty"]

        core = Core(device=core_device)
        cache.add_core(core, try_add=True)
        vol = CoreVolume(core, open=True)
        assert ocf_read(vol, cache.get_default_queue(), io_offset) == 0xAA

        cache.stop()

        # advance error injection point
        error_io_seq_no += 1
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_standby_init_clean(pyocf_ctx):
    """Interrupted standby init on an empty volume.

    Standby attach is attempted on a fresh device with write-error injection
    armed, moving the injection point one write later on each pass. After an
    interrupted attach, a standby load of the same device must be rejected
    with OCF_ERR_NO_METADATA. The loop ends on the first pass in which the
    attach finishes without the error firing.
    """
    error_triggered = True
    error_io_seq_no = 0

    while error_triggered:
        # Fresh cache device with error injection armed from the start
        error_io = {IoDir.WRITE: error_io_seq_no}
        ramdisk = RamVolume(mngmt_op_surprise_shutdown_test_cache_size)
        device = ErrorDevice(ramdisk, error_seq_no=error_io, armed=True)

        cache = Cache(owner=OcfCtx.get_default())
        cache.start_cache()

        try:
            cache.standby_attach(device)
        except OcfError as ex:
            status = ex.error_code
            cache.stop()
        else:
            status = OcfErrorCode.OCF_OK

        # if error was injected we expect mngmt op error
        error_triggered = device.error_triggered()
        assert error_triggered == (status != OcfErrorCode.OCF_OK)

        if not error_triggered:
            # stop cache with error injection still on - expect no error in standby
            # as no writes go to the disk
            cache.stop()
            break

        # disable error injection and load the cache
        device.disarm()
        cache = None

        cache = Cache(owner=OcfCtx.get_default())
        cache.start_cache()

        # the interrupted init must not leave loadable metadata behind
        with pytest.raises(OcfError) as ex:
            cache.standby_load(device)
        assert ex.value.error_code == OcfErrorCode.OCF_ERR_NO_METADATA

        cache.stop()

        # advance error injection point
        error_io_seq_no += 1
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_standby_init_force_1(pyocf_ctx):
    """Forced standby attach over an existing cache, interrupted by a write error.

    1. start active
    2. add core, insert cacheline
    3. stop cache
    4. standby attach force = 1 <- with I/O injection
    5. standby load
    6. activate
    7. verify consistency: either no metadata, empty cache or cacheline still inserted
    """
    core_device = RamVolume(S.from_MiB(10))
    io_offset = mngmt_op_surprise_shutdown_test_io_offset

    fail_at = 0
    error_triggered = True

    while error_triggered:
        # cache device with write-error injection configured but not yet armed
        ramdisk = RamVolume(mngmt_op_surprise_shutdown_test_cache_size)
        device = ErrorDevice(ramdisk, error_seq_no={IoDir.WRITE: fail_at}, armed=False)

        # run an active write-back cache, write one block through the core
        # volume, remember the dirty statistics and stop the cache
        cache = Cache.start_on_device(device, cache_mode=CacheMode.WB)
        core = Core(device=core_device)
        cache.add_core(core)
        vol = CoreVolume(core, open=True)
        ocf_write(vol, cache.get_default_queue(), 0xAA, io_offset)
        dirty_before = cache.get_stats()["usage"]["dirty"]
        cache.stop()

        cache = Cache(owner=OcfCtx.get_default())
        cache.start_cache()

        device.arm()

        # try to reinitialize the device as standby while errors are injected
        try:
            cache.standby_attach(device, force=True)
        except OcfError as ex:
            attach_status = ex.error_code
        else:
            attach_status = OcfErrorCode.OCF_OK

        # an injected error has to surface as a cache write error, and only then
        error_triggered = device.error_triggered()
        assert error_triggered == (attach_status == OcfErrorCode.OCF_ERR_WRITE_CACHE)

        # injection is still armed at this point; stopping a standby or
        # detached cache is expected to succeed anyway
        cache.stop()
        cache = None

        # switch injection off and bring the device up: standby load -> activate
        device.disarm()

        cache = Cache(owner=OcfCtx.get_default())
        cache.start_cache()

        try:
            cache.standby_load(device)
            cache.standby_detach()
            cache.standby_activate(device, open_cores=False)
        except OcfError as ex:
            load_status = ex.error_code
        else:
            load_status = OcfErrorCode.OCF_OK

        if load_status == OcfErrorCode.OCF_OK:
            stats = cache.get_stats()
            if stats["conf"]["core_count"] == 1:
                # old metadata survived: the written cacheline must still be there
                assert dirty_before == stats["usage"]["dirty"]
                core = Core(device=core_device)
                cache.add_core(core, try_add=True)
                vol = CoreVolume(core, open=True)
                assert ocf_read(vol, cache.get_default_queue(), io_offset) == 0xAA
            else:
                # cache came up empty: reads must return the volume poison value
                assert stats["usage"]["occupancy"]["value"] == 0
                assert stats["usage"]["dirty"]["value"] == 0
                core = Core(device=core_device)
                cache.add_core(core)
                vol = CoreVolume(core, open=True)
                assert (
                    ocf_read(vol, cache.get_default_queue(), io_offset) == VOLUME_POISON
                )
        else:
            # the only acceptable failure is "no metadata on the device"
            assert load_status == OcfErrorCode.OCF_ERR_NO_METADATA

        cache.stop()

        # move the injection point one write further for the next pass
        fail_at += 1
@pytest.mark.security
@pytest.mark.long
def test_surprise_shutdown_standby_init_force_2(pyocf_ctx):
    """Forced standby attach interrupted by a write error, then a standard load.

    1. start active
    2. add core, insert cacheline
    3. stop cache
    4. standby attach force = 1 <- with I/O injection
    5. load cache (standard load)
    6. verify consistency: either no metadata, empty cache or cacheline still inserted
    """
    core_device = RamVolume(S.from_MiB(10))
    io_offset = mngmt_op_surprise_shutdown_test_io_offset

    error_triggered = True
    error_io_seq_no = 0

    while error_triggered:
        # Start cache device without error injection
        error_io = {IoDir.WRITE: error_io_seq_no}
        ramdisk = RamVolume(mngmt_op_surprise_shutdown_test_cache_size)
        device = ErrorDevice(ramdisk, error_seq_no=error_io, armed=False)

        # start and stop cache with cacheline inserted
        cache = Cache.start_on_device(device, cache_mode=CacheMode.WB)
        core = Core(device=core_device)
        cache.add_core(core)
        vol = CoreVolume(core, open=True)
        ocf_write(vol, cache.get_default_queue(), 0xAA, io_offset)
        original_dirty_blocks = cache.get_stats()["usage"]["dirty"]
        cache.stop()

        cache = Cache(owner=OcfCtx.get_default())
        cache.start_cache()

        device.arm()

        # attempt to reinitialize standby cache with error injection
        try:
            cache.standby_attach(device, force=True)
        except OcfError as ex:
            status = ex.error_code
        else:
            status = OcfErrorCode.OCF_OK

        # if error was injected we expect mngmt op error
        error_triggered = device.error_triggered()
        assert error_triggered == (status == OcfErrorCode.OCF_ERR_WRITE_CACHE)

        # stop cache with error injection still on
        # expect no error when stopping standby or detached cache
        cache.stop()
        # keep `cache` at None so the cleanup below can tell a failed load apart
        cache = None

        # disable error injection and load the cache
        device.disarm()

        # standard load
        try:
            cache = Cache.load_from_device(device, open_cores=False)
        except OcfError as ex:
            status = ex.error_code
        else:
            status = OcfErrorCode.OCF_OK

        if status != OcfErrorCode.OCF_OK:
            # the only acceptable load failure is missing metadata
            assert status == OcfErrorCode.OCF_ERR_NO_METADATA
        else:
            stats = cache.get_stats()
            if stats["conf"]["core_count"] == 1:
                # old metadata survived: the written cacheline must still be there
                assert original_dirty_blocks == stats["usage"]["dirty"]
                core = Core(device=core_device)
                cache.add_core(core, try_add=True)
                vol = CoreVolume(core, open=True)
                assert ocf_read(vol, cache.get_default_queue(), io_offset) == 0xAA
            else:
                # cache came up empty: reads must return the volume poison value
                assert stats["usage"]["occupancy"]["value"] == 0
                assert stats["usage"]["dirty"]["value"] == 0
                core = Core(device=core_device)
                cache.add_core(core)
                vol = CoreVolume(core, open=True)
                assert (
                    ocf_read(vol, cache.get_default_queue(), io_offset) == VOLUME_POISON
                )

        # a cache object exists only if the load succeeded
        if cache is not None:
            cache.stop()
            cache = None

        # advance error injection point
        error_io_seq_no += 1