From cf72664c2c6a075a25619d750405c8a687f2ed1c Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Thu, 16 Dec 2021 21:00:22 +0100 Subject: [PATCH 01/13] pyocf: fix cache device config Signed-off-by: Adam Rutkowski --- tests/functional/pyocf/types/cache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/functional/pyocf/types/cache.py b/tests/functional/pyocf/types/cache.py index 2cf86cc..cbc771b 100644 --- a/tests/functional/pyocf/types/cache.py +++ b/tests/functional/pyocf/types/cache.py @@ -75,6 +75,7 @@ class CacheAttachConfig(Structure): ("_open_cores", c_bool), ("_force", c_bool), ("_discard_on_start", c_bool), + ("_volume_params", c_void_p), ] @@ -447,8 +448,8 @@ class Cache: _cache_line_size=cache_line_size if cache_line_size else self.cache_line_size, - _force=force, _open_cores=True, + _force=force, _discard_on_start=False, ) From c7144decf8ca2506a93d1f07b5a8bbddb7e80a4d Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Tue, 14 Dec 2021 23:06:24 +0100 Subject: [PATCH 02/13] pyocf: extend error volume capabilities Adding option to 1. inject error based on I/O number 2. 
arm/disarm error injection for easier testing Signed-off-by: Adam Rutkowski --- tests/functional/pyocf/types/volume.py | 57 +++++++++++++++++++++----- 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/tests/functional/pyocf/types/volume.py b/tests/functional/pyocf/types/volume.py index c983c60..e4a31ef 100644 --- a/tests/functional/pyocf/types/volume.py +++ b/tests/functional/pyocf/types/volume.py @@ -305,12 +305,7 @@ class Volume(Structure): if size == 0: size = int(self.size) - int(offset) - print_buffer( - self._storage, - size, - ignore=ignore, - **kwargs - ) + print_buffer(self._storage, size, ignore=ignore, **kwargs) def md5(self): m = md5() @@ -319,20 +314,62 @@ class Volume(Structure): class ErrorDevice(Volume): - def __init__(self, size, error_sectors: set = None, uuid=None): + def __init__( + self, + size, + error_sectors: set = None, + error_seq_no: dict = None, + armed=True, + uuid=None, + ): super().__init__(size, uuid) - self.error_sectors = error_sectors or set() + self.error_sectors = error_sectors + self.error_seq_no = error_seq_no + self.armed = armed + self.io_seq_no = {IoDir.WRITE: 0, IoDir.READ: 0} + self.error = False def set_mapping(self, error_sectors: set): self.error_sectors = error_sectors def submit_io(self, io): - if io.contents._addr in self.error_sectors: + if not self.armed: + super().submit_io(io) + return + + direction = IoDir(io.contents._dir) + seq_no_match = ( + self.error_seq_no is not None + and direction in self.error_seq_no + and self.error_seq_no[direction] <= self.io_seq_no[direction] + ) + sector_match = ( + self.error_sectors is not None and io.contents._addr in self.error_sectors + ) + + self.io_seq_no[direction] += 1 + + error = True + if self.error_seq_no is not None and not seq_no_match: + error = False + if self.error_sectors is not None and not sector_match: + error = False + if error: + self.error = True io.contents._end(io, -5) - self.stats["errors"][io.contents._dir] += 1 + 
self.stats["errors"][direction] += 1 else: super().submit_io(io) + def arm(self): + self.armed = True + + def disarm(self): + self.armed = False + + def error_triggered(self): + return self.error + def reset_stats(self): super().reset_stats() self.stats["errors"] = {IoDir.WRITE: 0, IoDir.READ: 0} From 58dac85f7b3272f71e03bf22db1ef0d539e79a26 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Wed, 15 Dec 2021 00:45:33 +0100 Subject: [PATCH 03/13] pyocf: add ocf_core_get_uuid() wrapper Signed-off-by: Adam Rutkowski --- .../functional/pyocf/wrappers/ocf_core_wrappers.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 tests/functional/pyocf/wrappers/ocf_core_wrappers.c diff --git a/tests/functional/pyocf/wrappers/ocf_core_wrappers.c b/tests/functional/pyocf/wrappers/ocf_core_wrappers.c new file mode 100644 index 0000000..0c9a359 --- /dev/null +++ b/tests/functional/pyocf/wrappers/ocf_core_wrappers.c @@ -0,0 +1,14 @@ +/* + * Copyright(c) 2021-2022 Intel Corporation + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "ocf/ocf_io.h" +#include "ocf/ocf_core.h" + +const struct ocf_volume_uuid *ocf_core_get_uuid_wrapper(ocf_core_t core) +{ + return ocf_core_get_uuid(core); +} + + From 683174c78f9c720db6b8af904d5bb3172e05773f Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Thu, 16 Dec 2021 18:27:30 +0100 Subject: [PATCH 04/13] pyocf: add option to load cache without opening cores ... 
this is useful to work around current pyocf limitations and load cache with manual core insertion Signed-off-by: Adam Rutkowski --- tests/functional/pyocf/types/cache.py | 28 +++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/tests/functional/pyocf/types/cache.py b/tests/functional/pyocf/types/cache.py index cbc771b..6d8fc0f 100644 --- a/tests/functional/pyocf/types/cache.py +++ b/tests/functional/pyocf/types/cache.py @@ -426,7 +426,12 @@ class Cache: raise OcfError("Error adding partition to cache", status) def configure_device( - self, device, force=False, perform_test=True, cache_line_size=None + self, + device, + force=False, + perform_test=True, + cache_line_size=None, + open_cores=True, ): self.device = device self.device_name = device.uuid @@ -448,15 +453,22 @@ class Cache: _cache_line_size=cache_line_size if cache_line_size else self.cache_line_size, - _open_cores=True, + _open_cores=open_cores, _force=force, _discard_on_start=False, ) def attach_device( - self, device, force=False, perform_test=False, cache_line_size=None + self, + device, + force=False, + perform_test=False, + cache_line_size=None, + open_cores=True, ): - self.configure_device(device, force, perform_test, cache_line_size) + self.configure_device( + device, force, perform_test, cache_line_size, open_cores=open_cores + ) self.write_lock() c = OcfCompletion([("cache", c_void_p), ("priv", c_void_p), ("error", c_int)]) @@ -484,8 +496,8 @@ class Cache: if c.results["error"]: raise OcfError("Attaching cache device failed", c.results["error"]) - def load_cache(self, device): - self.configure_device(device) + def load_cache(self, device, open_cores=True): + self.configure_device(device, open_cores=open_cores) c = OcfCompletion([("cache", c_void_p), ("priv", c_void_p), ("error", c_int)]) device.owner.lib.ocf_mngt_cache_load( self.cache_handle, byref(self.dev_cfg), c, None @@ -496,12 +508,12 @@ class Cache: raise OcfError("Loading cache device failed", 
c.results["error"]) @classmethod - def load_from_device(cls, device, name="cache"): + def load_from_device(cls, device, name="cache", open_cores=True): c = cls(name=name, owner=device.owner) c.start_cache() try: - c.load_cache(device) + c.load_cache(device, open_cores=open_cores) except: # noqa E722 c.stop() raise From c43059a4cfc7f7972162e9bf3313eefc5941c34e Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Thu, 16 Dec 2021 21:02:41 +0100 Subject: [PATCH 05/13] pyocf: add get_bytes() function to Volume and Data Signed-off-by: Adam Rutkowski --- tests/functional/pyocf/types/data.py | 4 ++++ tests/functional/pyocf/types/volume.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/tests/functional/pyocf/types/data.py b/tests/functional/pyocf/types/data.py index 6e14266..6e6eec3 100644 --- a/tests/functional/pyocf/types/data.py +++ b/tests/functional/pyocf/types/data.py @@ -223,3 +223,7 @@ class Data: m = md5() m.update(string_at(self.handle, self.size)) return m.hexdigest() + + def get_bytes(self): + return string_at(self.handle, self.size) + diff --git a/tests/functional/pyocf/types/volume.py b/tests/functional/pyocf/types/volume.py index e4a31ef..1b54b9d 100644 --- a/tests/functional/pyocf/types/volume.py +++ b/tests/functional/pyocf/types/volume.py @@ -312,6 +312,9 @@ class Volume(Structure): m.update(string_at(self._storage, self.size)) return m.hexdigest() + def get_bytes(self): + return string_at(self._storage, self.size) + class ErrorDevice(Volume): def __init__( From 5fe217d96f9dc9cd8114da849eb39e67d2f64658 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Fri, 17 Dec 2021 16:03:50 +0100 Subject: [PATCH 06/13] pyocf: set proper OCF error codes in Volume Signed-off-by: Adam Rutkowski --- tests/functional/pyocf/types/data.py | 1 - tests/functional/pyocf/types/volume.py | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/functional/pyocf/types/data.py b/tests/functional/pyocf/types/data.py index 6e6eec3..fad4396 100644 --- 
a/tests/functional/pyocf/types/data.py +++ b/tests/functional/pyocf/types/data.py @@ -226,4 +226,3 @@ class Data: def get_bytes(self): return string_at(self.handle, self.size) - diff --git a/tests/functional/pyocf/types/volume.py b/tests/functional/pyocf/types/volume.py index 1b54b9d..46c189b 100644 --- a/tests/functional/pyocf/types/volume.py +++ b/tests/functional/pyocf/types/volume.py @@ -203,7 +203,7 @@ class Volume(Structure): return -1 if volume.opened: - return OcfErrorCode.OCF_ERR_NOT_OPEN_EXC + return -OcfErrorCode.OCF_ERR_NOT_OPEN_EXC Volume._instances_[ref] = weakref.ref(volume) @@ -269,7 +269,7 @@ class Volume(Structure): discard.contents._end(discard, 0) except: # noqa E722 - discard.contents._end(discard, -5) + discard.contents._end(discard, -OcfErrorCode.OCF_ERR_NOT_SUPP) def get_stats(self): return self.stats @@ -299,7 +299,7 @@ class Volume(Structure): io.contents._end(io, 0) except: # noqa E722 - io.contents._end(io, -5) + io.contents._end(io, -OcfErrorCode.OCF_ERR_IO) def dump(self, offset=0, size=0, ignore=VOLUME_POISON, **kwargs): if size == 0: @@ -359,7 +359,7 @@ class ErrorDevice(Volume): error = False if error: self.error = True - io.contents._end(io, -5) + io.contents._end(io, -OcfErrorCode.OCF_ERR_IO) self.stats["errors"][direction] += 1 else: super().submit_io(io) From ed5185e8708deb0745f83e16ca152e9f7be1a08e Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Mon, 20 Dec 2021 12:05:07 +0100 Subject: [PATCH 07/13] pyocf: make io class config more usable Unify field param names and include io class id in info struct. 
Signed-off-by: Adam Rutkowski --- tests/functional/pyocf/types/cache.py | 5 +++-- tests/functional/pyocf/types/ioclass.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/functional/pyocf/types/cache.py b/tests/functional/pyocf/types/cache.py index 6d8fc0f..5434391 100644 --- a/tests/functional/pyocf/types/cache.py +++ b/tests/functional/pyocf/types/cache.py @@ -349,6 +349,7 @@ class Cache: raise OcfError("Error retriving ioclass info", status) return { + "_class_id": part_id, "_name": ioclass_info._name.decode("ascii"), "_cache_mode": ioclass_info._cache_mode, "_priority": int(ioclass_info._priority), @@ -403,7 +404,7 @@ class Cache: ioclasses_info._config[i]._name = ( ioclass_info._name if len(ioclass_info._name) > 0 else 0 ) - ioclasses_info._config[i]._prio = ioclass_info._priority + ioclasses_info._config[i]._priority = ioclass_info._priority ioclasses_info._config[i]._cache_mode = ioclass_info._cache_mode ioclasses_info._config[i]._max_size = ioclass_info._max_size @@ -411,7 +412,7 @@ class Cache: ioclasses_info._config[part_id]._name = name.encode("utf-8") ioclasses_info._config[part_id]._cache_mode = int(cache_mode) - ioclasses_info._config[part_id]._prio = priority + ioclasses_info._config[part_id]._priority = priority ioclasses_info._config[part_id]._max_size = max_size self.write_lock() diff --git a/tests/functional/pyocf/types/ioclass.py b/tests/functional/pyocf/types/ioclass.py index 1d9f9f8..94fe16f 100644 --- a/tests/functional/pyocf/types/ioclass.py +++ b/tests/functional/pyocf/types/ioclass.py @@ -25,7 +25,7 @@ class IoClassConfig(Structure): ("_max_size", c_uint32), ("_name", c_char_p), ("_cache_mode", c_int), - ("_prio", c_uint16), + ("_priority", c_uint16), ] From 7f60aa0dd68421698477da50ca389098eb8a4a37 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Mon, 20 Dec 2021 15:25:38 +0100 Subject: [PATCH 08/13] pyocf: fix error handling in Cache::stop() Write error in cache stop means the instance was in fact stopped. 
Signed-off-by: Adam Rutkowski --- tests/functional/pyocf/types/cache.py | 6 +++++- tests/functional/pyocf/types/shared.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/functional/pyocf/types/cache.py b/tests/functional/pyocf/types/cache.py index 5434391..b440977 100644 --- a/tests/functional/pyocf/types/cache.py +++ b/tests/functional/pyocf/types/cache.py @@ -700,7 +700,8 @@ class Cache: self.owner.lib.ocf_mngt_cache_stop(self.cache_handle, c, None) c.wait() - if c.results["error"]: + err = OcfErrorCode(-1 * c.results["error"]) + if err != OcfErrorCode.OCF_OK and err != OcfErrorCode.OCF_ERR_WRITE_CACHE: self.write_unlock() raise OcfError("Failed stopping cache", c.results["error"]) @@ -712,6 +713,9 @@ class Cache: self.owner.caches.remove(self) + if err != OcfErrorCode.OCF_OK: + raise OcfError("Failed stopping cache", c.results["error"]) + def flush(self): self.write_lock() diff --git a/tests/functional/pyocf/types/shared.py b/tests/functional/pyocf/types/shared.py index 8c2c274..d15d8a5 100644 --- a/tests/functional/pyocf/types/shared.py +++ b/tests/functional/pyocf/types/shared.py @@ -12,6 +12,7 @@ from ..utils import Size as S class OcfErrorCode(IntEnum): + OCF_OK = 0 OCF_ERR_INVAL = 1000000 OCF_ERR_AGAIN = auto() OCF_ERR_INTR = auto() From ce0bbc260ef6dca81dbb56cc9240eb522bedd1d8 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Mon, 20 Dec 2021 16:00:29 +0100 Subject: [PATCH 09/13] pyocf: explicitly free Volume::_uuid_ dictionary Signed-off-by: Adam Rutkowski --- tests/functional/pyocf/types/ctx.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/functional/pyocf/types/ctx.py b/tests/functional/pyocf/types/ctx.py index 7a5e4de..31b3ada 100644 --- a/tests/functional/pyocf/types/ctx.py +++ b/tests/functional/pyocf/types/ctx.py @@ -97,6 +97,7 @@ class OcfCtx: self.cleaner = None Queue._instances_ = {} Volume._instances_ = {} + Volume._uuid_ = {} Data._instances_ = {} Logger._instances_ = {} From 
366d89a9c4f7d055c48e232eabb58bfc6cb2f8e9 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Thu, 16 Dec 2021 21:02:22 +0100 Subject: [PATCH 10/13] pyocf: management operation power failure handling tests Signed-off-by: Adam Rutkowski --- .../test_management_surprise_shutdown.py | 681 ++++++++++++++++++ 1 file changed, 681 insertions(+) create mode 100644 tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py diff --git a/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py b/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py new file mode 100644 index 0000000..a1be98a --- /dev/null +++ b/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py @@ -0,0 +1,681 @@ +# Copyright(c) 2021-2022 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +# + +import pytest +from ctypes import c_int, c_void_p, byref, cast, POINTER + +from pyocf.types.cache import ( + Cache, + CacheMode, + CleaningPolicy, + SeqCutOffPolicy, + PromotionPolicy, + AlruParams, + AcpParams, + NhitParams, +) +from pyocf.types.data import Data +from pyocf.types.core import Core +from pyocf.types.volume import ErrorDevice, Volume +from pyocf.types.io import IoDir +from pyocf.types.ioclass import IoClassesInfo, IoClassInfo +from pyocf.utils import Size as S +from pyocf.types.shared import ( + OcfCompletion, + CacheLineSize, + OcfError, + OcfErrorCode, + Uuid, +) +from pyocf.ocf import OcfLib + +mngmt_op_surprise_shutdown_test_cache_size = S.from_MiB(40) +mngmt_op_surprise_shutdown_test_io_offset = S.from_MiB(4).B + + +def ocf_write(cache, core, val, offset): + data = Data.from_bytes(bytes([val] * 512)) + comp = OcfCompletion([("error", c_int)]) + io = core.new_io(cache.get_default_queue(), offset, 512, IoDir.WRITE, 0, 0) + io.set_data(data) + io.callback = comp.callback + io.submit() + comp.wait() + + +def ocf_read(cache, core, offset): + data = Data(byte_count=512) + comp = OcfCompletion([("error", 
c_int)]) + io = core.new_io(cache.get_default_queue(), offset, 512, IoDir.READ, 0, 0) + io.set_data(data) + io.callback = comp.callback + io.submit() + comp.wait() + return data.get_bytes()[0] + + +def mngmt_op_surprise_shutdown_test( + pyocf_ctx, mngt_func, prepare_func, consistency_check_func +): + error_triggered = True + error_io_seq_no = 0 + + while error_triggered: + # Start cache device without error injection + error_io = {IoDir.WRITE: error_io_seq_no} + device = ErrorDevice( + mngmt_op_surprise_shutdown_test_cache_size, armed=False, error_seq_no=error_io + ) + cache = Cache.start_on_device(device, cache_mode=CacheMode.WB) + + if prepare_func: + prepare_func(cache) + + # make sure cache state is persistent + cache.save() + + # initiate error injection starting at write no @error_io_seq_no + device.arm() + + # call tested management function + status = 0 + try: + mngt_func(cache) + except OcfError as ex: + status = ex.error_code + + # if error was injected we expect mngmt op error + error_triggered = device.error_triggered() + assert error_triggered == (status != 0) + if error_triggered: + assert ( + status == OcfErrorCode.OCF_ERR_WRITE_CACHE + or status == OcfErrorCode.OCF_ERR_IO + ) + + # stop cache with error injection still on + with pytest.raises(OcfError) as ex: + cache.stop() + assert ex.value.error_code == OcfErrorCode.OCF_ERR_WRITE_CACHE + + # disable error injection and load the cache + device.disarm() + + # load cache with open_cores = False to allow consistency check to add + # core with WA for pyocf object management + cache = Cache.load_from_device(device, open_cores=False) + + # run consistency check + if consistency_check_func is not None: + consistency_check_func(cache, error_triggered) + + # stop the cache + cache.stop() + + # advance error injection point + error_io_seq_no += 1 + + +# power failure during core insert +@pytest.mark.security +def test_surprise_shutdown_add_core(pyocf_ctx): + core_device = Volume(S.from_MiB(10)) + + def 
check_core(cache, error_triggered): + stats = cache.get_stats() + assert stats["conf"]["core_count"] == (0 if error_triggered else 1) + + def tested_func(cache): + core = Core(device=core_device, try_add=False) + cache.add_core(core) + + def check_func(cache, error_triggered): + check_core(cache, error_triggered) + + mngmt_op_surprise_shutdown_test(pyocf_ctx, tested_func, None, check_func) + + +# power failure during core removal +@pytest.mark.security +def test_surprise_shutdown_remove_core(pyocf_ctx): + core_device = Volume(S.from_MiB(10)) + core = Core.using_device(core_device) + + def prepare_func(cache): + cache.add_core(core) + + def tested_func(cache): + cache.remove_core(core) + + def check_func(cache, error_triggered): + stats = cache.get_stats() + assert stats["conf"]["core_count"] == (1 if error_triggered else 0) + + mngmt_op_surprise_shutdown_test(pyocf_ctx, tested_func, prepare_func, check_func) + + +@pytest.mark.security +def test_surprise_shutdown_remove_core_with_data(pyocf_ctx): + io_offset = mngmt_op_surprise_shutdown_test_io_offset + core_device = Volume(S.from_MiB(10)) + core = Core.using_device(core_device) + + def prepare_func(cache): + cache.add_core(core) + ocf_write(cache, core, 0xAA, io_offset) + + def tested_func(cache): + cache.flush() + cache.remove_core(core) + + def check_func(cache, error_triggered): + stats = cache.get_stats() + if stats["conf"]["core_count"] == 0: + assert core_device.get_bytes()[io_offset] == 0xAA + else: + core = Core(device=core_device, try_add=True) + cache.add_core(core) + assert ocf_read(cache, core, io_offset) == 0xAA + + mngmt_op_surprise_shutdown_test(pyocf_ctx, tested_func, prepare_func, check_func) + + +# power failure during core add after previous core removed +@pytest.mark.security +def test_surprise_shutdown_swap_core(pyocf_ctx): + core_device_1 = Volume(S.from_MiB(10), uuid="dev1") + core_device_2 = Volume(S.from_MiB(10), uuid="dev2") + core1 = Core.using_device(core_device_1, name="core1") + core2 
= Core.using_device(core_device_2, name="core2") + + def prepare(cache): + cache.add_core(core1) + cache.save() + cache.remove_core(core1) + cache.save() + + def tested_func(cache): + cache.add_core(core2) + + def check_func(cache, error_triggered): + stats = cache.get_stats() + assert stats["conf"]["core_count"] == (0 if error_triggered else 1) + core1_ptr = c_void_p() + core2_ptr = c_void_p() + ret1 = OcfLib.getInstance().ocf_core_get_by_name( + cache, "core1".encode("utf-8"), 6, byref(core1_ptr) + ) + ret2 = OcfLib.getInstance().ocf_core_get_by_name( + cache, "core2".encode("utf-8"), 6, byref(core2_ptr) + ) + assert ret1 != 0 + if error_triggered: + assert ret2 != 0 + else: + assert ret2 == 0 + uuid_ptr = cast( + cache.owner.lib.ocf_core_get_uuid_wrapper(core2_ptr), POINTER(Uuid) + ) + uuid = str(uuid_ptr.contents._data, encoding="ascii") + assert uuid == "dev2" + + mngmt_op_surprise_shutdown_test(pyocf_ctx, tested_func, prepare, check_func) + + +# power failure during core add after previous core removed +@pytest.mark.security +def test_surprise_shutdown_swap_core_with_data(pyocf_ctx): + core_device_1 = Volume(S.from_MiB(10), uuid="dev1") + core_device_2 = Volume(S.from_MiB(10), uuid="dev2") + core1 = Core.using_device(core_device_1, name="core1") + core2 = Core.using_device(core_device_2, name="core2") + + def prepare(cache): + cache.add_core(core1) + cache.save() + ocf_write(cache, core1, 0xAA, mngmt_op_surprise_shutdown_test_io_offset) + cache.remove_core(core1) + cache.save() + + def tested_func(cache): + cache.add_core(core2) + + def check_func(cache, error_triggered): + stats = cache.get_stats() + assert stats["conf"]["core_count"] == (0 if error_triggered else 1) + core1_ptr = c_void_p() + core2_ptr = c_void_p() + ret1 = OcfLib.getInstance().ocf_core_get_by_name( + cache, "core1".encode("utf-8"), 6, byref(core1_ptr) + ) + ret2 = OcfLib.getInstance().ocf_core_get_by_name( + cache, "core2".encode("utf-8"), 6, byref(core2_ptr) + ) + assert ret1 != 0 + if 
ret2 == 0: + uuid_ptr = cast( + cache.owner.lib.ocf_core_get_uuid_wrapper(core2_ptr), POINTER(Uuid) + ) + uuid = str(uuid_ptr.contents._data, encoding="ascii") + assert uuid == "dev2" + core2 = Core(device=core_device_2, try_add=True, name="core2") + cache.add_core(core2) + assert ( + ocf_read(cache, core2, mngmt_op_surprise_shutdown_test_io_offset) + == Volume.VOLUME_POISON + ) + + mngmt_op_surprise_shutdown_test(pyocf_ctx, tested_func, prepare, check_func) + + +# make sure there are no crashes when cache start is interrupted +# 1. is this checksum mismatch actually expected and the proper way +# to avoid loading improperly initialized cache? +# 2. uuid checksum mismatch should not allow cache to load +@pytest.mark.security +def test_surprise_shutdown_start_cache(pyocf_ctx): + error_triggered = True + error_io_seq_no = 0 + + while error_triggered: + # Start cache device without error injection + error_io = {IoDir.WRITE: error_io_seq_no} + device = ErrorDevice( + mngmt_op_surprise_shutdown_test_cache_size, error_seq_no=error_io, armed=True + ) + + # call tested management function + status = 0 + try: + cache = Cache.start_on_device(device, cache_mode=CacheMode.WB) + except OcfError as ex: + status = ex.error_code + + # if error was injected we expect mngmt op error + error_triggered = device.error_triggered() + assert error_triggered == (status != 0) + + if not error_triggered: + # stop cache with error injection still on + with pytest.raises(OcfError) as ex: + cache.stop() + assert ex.value.error_code == OcfErrorCode.OCF_ERR_WRITE_CACHE + break + + # disable error injection and load the cache + device.disarm() + cache = None + + try: + cache = Cache.load_from_device(device) + except OcfError: + cache = None + + if cache is not None: + cache.stop() + + # advance error injection point + error_io_seq_no += 1 + + +@pytest.mark.security +def test_surprise_shutdown_stop_cache(pyocf_ctx): + core_device = Volume(S.from_MiB(10)) + error_triggered = True + error_io_seq_no = 
0 + io_offset = mngmt_op_surprise_shutdown_test_io_offset + + while error_triggered: + # Start cache device without error injection + error_io = {IoDir.WRITE: error_io_seq_no} + device = ErrorDevice( + mngmt_op_surprise_shutdown_test_cache_size, error_seq_no=error_io, armed=False + ) + + # setup cache and insert some data + cache = Cache.start_on_device(device, cache_mode=CacheMode.WB) + core = Core(device=core_device, try_add=False) + cache.add_core(core) + ocf_write(cache, core, 0xAA, io_offset) + + # start error injection + device.arm() + + try: + cache.stop() + status = OcfErrorCode.OCF_OK + except OcfError as ex: + status = ex.error_code + + # if error was injected we expect mngmt op error + error_triggered = device.error_triggered() + if error_triggered: + assert status == OcfErrorCode.OCF_ERR_WRITE_CACHE + else: + assert status == 0 + + if not error_triggered: + break + + # disable error injection and load the cache + device.disarm() + cache = None + + assert core_device.get_bytes()[io_offset] == Volume.VOLUME_POISON + + cache = Cache.load_from_device(device, open_cores=False) + stats = cache.get_stats() + if stats["conf"]["core_count"] == 1: + assert stats["usage"]["occupancy"]["value"] == 1 + core = Core(device=core_device, try_add=True) + cache.add_core(core) + assert ocf_read(cache, core, io_offset) == 0xAA + + cache.stop() + + # advance error injection point + error_io_seq_no += 1 + + +@pytest.mark.security +def test_surprise_shutdown_cache_reinit(pyocf_ctx): + core_device = Volume(S.from_MiB(10)) + + error_io = {IoDir.WRITE: 0} + + io_offset = mngmt_op_surprise_shutdown_test_io_offset + + error_triggered = True + while error_triggered: + # Start cache device without error injection + device = ErrorDevice( + mngmt_op_surprise_shutdown_test_cache_size, error_seq_no=error_io, armed=False + ) + + # start WB + cache = Cache.start_on_device(device, cache_mode=CacheMode.WB) + core = Core(device=core_device, try_add=False) + cache.add_core(core) + + # insert 
dirty cacheline + ocf_write(cache, core, 0xAA, io_offset) + + cache.stop() + + assert core_device.get_bytes()[io_offset] == Volume.VOLUME_POISON + + # start error injection + device.arm() + + # power failure during cache re-initialization + try: + # sets force = True by default + cache = Cache.start_on_device(device, cache_mode=CacheMode.WB) + status = OcfErrorCode.OCF_OK + except OcfError as ex: + status = ex.error_code + cache = None + + error_triggered = device.error_triggered() + assert error_triggered == (status == OcfErrorCode.OCF_ERR_WRITE_CACHE) + + if cache: + with pytest.raises(OcfError) as ex: + cache.stop() + assert ex.value.error_code == OcfErrorCode.OCF_ERR_WRITE_CACHE + + device.disarm() + + cache = Cache.load_from_device(device) + + stats = cache.get_stats() + if stats["conf"]["core_count"] == 0: + cache.add_core(core) + assert ocf_read(cache, core, io_offset) == Volume.VOLUME_POISON + + cache.stop() + + error_io[IoDir.WRITE] += 1 + + +def _test_surprise_shutdown_mngmt_generic(pyocf_ctx, func): + core_device = Volume(S.from_MiB(10)) + core = Core(device=core_device, try_add=False) + + def prepare(cache): + cache.add_core(core) + + def test(cache): + func(cache, core) + cache.save() + + mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None) + + +@pytest.mark.security +def test_surprise_shutdown_change_cache_mode(pyocf_ctx): + _test_surprise_shutdown_mngmt_generic( + pyocf_ctx, lambda cache, core: cache.change_cache_mode(CacheMode.WT) + ) + + +@pytest.mark.security +def test_surprise_shutdown_set_cleaning_policy(pyocf_ctx): + core_device = Volume(S.from_MiB(10)) + core = Core(device=core_device, try_add=False) + + for c1 in CleaningPolicy: + for c2 in CleaningPolicy: + + def prepare(cache): + cache.add_core(core) + cache.set_cleaning_policy(c1) + cache.save() + + def test(cache): + cache.set_cleaning_policy(c2) + cache.save() + + mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None) + + +@pytest.mark.security +def 
test_surprise_shutdown_set_seq_cut_off_policy(pyocf_ctx): + core_device = Volume(S.from_MiB(10)) + core = Core(device=core_device, try_add=False) + + for s1 in SeqCutOffPolicy: + for s2 in SeqCutOffPolicy: + + def prepare(cache): + cache.add_core(core) + cache.set_seq_cut_off_policy(s1) + cache.save() + + def test(cache): + cache.set_seq_cut_off_policy(s2) + cache.save() + + mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None) + + +@pytest.mark.security +def test_surprise_shutdown_set_seq_cut_off_promotion(pyocf_ctx): + _test_surprise_shutdown_mngmt_generic( + pyocf_ctx, lambda cache, core: cache.set_seq_cut_off_promotion(256) + ) + + +@pytest.mark.security +def test_surprise_shutdown_set_seq_cut_off_threshold(pyocf_ctx): + _test_surprise_shutdown_mngmt_generic( + pyocf_ctx, lambda cache, core: cache.set_seq_cut_off_threshold(S.from_MiB(2).B) + ) + + +@pytest.mark.security +def test_surprise_shutdown_set_cleaning_policy_param(pyocf_ctx): + core_device = Volume(S.from_MiB(10)) + core = Core(device=core_device, try_add=False) + + for pol in CleaningPolicy: + if pol == CleaningPolicy.NOP: + continue + if pol == CleaningPolicy.ALRU: + params = AlruParams + elif pol == CleaningPolicy.ACP: + params = AcpParams + else: + # add handler for new policy here + assert False + + for p in params: + + def prepare(cache): + cache.add_core(core) + cache.set_cleaning_policy(pol) + cache.save() + + def test(cache): + val = None + if pol == CleaningPolicy.ACP: + if p == AcpParams.WAKE_UP_TIME: + val = 5000 + elif p == AcpParams.FLUSH_MAX_BUFFERS: + val = 5000 + else: + # add handler for new param here + assert False + elif pol == CleaningPolicy.ALRU: + if p == AlruParams.WAKE_UP_TIME: + val = 2000 + elif p == AlruParams.STALE_BUFFER_TIME: + val = 2000 + elif p == AlruParams.FLUSH_MAX_BUFFERS: + val = 5000 + elif p == AlruParams.ACTIVITY_THRESHOLD: + val = 500000 + else: + # add handler for new param here + assert False + cache.set_cleaning_policy_param(pol, p, val) + 
cache.save() + + mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None) + + +@pytest.mark.security +def test_surprise_shutdown_set_promotion_policy(pyocf_ctx): + core_device = Volume(S.from_MiB(10)) + core = Core(device=core_device, try_add=False) + + for pp1 in PromotionPolicy: + for pp2 in PromotionPolicy: + + def prepare(cache): + cache.add_core(core) + cache.set_promotion_policy(pp1) + cache.save() + + def test(cache): + cache.set_promotion_policy(pp2) + cache.save() + + mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None) + + +@pytest.mark.security +def test_surprise_shutdown_set_promotion_policy_param(pyocf_ctx): + core_device = Volume(S.from_MiB(10)) + core = Core(device=core_device, try_add=False) + + for pp in PromotionPolicy: + if pp == PromotionPolicy.ALWAYS: + continue + if pp == PromotionPolicy.NHIT: + params = NhitParams + else: + # add handler for new policy here + assert False + + for p in params: + + def prepare(cache): + cache.add_core(core) + cache.set_promotion_policy(pp) + cache.save() + + def test(cache): + val = None + if pp == PromotionPolicy.NHIT: + if p == NhitParams.INSERTION_THRESHOLD: + val = 500 + elif p == NhitParams.TRIGGER_THRESHOLD: + val = 50 + else: + # add handler for new param here + assert False + cache.set_promotion_policy_param(pp, p, val) + cache.save() + + mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, None) + + +@pytest.mark.security +def test_surprise_shutdown_set_io_class_config(pyocf_ctx): + core_device = Volume(S.from_MiB(10)) + core = Core(device=core_device, try_add=False) + + class_range = range(0, IoClassesInfo.MAX_IO_CLASSES) + old_ioclass = [ + { + "_class_id": i, + "_name": f"old_{i}" if i > 0 else "unclassified", + "_max_size": i, + "_priority": i, + "_cache_mode": int(CacheMode.WB), + } + for i in range(IoClassesInfo.MAX_IO_CLASSES) + ] + new_ioclass = [ + { + "_class_id": i, + "_name": f"new_{i}" if i > 0 else "unclassified", + "_max_size": 2 * i, + "_priority": 2 * i, + 
"_cache_mode": int(CacheMode.WT), + } + for i in range(IoClassesInfo.MAX_IO_CLASSES) + ] + keys = old_ioclass[0].keys() + + def set_io_class_info(cache, desc): + ioclasses_info = IoClassesInfo() + for i in range(IoClassesInfo.MAX_IO_CLASSES): + ioclasses_info._config[i]._class_id = i + ioclasses_info._config[i]._name = desc[i]["_name"].encode("utf-8") + ioclasses_info._config[i]._priority = desc[i]["_priority"] + ioclasses_info._config[i]._cache_mode = desc[i]["_cache_mode"] + ioclasses_info._config[i]._max_size = desc[i]["_max_size"] + OcfLib.getInstance().ocf_mngt_cache_io_classes_configure( + cache, byref(ioclasses_info) + ) + + def prepare(cache): + cache.add_core(core) + set_io_class_info(cache, old_ioclass) + cache.save() + + def test(cache): + set_io_class_info(cache, new_ioclass) + cache.save() + + def check(cache, error_triggered): + curr_ioclass = [ + {k: info[k] for k in keys} + for info in [cache.get_partition_info(i) for i in class_range] + ] + assert curr_ioclass == old_ioclass or curr_ioclass == new_ioclass + + mngmt_op_surprise_shutdown_test(pyocf_ctx, test, prepare, check) From ae240f5aa8bff873b89819f483af5d0ae5a6be26 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Thu, 23 Dec 2021 14:36:48 +0100 Subject: [PATCH 11/13] pyocf: test update to handle zeroed metadata in attach With atomic superblock commit during cache attach, it is possible that a power failure interrupts the attach operation at a point where neither the new nor the old superblock is present - right after the superblock is cleared. 
Signed-off-by: Adam Rutkowski --- .../test_management_surprise_shutdown.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py b/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py index a1be98a..19bb053 100644 --- a/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py +++ b/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py @@ -427,14 +427,23 @@ def test_surprise_shutdown_cache_reinit(pyocf_ctx): device.disarm() - cache = Cache.load_from_device(device) + cache = None + status = OcfErrorCode.OCF_OK + try: + cache = Cache.load_from_device(device) + except OcfError as ex: + status = ex.error_code - stats = cache.get_stats() - if stats["conf"]["core_count"] == 0: - cache.add_core(core) - assert ocf_read(cache, core, io_offset) == Volume.VOLUME_POISON + if not cache: + assert status == OcfErrorCode.OCF_ERR_NO_METADATA + else: + stats = cache.get_stats() + if stats["conf"]["core_count"] == 0: + assert stats["usage"]["occupancy"]["value"] == 0 + cache.add_core(core) + assert ocf_read(cache, core, io_offset) == Volume.VOLUME_POISON - cache.stop() + cache.stop() error_io[IoDir.WRITE] += 1 From 2b7a2491436d353c9e1cbca5291ed280f5176460 Mon Sep 17 00:00:00 2001 From: Adam Rutkowski Date: Tue, 28 Dec 2021 11:57:13 +0100 Subject: [PATCH 12/13] pyocf: document custom markers in pytest.ini Signed-off-by: Adam Rutkowski --- tests/functional/pytest.ini | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/functional/pytest.ini b/tests/functional/pytest.ini index 1079615..1e71f5e 100644 --- a/tests/functional/pytest.ini +++ b/tests/functional/pytest.ini @@ -1,2 +1,5 @@ [pytest] +markers = + security: security objectives coverage + long: long, do not run by default addopts = --ignore=tests/security -m "not long" From a97bc61010c42e9c819b43496ba6e37baca1e454 Mon Sep 17 00:00:00 2001 From: 
Adam Rutkowski Date: Tue, 28 Dec 2021 12:01:17 +0100 Subject: [PATCH 13/13] pyocf: mark most surprise shutdown tests as long .. to skip by default Signed-off-by: Adam Rutkowski --- .../test_management_surprise_shutdown.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py b/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py index 19bb053..a15450e 100644 --- a/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py +++ b/tests/functional/tests/surprise_shutdown/test_management_surprise_shutdown.py @@ -138,6 +138,7 @@ def test_surprise_shutdown_add_core(pyocf_ctx): # power failure during core removal @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_remove_core(pyocf_ctx): core_device = Volume(S.from_MiB(10)) core = Core.using_device(core_device) @@ -156,6 +157,7 @@ def test_surprise_shutdown_remove_core(pyocf_ctx): @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_remove_core_with_data(pyocf_ctx): io_offset = mngmt_op_surprise_shutdown_test_io_offset core_device = Volume(S.from_MiB(10)) @@ -183,6 +185,7 @@ def test_surprise_shutdown_remove_core_with_data(pyocf_ctx): # power failure during core add after previous core removed @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_swap_core(pyocf_ctx): core_device_1 = Volume(S.from_MiB(10), uuid="dev1") core_device_2 = Volume(S.from_MiB(10), uuid="dev2") @@ -225,6 +228,7 @@ def test_surprise_shutdown_swap_core(pyocf_ctx): # power failure during core add after previous core removed @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_swap_core_with_data(pyocf_ctx): core_device_1 = Volume(S.from_MiB(10), uuid="dev1") core_device_2 = Volume(S.from_MiB(10), uuid="dev2") @@ -274,6 +278,7 @@ def test_surprise_shutdown_swap_core_with_data(pyocf_ctx): # to avoid loading improperly initialized cache? # 2. 
uuid checksum mismatch should not allow cache to load @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_start_cache(pyocf_ctx): error_triggered = True error_io_seq_no = 0 @@ -320,6 +325,7 @@ def test_surprise_shutdown_start_cache(pyocf_ctx): @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_stop_cache(pyocf_ctx): core_device = Volume(S.from_MiB(10)) error_triggered = True @@ -463,6 +469,7 @@ def _test_surprise_shutdown_mngmt_generic(pyocf_ctx, func): @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_change_cache_mode(pyocf_ctx): _test_surprise_shutdown_mngmt_generic( pyocf_ctx, lambda cache, core: cache.change_cache_mode(CacheMode.WT) @@ -470,6 +477,7 @@ def test_surprise_shutdown_change_cache_mode(pyocf_ctx): @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_set_cleaning_policy(pyocf_ctx): core_device = Volume(S.from_MiB(10)) core = Core(device=core_device, try_add=False) @@ -490,6 +498,7 @@ def test_surprise_shutdown_set_cleaning_policy(pyocf_ctx): @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_set_seq_cut_off_policy(pyocf_ctx): core_device = Volume(S.from_MiB(10)) core = Core(device=core_device, try_add=False) @@ -510,6 +519,7 @@ def test_surprise_shutdown_set_seq_cut_off_policy(pyocf_ctx): @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_set_seq_cut_off_promotion(pyocf_ctx): _test_surprise_shutdown_mngmt_generic( pyocf_ctx, lambda cache, core: cache.set_seq_cut_off_promotion(256) @@ -517,6 +527,7 @@ def test_surprise_shutdown_set_seq_cut_off_promotion(pyocf_ctx): @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_set_seq_cut_off_threshold(pyocf_ctx): _test_surprise_shutdown_mngmt_generic( pyocf_ctx, lambda cache, core: cache.set_seq_cut_off_threshold(S.from_MiB(2).B) @@ -524,6 +535,7 @@ def test_surprise_shutdown_set_seq_cut_off_threshold(pyocf_ctx): @pytest.mark.security +@pytest.mark.long def 
test_surprise_shutdown_set_cleaning_policy_param(pyocf_ctx): core_device = Volume(S.from_MiB(10)) core = Core(device=core_device, try_add=False) @@ -575,6 +587,7 @@ def test_surprise_shutdown_set_cleaning_policy_param(pyocf_ctx): @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_set_promotion_policy(pyocf_ctx): core_device = Volume(S.from_MiB(10)) core = Core(device=core_device, try_add=False) @@ -595,6 +608,7 @@ def test_surprise_shutdown_set_promotion_policy(pyocf_ctx): @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_set_promotion_policy_param(pyocf_ctx): core_device = Volume(S.from_MiB(10)) core = Core(device=core_device, try_add=False) @@ -632,6 +646,7 @@ def test_surprise_shutdown_set_promotion_policy_param(pyocf_ctx): @pytest.mark.security +@pytest.mark.long def test_surprise_shutdown_set_io_class_config(pyocf_ctx): core_device = Volume(S.from_MiB(10)) core = Core(device=core_device, try_add=False)