Merge pull request #1534 from Kamoppl/kamilg/fix_scope_bugs_v3

Kamilg/fix scope bugs v3
This commit is contained in:
Katarzyna Treder 2024-09-30 10:05:34 +02:00 committed by GitHub
commit bc6165a185
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 130 additions and 90 deletions

View File

@ -508,7 +508,8 @@ def stop_all_caches() -> None:
caches = get_caches()
if not caches:
return
for cache in caches:
# Running "cache stop" on the reversed list to resolve the multilevel cache stop problem
for cache in reversed(caches):
stop_cache(cache_id=cache.cache_id, no_data_flush=True)

View File

@ -55,12 +55,12 @@ remove_inactive_dirty_core = [
]
stop_cache_incomplete = [
r"Error while removing cache \d+",
r"Error while stopping cache \d+",
r"Cache is in incomplete state - at least one core is inactive",
]
stop_cache_errors = [
r"Removed cache \d+ with errors",
r"Stopped cache \d+ with errors",
r"Error while writing to cache device",
]

View File

@ -226,14 +226,14 @@ class CacheIoClassStats(CoreIoClassStats):
class CacheConfigStats:
def __init__(self, stats_dict):
self.cache_id = stats_dict["Cache Id"]
self.cache_id = int(stats_dict["Cache Id"])
self.cache_size = parse_value(
value=stats_dict["Cache Size [4KiB Blocks]"], unit_type=UnitType.block_4k
)
self.cache_dev = stats_dict["Cache Device"]
self.exp_obj = stats_dict["Exported Object"]
self.core_dev = stats_dict["Core Devices"]
self.inactive_core_devices = stats_dict["Inactive Core Devices"]
self.core_dev = int(stats_dict["Core Devices"])
self.inactive_core_devices = int(stats_dict["Inactive Core Devices"])
self.write_policy = stats_dict["Write Policy"]
self.cleaning_policy = stats_dict["Cleaning Policy"]
self.promotion_policy = stats_dict["Promotion Policy"]
@ -361,6 +361,18 @@ class UsageStats:
self.free = parse_value(value=stats_dict[f"Free {unit}"], unit_type=unit)
self.clean = parse_value(value=stats_dict[f"Clean {unit}"], unit_type=unit)
self.dirty = parse_value(value=stats_dict[f"Dirty {unit}"], unit_type=unit)
if f"Inactive Occupancy {unit}" in stats_dict:
self.inactive_occupancy = parse_value(
value=stats_dict[f"Inactive Occupancy {unit}"], unit_type=unit
)
if f"Inactive Clean {unit}" in stats_dict:
self.inactive_clean = parse_value(
value=stats_dict[f"Inactive Clean {unit}"], unit_type=unit
)
if f"Inactive Dirty {unit}" in stats_dict:
self.inactive_dirty = parse_value(
value=stats_dict[f"Inactive Dirty {unit}"], unit_type=unit
)
def __str__(self):
return (

View File

@ -205,7 +205,7 @@ def test_attach_core_to_incomplete_cache_volume():
TestRun.fail("Core should be in inactive state.")
with TestRun.step("Plug core device."):
plug_device.plug()
plug_device.plug_all()
time.sleep(1)
with TestRun.step("Check if core status changed to active and CAS device is visible in OS."):
@ -228,6 +228,10 @@ def test_flush_inactive_devices():
- Flushing inactive CAS devices is possible neither by cleaning thread,
nor by calling cleaning methods
"""
staleness_time = Time(seconds=10)
wake_up_time = Time(seconds=1)
activity_threshold = Time(milliseconds=500)
with TestRun.step("Prepare devices."):
devices = prepare_devices([("cache", 1), ("core1", 1), ("core2", 1)])
cache_dev = devices["cache"].partitions[0]
@ -240,9 +244,9 @@ def test_flush_inactive_devices():
cache.set_cleaning_policy(CleaningPolicy.alru)
cache.set_params_alru(
FlushParametersAlru(
staleness_time=Time(seconds=10),
wake_up_time=Time(seconds=1),
activity_threshold=Time(milliseconds=500),
staleness_time=staleness_time,
wake_up_time=wake_up_time,
activity_threshold=activity_threshold,
)
)
@ -307,7 +311,7 @@ def test_flush_inactive_devices():
check_amount_of_dirty_data(dirty_lines_before)
with TestRun.step("Plug core disk and verify that this change is reflected on the cache list."):
plug_device.plug()
plug_device.plug_all()
time.sleep(1)
first_core.wait_for_status_change(CoreStatus.active)
cache_status = cache.get_status()
@ -377,7 +381,7 @@ def test_list_cache_and_cache_volumes():
TestRun.fail(f"Cache should be in incomplete state. Actual state: {cache_status}.")
with TestRun.step("Plug missing device and stop cache."):
plug_device.plug()
plug_device.plug_all()
time.sleep(1)
core.wait_for_status_change(CoreStatus.active)
cache_status = cache.get_status()
@ -425,7 +429,7 @@ def test_load_cache_with_inactive_core():
cli_messages.check_stderr_msg(output, cli_messages.load_inactive_core_missing)
with TestRun.step("Plug missing device and stop cache."):
plug_device.plug()
plug_device.plug_all()
time.sleep(1)
core.wait_for_status_change(CoreStatus.active)
cache_status = cache.get_status()
@ -514,7 +518,7 @@ def test_preserve_data_for_inactive_device():
with TestRun.step(
"Plug core disk using sysfs and verify this change is reflected " "on the cache list."
):
plug_device.plug()
plug_device.plug_all()
time.sleep(1)
if cache.get_status() != CacheStatus.running or core.get_status() != CoreStatus.active:
TestRun.fail(
@ -621,7 +625,8 @@ def test_print_statistics_inactive(cache_mode):
check_number_of_inactive_devices(inactive_stats_before, 2)
with TestRun.step("Attach one of detached core devices and add it to cache."):
first_plug_device.plug()
first_plug_device.plug_all()
second_plug_device.unplug()
time.sleep(1)
first_core_status = first_core.get_status()
if first_core_status != CoreStatus.active:
@ -639,21 +644,21 @@ def test_print_statistics_inactive(cache_mode):
lazy_write_traits = CacheModeTrait.LazyWrites in cache_mode_traits
lazy_writes_or_no_insert_write_traits = not insert_write_traits or lazy_write_traits
check_inactive_usage_stats(
inactive_stats_before.inactive_usage_stats.inactive_occupancy,
inactive_stats_after.inactive_usage_stats.inactive_occupancy,
check_usage_stats(
inactive_stats_before.usage_stats.inactive_occupancy,
inactive_stats_after.usage_stats.inactive_occupancy,
"inactive occupancy",
not insert_write_traits,
)
check_inactive_usage_stats(
inactive_stats_before.inactive_usage_stats.inactive_clean,
inactive_stats_after.inactive_usage_stats.inactive_clean,
check_usage_stats(
inactive_stats_before.usage_stats.inactive_clean,
inactive_stats_after.usage_stats.inactive_clean,
"inactive clean",
lazy_writes_or_no_insert_write_traits,
)
check_inactive_usage_stats(
inactive_stats_before.inactive_usage_stats.inactive_dirty,
inactive_stats_after.inactive_usage_stats.inactive_dirty,
check_usage_stats(
inactive_stats_before.usage_stats.inactive_dirty,
inactive_stats_after.usage_stats.inactive_dirty,
"inactive dirty",
not lazy_write_traits,
)
@ -661,7 +666,7 @@ def test_print_statistics_inactive(cache_mode):
with TestRun.step("Check statistics per inactive core."):
inactive_core_stats = second_core.get_statistics()
if (
inactive_stats_after.inactive_usage_stats.inactive_occupancy
inactive_stats_after.usage_stats.inactive_occupancy
== inactive_core_stats.usage_stats.occupancy
):
TestRun.LOGGER.info(
@ -671,7 +676,7 @@ def test_print_statistics_inactive(cache_mode):
TestRun.fail(
f"Inactive core occupancy ({inactive_core_stats.usage_stats.occupancy}) "
f"should be the same as cache inactive occupancy "
f"({inactive_stats_after.inactive_usage_stats.inactive_occupancy})."
f"({inactive_stats_after.usage_stats.inactive_occupancy})."
)
with TestRun.step("Remove inactive core from cache and check if cache is in running state."):
@ -692,7 +697,7 @@ def test_print_statistics_inactive(cache_mode):
check_number_of_inactive_devices(cache_stats, 0)
with TestRun.step("Plug missing disk and stop cache."):
second_plug_device.plug()
second_plug_device.plug_all()
time.sleep(1)
cache.stop()
@ -743,7 +748,7 @@ def test_remove_detached_cores():
with TestRun.step("Unplug core device from system and plug it back."):
plug_device.unplug()
time.sleep(2)
plug_device.plug()
plug_device.plug_all()
time.sleep(1)
with TestRun.step(
@ -891,7 +896,7 @@ def test_remove_inactive_devices():
core.remove_inactive(force=True)
with TestRun.step("Plug missing disk and stop cache."):
plug_device.plug()
plug_device.plug_all()
time.sleep(1)
casadm.stop_all_caches()
@ -951,7 +956,7 @@ def test_stop_cache_with_inactive_devices():
cache.stop(no_data_flush=True)
with TestRun.step("Plug missing core device."):
plug_device.plug()
plug_device.plug_all()
time.sleep(1)
with TestRun.step("Load cache."):
@ -977,7 +982,7 @@ def test_stop_cache_with_inactive_devices():
with TestRun.step("Stop cache with 'no data flush' option and plug missing core device."):
cache.stop(no_data_flush=True)
plug_device.plug()
plug_device.plug_all()
# Methods used in tests:
@ -989,7 +994,7 @@ def try_stop_incomplete_cache(cache):
cli_messages.check_stderr_msg(e.output, cli_messages.stop_cache_incomplete)
def check_inactive_usage_stats(stats_before, stats_after, stat_name, should_be_zero):
def check_usage_stats(stats_before, stats_after, stat_name, should_be_zero):
if should_be_zero and stats_before == Size.zero() and stats_after == Size.zero():
TestRun.LOGGER.info(f"{stat_name} value before and after equals 0 as expected.")
elif not should_be_zero and stats_after < stats_before:
@ -1001,7 +1006,7 @@ def check_inactive_usage_stats(stats_before, stats_after, stat_name, should_be_z
def check_number_of_inactive_devices(stats: CacheStats, expected_num):
inactive_core_num = stats.config_stats.inactive_core_dev
inactive_core_num = stats.config_stats.inactive_core_devices
if inactive_core_num != expected_num:
TestRun.fail(
f"There is wrong number of inactive core devices in cache statistics. "
@ -1011,9 +1016,9 @@ def check_number_of_inactive_devices(stats: CacheStats, expected_num):
def check_if_inactive_section_exists(stats, should_exist: bool = True):
    """Fail the test if the inactive usage section's presence is not as expected.

    :param stats: cache statistics object exposing a ``usage_stats`` attribute.
    :param should_exist: True when the inactive section is expected to be
        present in the usage statistics, False when it must be absent.
    """
    TestRun.LOGGER.info(str(stats))
    # The usage statistics object gets its `inactive_occupancy` attribute only
    # when the parsed output contains that row, so attribute presence is the
    # signal. A membership test (`"inactive_occupancy" in usage_stats`) would
    # not look at attribute names at all.
    has_inactive_section = hasattr(stats.usage_stats, "inactive_occupancy")
    if not should_exist and has_inactive_section:
        TestRun.fail("There is an inactive section in cache usage statistics.")
    elif should_exist and not has_inactive_section:
        TestRun.fail("There is no inactive section in cache usage statistics.")

View File

@ -36,15 +36,17 @@ def test_udev_core_partition():
"""
cores_count = 4
with TestRun.step("Create four partitions on core device and one on cache device."):
with TestRun.step("Prepare cache and core devices"):
cache_disk = TestRun.disks["cache"]
cache_disk.create_partitions([Size(1, Unit.GibiByte)])
cache_dev = cache_disk.partitions[0]
core_disk = TestRun.disks["core"]
cache_disk.create_partitions([Size(1, Unit.GibiByte)])
core_disk.create_partitions([Size(2, Unit.GibiByte)] * cores_count)
cache_dev = cache_disk.partitions[0]
core_devices = core_disk.partitions
with TestRun.step("Start cache and add created partitions as cores."):
with TestRun.step("Start cache and add cores"):
cache = casadm.start_cache(cache_dev, force=True)
for dev in core_devices:
cache.add_core(dev)
@ -83,30 +85,35 @@ def test_udev_core():
- Core devices are listed in core pool when cache is not available
- Core devices are moved from core pool and attached to cache after plugging cache device
"""
with TestRun.step("Start cache and add core."):
with TestRun.step("Prepare cache and core devices"):
cache_disk = TestRun.disks["cache"]
cache_disk.create_partitions([Size(1, Unit.GibiByte)])
cache_dev = cache_disk.partitions[0]
core_disk = TestRun.disks["core"]
cache_disk.create_partitions([Size(1, Unit.GibiByte)])
core_disk.create_partitions([Size(2, Unit.GibiByte)])
cache_dev = cache_disk.partitions[0]
core_dev = core_disk.partitions[0]
with TestRun.step("Start cache and add core"):
cache = casadm.start_cache(cache_dev, force=True)
core = cache.add_core(core_dev)
with TestRun.step("Create init config from running CAS configuration."):
with TestRun.step("Create init config from running CAS configuration"):
InitConfig.create_init_config_from_running_configuration()
with TestRun.step("Stop cache."):
with TestRun.step("Stop cache"):
cache.stop()
with TestRun.step("Unplug core disk."):
with TestRun.step("Unplug core disk"):
core_disk.unplug()
with TestRun.step("Plug core disk."):
with TestRun.step("Plug core disk"):
core_disk.plug_all()
time.sleep(1)
with TestRun.step("Check if core device is listed in core pool."):
with TestRun.step("Check if core device is listed in core pool"):
check_if_dev_in_core_pool(core_dev)
with TestRun.step("Unplug cache disk."):
@ -275,7 +282,7 @@ def test_neg_udev_cache_load():
if len(cas_devices["caches"]) != 1:
TestRun.LOGGER.error(f"There is wrong number of caches. Expected: 1, actual: "
f"{len(cas_devices['caches'])}")
elif cas_devices["caches"][1]["device"] != cache_disk.partitions[0].path or \
elif cas_devices["caches"][1]["device_path"] != cache_disk.partitions[0].path or \
CacheStatus[(cas_devices["caches"][1]["status"]).lower()] != CacheStatus.running:
TestRun.LOGGER.error(f"Cache did not load properly: {cas_devices['caches'][1]}")
if len(cas_devices["cores"]) != 2:
@ -286,7 +293,7 @@ def test_neg_udev_cache_load():
for i in first_cache_core_numbers:
correct_core_devices.append(core_disk.partitions[i].path)
for core in cas_devices["cores"].values():
if core["device"] not in correct_core_devices or \
if core["device_path"] not in correct_core_devices or \
CoreStatus[core["status"].lower()] != CoreStatus.active or \
core["cache_id"] != 1:
TestRun.LOGGER.error(f"Core did not load correctly: {core}.")
@ -305,14 +312,16 @@ def test_neg_udev_cache_load():
for i in range(0, cores_count):
if i not in first_cache_core_numbers:
core_pool_expected_devices.append(core_disk.partitions[i].path)
for c in cas_devices["core_pool"]:
if c["device"] not in core_pool_expected_devices:
core_pool = cas_devices["core_pool"]
for c in core_pool.values():
if c["device_path"] not in core_pool_expected_devices:
TestRun.LOGGER.error(f"Wrong core device added to core pool: {c}.")
def check_if_dev_in_core_pool(dev, should_be_in_core_pool=True):
cas_devices_dict = casadm_parser.get_cas_devices_dict()
is_in_core_pool = any(dev.path == d["device"] for d in cas_devices_dict["core_pool"])
is_in_core_pool = any(dev.path == d["device_path"]
for d in cas_devices_dict["core_pool"].values())
if not (should_be_in_core_pool ^ is_in_core_pool):
TestRun.LOGGER.info(f"Core device {dev.path} is"
f"{'' if should_be_in_core_pool else ' not'} listed in core pool "

View File

@ -1,9 +1,10 @@
#
# Copyright(c) 2020-2022 Intel Corporation
# Copyright(c) 2024 Huawei Technologies Co., Ltd.
# SPDX-License-Identifier: BSD-3-Clause
#
import os
import posixpath
import pytest
from api.cas import casadm
@ -17,9 +18,6 @@ from test_utils.os_utils import drop_caches, DropCachesMode, sync
from test_utils.size import Size, Unit
mount_point = "/mnt/test"
@pytest.mark.os_dependent
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeLowerThan("cache"))
@ -30,51 +28,65 @@ mount_point = "/mnt/test"
def test_load_after_clean_shutdown(reboot_type, cache_mode, filesystem):
"""
title: Planned system shutdown test.
description: Test for data consistency after clean system shutdown.
description: |
Test for data consistency after clean system shutdown.
pass_criteria:
- DUT should reboot successfully.
- DUT reboot successful.
- Checksum of file on core device should be the same before and after reboot.
"""
with TestRun.step("Prepare CAS device."):
cache_disk = TestRun.disks['cache']
mount_point = "/mnt/test"
with TestRun.step("Prepare cache and core devices"):
cache_disk = TestRun.disks["cache"]
core_dev = TestRun.disks["core"]
cache_disk.create_partitions([Size(1, Unit.GibiByte)])
cache_dev = cache_disk.partitions[0]
core_dev = TestRun.disks['core']
with TestRun.step("Start cache and add core"):
cache = casadm.start_cache(cache_dev, cache_mode, force=True)
core = cache.add_core(core_dev)
with TestRun.step("Create filesystem on the core device and mount it"):
core.create_filesystem(filesystem, blocksize=int(Size(1, Unit.Blocks4096)))
core.mount(mount_point)
with TestRun.step("Create file on cache and count its checksum."):
test_file = File(os.path.join(mount_point, "test_file"))
Dd()\
.input("/dev/zero")\
.output(test_file.full_path)\
.block_size(Size(1, Unit.KibiByte))\
.count(1024)\
.run()
with TestRun.step("Create file on exported object"):
test_file = File(posixpath.join(mount_point, "test_file"))
dd = (
Dd()
.input("/dev/zero")
.output(test_file.full_path)
.block_size(Size(1, Unit.KibiByte))
.count(1024)
)
dd.run()
with TestRun.step("Calculate test file md5sums before reboot"):
test_file.refresh_item()
test_file_md5 = test_file.md5sum()
sync()
drop_caches(DropCachesMode.ALL)
with TestRun.step("Reset platform."):
with TestRun.step("Reset platform"):
if reboot_type == "soft":
TestRun.executor.reboot()
else:
power_control = TestRun.plugin_manager.get_plugin('power_control')
power_control = TestRun.plugin_manager.get_plugin("power_control")
power_control.power_cycle()
with TestRun.step("Load cache."):
with TestRun.step("Load cache and mount core"):
casadm.load_cache(cache_dev)
core.mount(mount_point)
with TestRun.step("Check file md5sum."):
with TestRun.step("Compare test file md5sums"):
test_file.refresh_item()
if test_file_md5 != test_file.md5sum():
TestRun.LOGGER.error("Checksums does not match - file is corrupted.")
else:
TestRun.LOGGER.info("File checksum is correct.")
with TestRun.step("Remove test file."):
with TestRun.step("Remove test file"):
test_file.remove()

View File

@ -1,9 +1,10 @@
#
# Copyright(c) 2020-2021 Intel Corporation
# Copyright(c) 2024 Huawei Technologies Co., Ltd.
# SPDX-License-Identifier: BSD-3-Clause
#
import time
import time
import pytest
from api.cas import cas_module, casctl
@ -22,20 +23,21 @@ def test_init_status():
- CAS management device present in OS when CAS modules are loaded.
- CAS management device not present in OS when CAS modules are not loaded.
"""
with TestRun.step("Check if CAS management device is present in OS."):
with TestRun.step("Check if CAS management device is present in OS"):
time.sleep(5)
if cas_module.is_cas_management_dev_present():
TestRun.LOGGER.info("CAS management device is present in OS when CAS module is loaded.")
else:
TestRun.fail("CAS management device is not present in OS when CAS module is loaded.")
with TestRun.step("Remove CAS module."):
with TestRun.step("Remove CAS module"):
cas_module.unload_all_cas_modules()
with TestRun.step("Stop CAS service."):
with TestRun.step("Stop CAS service"):
casctl.stop()
with TestRun.step("Check if CAS management device is not present in OS."):
with TestRun.step("Check if CAS management device is not present in OS"):
time.sleep(5)
if not cas_module.is_cas_management_dev_present():
TestRun.LOGGER.info(
@ -43,7 +45,6 @@ def test_init_status():
else:
TestRun.fail("CAS management device is present in OS when CAS module is not loaded.")
with TestRun.step("Load CAS modules and start CAS service."):
with TestRun.step("Load CAS modules and start CAS service"):
os_utils.load_kernel_module(CasModule.cache.value)
os_utils.load_kernel_module(CasModule.disk.value)
casctl.start()