Merge pull request #1279 from jfckm/startup-negative-tests

Startup negative tests
This commit is contained in:
Robert Baldyga 2022-07-21 13:57:38 +02:00 committed by GitHub
commit bf822ada23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 197 additions and 35 deletions

View File

@ -0,0 +1,42 @@
#
# Copyright(c) 2022 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause
#
from pathlib import Path
from datetime import timedelta
from string import Template
from textwrap import dedent
from test_tools.fs_utils import check_if_directory_exists, create_directory, write_file, remove
from test_utils.systemd import reload_daemon
# Directory holding systemd drop-in overrides for the open-cas.service unit.
opencas_drop_in_directory = Path("/etc/systemd/system/open-cas.service.d/")
# Name of the drop-in file written by the tests inside the directory above.
test_drop_in_file = Path("10-modified-timeout.conf")

# Drop-in body. `$timeout` and `$interval` are substituted with whole seconds.
# NOTE(review): per systemd.service documentation, the empty `ExecStart=` resets
# the command list inherited from the packaged unit and the leading `-` makes a
# failing `casctl settle` non-fatal for the unit - confirm against the unit file.
drop_in_timeout_template = Template(
    dedent(
        """
        [Service]
        ExecStart=
        ExecStart=/bin/sh -c '/usr/bin/echo "Default open-cas.service config overwritten by test!" > /dev/kmsg'
        ExecStart=-/sbin/casctl settle --timeout $timeout --interval $interval
        TimeoutStartSec=$timeout
        """
    ).strip()
)


def render_timeout_drop_in(
    timeout: timedelta, interval: timedelta = timedelta(seconds=1)
) -> str:
    """Return the drop-in file contents for the given timeout and interval.

    Both values are rendered as whole seconds. `total_seconds()` is used
    instead of the `.seconds` attribute because `.seconds` holds only the
    seconds *component* of the timedelta (e.g. `timedelta(days=1).seconds`
    is 0), which would silently produce a wrong timeout for long spans.
    Fractions of a second are truncated by `int()`.

    Args:
        timeout: value substituted for both `casctl settle --timeout` and
            `TimeoutStartSec`.
        interval: value substituted for `casctl settle --interval`.

    Returns:
        The rendered drop-in text, without leading/trailing whitespace.
    """
    return drop_in_timeout_template.substitute(
        timeout=int(timeout.total_seconds()),
        interval=int(interval.total_seconds()),
    )


def set_cas_service_timeout(
    timeout: timedelta = timedelta(minutes=30),
    interval: timedelta = timedelta(seconds=1),
):
    """Write the test drop-in overriding open-cas.service and reload systemd.

    Creates the drop-in directory when it does not exist, writes the rendered
    template to `test_drop_in_file` inside it and calls `reload_daemon()`.

    Args:
        timeout: startup/settle timeout to configure (default 30 minutes).
        interval: settle polling interval to configure. Defaults to one
            second, which renders the same `--interval 1` as the previously
            hard-coded value, so existing callers are unaffected.
    """
    if not check_if_directory_exists(opencas_drop_in_directory):
        create_directory(opencas_drop_in_directory, parents=True)

    contents = render_timeout_drop_in(timeout, interval)
    write_file(str(opencas_drop_in_directory / test_drop_in_file), contents)
    reload_daemon()
def clear_cas_service_timeout():
    """Remove the open-cas.service drop-in directory and reload systemd.

    The whole `opencas_drop_in_directory` is removed recursively, not only
    the file written by the test, and `reload_daemon()` is called afterwards.
    Removal is requested with `ignore_errors=True`.
    NOTE(review): presumably this makes the call a no-op when the directory
    is already absent - confirm against `fs_utils.remove`.
    """
    remove(
        opencas_drop_in_directory,
        recursive=True,
        force=True,
        ignore_errors=True,
    )
    reload_daemon()

View File

@ -18,6 +18,7 @@ from core.test_run_utils import TestRun
from api.cas import installer
from api.cas import casadm
from api.cas import git
from api.cas.cas_service import opencas_drop_in_directory
from storage_devices.raid import Raid
from storage_devices.ramdisk import RamDisk
from test_utils.os_utils import Udev, kill_all_io
@ -25,6 +26,7 @@ from test_utils.disk_finder import get_disk_serial_number
from test_tools.disk_utils import PartitionTable, create_partition_table
from test_tools.device_mapper import DeviceMapper
from test_tools.mdadm import Mdadm
from test_tools.fs_utils import remove
from log.logger import create_log, Log
from test_utils.singleton import Singleton
@ -236,6 +238,8 @@ def base_prepare(item):
except Exception:
pass # TODO: Reboot DUT if test is executed remotely
remove(opencas_drop_in_directory, recursive=True, ignore_errors=True)
from storage_devices.drbd import Drbd
if Drbd.is_installed():
__drbd_cleanup()

View File

@ -4,6 +4,7 @@
#
import pytest
from datetime import timedelta
from api.cas import casadm, casctl, casadm_parser
from api.cas.casadm_parser import get_caches, get_cores, get_cas_devices_dict
@ -17,6 +18,8 @@ from test_utils import fstab
from test_tools.dd import Dd
from test_utils.size import Unit, Size
from test_utils.os_utils import sync, Udev
from test_utils.emergency_escape import EmergencyEscape
from api.cas.cas_service import set_cas_service_timeout, clear_cas_service_timeout
mountpoint = "/mnt"
@ -256,7 +259,8 @@ def test_cas_startup_lazy():
)
@pytest.mark.skip(reason="not implemented")
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeSet([DiskType.hdd]))
def test_cas_startup_negative_missing_core():
"""
title: Test unsuccessful boot with CAS configuration
@ -264,21 +268,60 @@ def test_cas_startup_negative_missing_core():
Check that DUT doesn't boot successfully when using invalid CAS configuration
pass_criteria:
- DUT enters emergency mode
steps:
- Prepare one drive for caches and one for cores
- Create 2 cache partitions and 4 core partitions
- Create opencas.conf config for 2 caches each with 2 core partitions as cores
- Mark second cache as lazy_startup=True
- Run casctl init
- Run casctl stop
- Remove second cache cores partitions
- Reboot DUT
- Verify the DUT entered emergency mode
"""
pass
with TestRun.step("Create 2 cache partitions and 4 core partitons"):
cache_disk = TestRun.disks["cache"]
core_disk = TestRun.disks["core"]
cache_disk.create_partitions([Size(200, Unit.MebiByte)] * 2)
core_disk.create_partitions([Size(200, Unit.MebiByte)] * 4)
with TestRun.step(f"Add a cache configuration with cache device with `lazy_startup` flag"):
init_conf = InitConfig()
init_conf.add_cache(1, cache_disk.partitions[0], extra_flags="lazy_startup=True")
init_conf.add_core(1, 1, core_disk.partitions[0])
init_conf.add_core(1, 2, core_disk.partitions[1])
with TestRun.step(f"Add a cache configuration with core device with `lazy_startup` flag"):
init_conf.add_cache(2, cache_disk.partitions[1])
init_conf.add_core(2, 1, core_disk.partitions[2])
init_conf.add_core(2, 2, core_disk.partitions[3], extra_flags="lazy_startup=True")
init_conf.save_config_file()
sync()
with TestRun.step(f"Start and stop all the configurations using the casctl utility"):
output = casctl.init(True)
if output.exit_code != 0:
TestRun.fail(f"Failed to initialize caches from config file. Error: {output.stdout}")
casadm.stop_all_caches()
with TestRun.step(
"Disable udev to allow manipulating partitions without CAS being automatically loaded"
):
Udev.disable()
with TestRun.step(f"Remove core partition"):
core_disk.remove_partition(core_disk.partitions[0])
escape = EmergencyEscape()
escape.add_escape_method_command("/usr/bin/rm /etc/opencas/opencas.conf")
set_cas_service_timeout(timedelta(seconds=10), interval=timedelta(seconds=1))
with TestRun.step("Reboot DUT with emergency escape armed"):
with escape:
TestRun.executor.reboot()
TestRun.executor.wait_for_connection()
with TestRun.step("Verify the DUT entered emergency mode"):
dmesg_out = TestRun.executor.run_expect_success("dmesg").stdout.split("\n")
if not escape.verify_trigger_in_log(dmesg_out):
TestRun.LOGGER.error("DUT didn't enter emergency mode after reboot")
clear_cas_service_timeout()
InitConfig().create_default_init_config()
@pytest.mark.skip(reason="not implemented")
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@pytest.mark.require_disk("core", DiskTypeSet([DiskType.hdd]))
def test_cas_startup_negative_missing_cache():
"""
title: Test unsuccessful boot with CAS configuration
@ -286,18 +329,56 @@ def test_cas_startup_negative_missing_cache():
Check that DUT doesn't boot successfully when using invalid CAS configuration
pass_criteria:
- DUT enters emergency mode
steps:
- Prepare one drive for caches and one for cores
- Create 2 cache partitions and 4 core partitions
- Create opencas.conf config for 2 caches each with 2 core partitions as cores
- Mark cores of second cache as lazy_startup=True
- Run casctl init
- Run casctl stop
- Remove second cache partition
- Reboot DUT
- Verify the DUT entered emergency mode
"""
pass
with TestRun.step("Create 2 cache partitions and 4 core partitons"):
cache_disk = TestRun.disks["cache"]
core_disk = TestRun.disks["core"]
cache_disk.create_partitions([Size(200, Unit.MebiByte)] * 2)
core_disk.create_partitions([Size(200, Unit.MebiByte)] * 4)
with TestRun.step(f"Add a cache configuration with cache device with `lazy_startup` flag"):
init_conf = InitConfig()
init_conf.add_cache(1, cache_disk.partitions[0], extra_flags="lazy_startup=True")
init_conf.add_core(1, 1, core_disk.partitions[0])
init_conf.add_core(1, 2, core_disk.partitions[1])
with TestRun.step(f"Add a cache configuration with core devices with `lazy_startup` flag"):
init_conf.add_cache(2, cache_disk.partitions[1])
init_conf.add_core(2, 1, core_disk.partitions[2], extra_flags="lazy_startup=True")
init_conf.add_core(2, 2, core_disk.partitions[3], extra_flags="lazy_startup=True")
init_conf.save_config_file()
sync()
with TestRun.step(f"Start and stop all the configurations using the casctl utility"):
output = casctl.init(True)
if output.exit_code != 0:
TestRun.fail(f"Failed to initialize caches from config file. Error: {output.stdout}")
casadm.stop_all_caches()
with TestRun.step(
"Disable udev to allow manipulating partitions without CAS being automatically loaded"
):
Udev.disable()
with TestRun.step(f"Remove second cache partition"):
cache_disk.remove_partition(cache_disk.partitions[1])
escape = EmergencyEscape()
escape.add_escape_method_command("/usr/bin/rm /etc/opencas/opencas.conf")
set_cas_service_timeout(timedelta(minutes=1))
with TestRun.step("Reboot DUT with emergency escape armed"):
with escape:
TestRun.executor.reboot()
TestRun.executor.wait_for_connection()
with TestRun.step("Verify the DUT entered emergency mode"):
dmesg_out = TestRun.executor.run_expect_success("dmesg").stdout.split("\n")
if not escape.verify_trigger_in_log(dmesg_out):
TestRun.LOGGER.error("DUT didn't enter emergency mode after reboot")
clear_cas_service_timeout()
InitConfig().create_default_init_config()
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
@ -404,7 +485,7 @@ def test_failover_config_startup():
)
@pytest.mark.skip(reason="not implemented")
@pytest.mark.require_disk("cache", DiskTypeSet([DiskType.optane, DiskType.nand]))
def test_failover_config_startup_negative():
"""
title: Test unsuccessful boot with failover-specific configuration options
@ -414,17 +495,52 @@ def test_failover_config_startup_negative():
mode was in fact triggered.
pass_criteria:
- DUT enters emergency mode
steps:
- Prepare one drive for cache
- Create partition for cache
- Create opencas.conf config for single standby cache on created partition
- Run casctl init
- Run casctl stop
- Remove cache partition
- Reboot DUT
- Verify the DUT entered emergency mode
"""
pass
with TestRun.step("Create cache partition"):
cache_disk = TestRun.disks["cache"]
cache_disk.create_partitions([Size(200, Unit.MebiByte)])
with TestRun.step(f"Add a cache configuration with standby cache"):
init_conf = InitConfig()
init_conf.add_cache(
1,
cache_disk.partitions[0],
extra_flags="target_failover_state=standby,cache_line_size=4"
)
init_conf.save_config_file()
sync()
with TestRun.step(f"Start and stop all the configurations using the casctl utility"):
output = casctl.init(True)
if output.exit_code != 0:
TestRun.fail(f"Failed to initialize caches from config file. Error: {output.stdout}")
casadm.stop_all_caches()
with TestRun.step(
"Disable udev to allow manipulating partitions without CAS being automatically loaded"
):
Udev.disable()
with TestRun.step(f"Remove second cache partition"):
cache_disk.remove_partition(cache_disk.partitions[0])
escape = EmergencyEscape()
escape.add_escape_method_command("/usr/bin/rm /etc/opencas/opencas.conf")
set_cas_service_timeout(timedelta(seconds=32))
with TestRun.step("Reboot DUT with emergency escape armed"):
with escape:
TestRun.executor.reboot()
TestRun.executor.wait_for_connection()
with TestRun.step("Verify the DUT entered emergency mode"):
dmesg_out = TestRun.executor.run_expect_success("dmesg").stdout.split("\n")
if not escape.verify_trigger_in_log(dmesg_out):
TestRun.LOGGER.error("DUT didn't enter emergency mode after reboot")
clear_cas_service_timeout()
InitConfig().create_default_init_config()
def validate_cache(cache_mode):