open-cas-linux/utils/casctl
Jan Musial aaedfb35dd Change startup procedure
The current startup procedure works on the assumption that we deal
with asynchronously appearing devices in an asynchronous way
(udev rules) and that synchronous events in the system (systemd units)
won't interfere. If we broke anything (mounts), we would just
take those units and restart them. This tactic worked as long
as restarting systemd units took a reasonable amount of time.

As hackish as it sounds, it worked on all systems that the software
has been validated on. Unfortunately, it stopped working because
*.mount units now take MUCH longer to restart, even on
mainstream OSes, so it's time to change.

This change implements an open-cas systemd service which waits
synchronously with the systemd bootup process for all required Open CAS
devices to start. If they don't, we fail the boot process just as
failing mounts would. We also make sure that this process takes place
before any mounts (aside from the root FS and other critical FS's) are
even attempted. Now opencas-mount-utility can be discarded.

To override this behaviour on a per-core basis, you can specify the
lazy_startup=true option in opencas.conf.

Signed-off-by: Jan Musial <jan.musial@intel.com>
2019-11-14 12:20:08 +01:00

195 lines
5.8 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Copyright(c) 2012-2019 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#
import argparse
import sys
import re
import opencas
def eprint(*args, **kwargs):
    """Print to standard error.

    Accepts the same positional and keyword arguments as the built-in
    ``print``; the output stream is always ``sys.stderr``.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
# Start - load all the caches and add cores
def start():
    """Start every cache listed in /etc/opencas/opencas.conf.

    The configuration is read with ``allow_incomplete=True``. If it cannot
    be parsed, the error is printed and the process exits with status 1.
    A cache that fails to start is reported on stderr and the remaining
    caches are still attempted.
    """
    try:
        config = opencas.cas_config.from_file(
            '/etc/opencas/opencas.conf', allow_incomplete=True
        )
    except Exception as err:
        eprint(err)
        eprint('Unable to parse config file.')
        exit(1)

    for cache in config.caches.values():
        try:
            # NOTE(review): the second argument is presumably the "load
            # existing metadata" flag - confirm against opencas.start_cache.
            opencas.start_cache(cache, True)
        except opencas.casadm.CasadmError as err:
            reason = err.result.stderr
            eprint('Unable to load cache {0} ({1}). Reason:\n{2}'
                   .format(cache.cache_id, cache.device, reason))
# Initial cache start
def add_core_recursive(core, config):
    """Add ``core`` to its cache, adding the CAS device it sits on first.

    When ``core.device`` is itself an exported CAS device
    (``/dev/cas<cache_id>-<core_id>``), the core backing that device is
    looked up in ``config`` and added before ``core``.

    ``core.added`` and ``core.marked`` must be initialised to ``False`` by
    the caller. ``marked`` records that a core has been visited so that a
    configuration loop is detected.

    Args:
        core: core configuration object; ``device``, ``cache_id``, ``added``
            and ``marked`` attributes are used here.
        config: parsed configuration; ``config.caches[<id>].cores[<id>]`` is
            used to resolve the backing core.

    Returns:
        ``True`` if adding this core failed, otherwise the result reported
        for the backing core (``False`` when there is none or the core was
        already added).

    Exits with status 3 if a recursive core configuration is detected.
    """
    with_error = False

    if core.added:
        return with_error

    if core.marked:
        # Visited but never added: we came back to it through its own chain.
        eprint('Unable to add core {0} to cache {1}. Reason:\nRecursive core configuration!'
               .format(core.device, core.cache_id))
        exit(3)

    core.marked = True

    # Cache and core ids may have more than one digit (e.g. /dev/cas10-12),
    # so capture whole numbers rather than a single digit each.
    match = re.match(r'/dev/cas(\d+)-(\d+).*', core.device)
    if match:
        cache_id, core_id = match.groups()
        with_error = add_core_recursive(
            config.caches[int(cache_id)].cores[int(core_id)], config
        )

    try:
        opencas.add_core(core, False)
        core.added = True
    except opencas.casadm.CasadmError as e:
        eprint('Unable to add core {0} to cache {1}. Reason:\n{2}'
               .format(core.device, core.cache_id, e.result.stderr))
        with_error = True

    return with_error
def init(force):
    """Perform the initial start of all configured caches and cores.

    Reads /etc/opencas/opencas.conf, starts and configures every cache,
    then adds every core (backing CAS devices first). The function always
    terminates the process:

    * 1 - the configuration could not be parsed, or (without ``force``) one
      of the cache devices is a cache holding dirty data;
    * casadm's exit code - checking a cache device failed;
    * 2 - at least one cache could not be started or configured, or at
      least one core could not be added;
    * 0 - everything succeeded.

    Args:
        force: skip the dirty-data check and pass ``force`` on to
            ``opencas.start_cache``.
    """
    try:
        config = opencas.cas_config.from_file('/etc/opencas/opencas.conf')
    except Exception as err:
        eprint(err)
        eprint('Unable to parse config file.')
        exit(1)

    if not force:
        # Refuse to initialise over a device that still holds dirty data.
        for cache in config.caches.values():
            try:
                status = opencas.check_cache_device(cache.device)
            except opencas.casadm.CasadmError as err:
                eprint('Unable to check status of device {0}. Reason:\n{1}'
                       .format(cache.device, err.result.stderr))
                exit(err.result.exit_code)
            else:
                is_cache = status['Is cache'] == 'yes'
                if is_cache and status['Cache dirty'] == 'yes':
                    eprint('Unable to perform initial configuration.\n'
                           'One of cache devices contains dirty data.')
                    exit(1)

    failed = False

    for cache in config.caches.values():
        try:
            opencas.start_cache(cache, False, force)
        except opencas.casadm.CasadmError as err:
            eprint('Unable to start cache {0} ({1}). Reason:\n{2}'
                   .format(cache.cache_id, cache.device, err.result.stderr))
            failed = True

        # Configuration is attempted even when the start above failed.
        try:
            opencas.configure_cache(cache)
        except opencas.casadm.CasadmError as err:
            eprint('Unable to configure cache {0} ({1}). Reason:\n{2}'
                   .format(cache.cache_id, cache.device, err.result.stderr))
            failed = True

    # Reset the bookkeeping flags on every core before any of them is added,
    # because adding one core may recurse into another.
    for core in config.cores:
        core.added = False
        core.marked = False

    for core in config.cores:
        if add_core_recursive(core, config):
            failed = True

    exit(2 if failed else 0)
def settle(timeout, interval):
    """Wait for Open CAS devices to start, then exit with the outcome.

    Delegates the waiting to ``opencas.wait_for_startup(timeout, interval)``.
    The process exits with status 1 if that call raises or returns a
    non-empty collection of devices (each is reported on stderr), and with
    status 0 otherwise.

    Args:
        timeout: value passed through as the wait timeout.
        interval: value passed through as the polling interval.
    """
    try:
        pending = opencas.wait_for_startup(timeout, interval)
    except Exception as err:
        eprint(err)
        exit(1)

    if not pending:
        exit(0)

    eprint("Open CAS initialization failed. Couldn't set up all required devices")
    for dev in pending:
        line = "Couldn't add device {} as core {} in cache {}".format(
            dev.device, dev.core_id, dev.cache_id
        )
        eprint(line)
    exit(1)
# Stop - detach cores and stop caches
def stop(flush):
    """Stop the running configuration via ``opencas.stop``.

    Any exception raised by ``opencas.stop`` is printed to stderr and not
    propagated, so the function returns normally either way.

    Args:
        flush: forwarded unchanged to ``opencas.stop``.
    """
    try:
        opencas.stop(flush)
    except Exception as err:
        eprint(err)
# Command line arguments parsing
class cas:
    """Command-line front end of ``casctl``.

    Instantiating the class parses ``sys.argv`` and runs the selected
    action (``init``, ``start``, ``settle`` or ``stop``) through the
    matching ``command_<action>`` method. With no arguments the help text
    is printed and nothing else happens.
    """

    def __init__(self):
        parser = self._build_parser()

        argv = sys.argv[1:]
        if not argv:
            parser.print_help()
            return

        args = parser.parse_args(argv)
        # Each sub-parser stores its own name in ``args.command``.
        handler = getattr(self, "command_" + args.command)
        handler(args)

    @staticmethod
    def _build_parser():
        """Create the argument parser with one sub-parser per action."""
        parser = argparse.ArgumentParser(prog="casctl")
        actions = parser.add_subparsers(title="actions")

        init_parser = actions.add_parser(
            "init", help="Setup initial configuration"
        )
        init_parser.set_defaults(command="init")
        init_parser.add_argument(
            "--force", action="store_true", help="Force cache start"
        )

        start_parser = actions.add_parser(
            "start", help="Start cache configuration"
        )
        start_parser.set_defaults(command="start")

        settle_parser = actions.add_parser(
            "settle", help="Wait for startup of devices"
        )
        settle_parser.set_defaults(command="settle")
        settle_parser.add_argument(
            "--timeout",
            action="store",
            help="How long should command wait [s]",
            default=270,
            type=int,
        )
        settle_parser.add_argument(
            "--interval",
            action="store",
            help="Polling interval [s]",
            default=5,
            type=int,
        )

        stop_parser = actions.add_parser(
            "stop", help="Stop cache configuration"
        )
        stop_parser.set_defaults(command="stop")
        stop_parser.add_argument(
            "--flush", action="store_true", help="Flush data before stopping"
        )

        return parser

    def command_init(self, args):
        """Run the ``init`` action with the parsed ``--force`` flag."""
        init(args.force)

    def command_start(self, args):
        """Run the ``start`` action."""
        start()

    def command_settle(self, args):
        """Run the ``settle`` action with the parsed timeout and interval."""
        settle(args.timeout, args.interval)

    def command_stop(self, args):
        """Run the ``stop`` action with the parsed ``--flush`` flag."""
        stop(args.flush)
# Run the command-line interface only when this file is executed as a script;
# constructing ``cas`` parses sys.argv and dispatches the requested action.
if __name__ == '__main__':
    cas()