
The current startup procedure assumes that asynchronously appearing devices can be handled in an asynchronous way (udev rules) and that synchronous events in the system (systemd units) won't interfere. If we broke anything (e.g. mounts), we would simply take the affected units and restart them. This tactic worked as long as restarting systemd units took a reasonable amount of time. As hackish as it sounds, it worked on all systems that the software has been validated on. Unfortunately, it stopped working because *.mount units now take MUCH longer to restart, even on mainstream OSes, so it's time for a change. This change implements an open-cas systemd service which waits, synchronously with the systemd boot process, for all required Open CAS devices to start. If they don't start, we fail the boot process, just as failing mounts would. We also make sure that this takes place before any mounts (aside from the root FS and other critical filesystems) are even attempted. opencas-mount-utility can now be discarded. To override this behaviour on a per-core basis, you can specify the lazy_startup=true option in opencas.conf. Signed-off-by: Jan Musial <jan.musial@intel.com>
195 lines
5.8 KiB
Python
Executable File
195 lines
5.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# Copyright(c) 2012-2019 Intel Corporation
|
|
# SPDX-License-Identifier: BSD-3-Clause-Clear
|
|
#
|
|
|
|
import argparse
|
|
import sys
|
|
import re
|
|
import opencas
|
|
|
|
def eprint(*args, **kwargs):
    """Print to standard error.

    Accepts the same positional and keyword arguments as the built-in
    ``print``; the ``file`` keyword is supplied here, so callers must not
    pass it themselves.
    """
    # Look sys.stderr up at call time so a redirected stream is honoured.
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
|
|
|
|
# Start - load all the caches and add cores
|
|
|
|
def start():
    """Start every cache listed in /etc/opencas/opencas.conf.

    The configuration is read with ``allow_incomplete=True``. If it cannot
    be parsed, the error is printed to stderr and the process exits with
    status 1.

    Each cache is then started individually. A casadm failure for one cache
    is reported on stderr and does not prevent the remaining caches from
    being attempted; it also does not change the exit status.
    """
    try:
        config = opencas.cas_config.from_file(
            '/etc/opencas/opencas.conf', allow_incomplete=True
        )
    except Exception as parse_error:
        eprint(parse_error)
        eprint('Unable to parse config file.')
        exit(1)

    for cache in config.caches.values():
        try:
            # Second argument presumably selects "load existing cache"
            # (the error text below says "load") -- confirm in opencas.
            opencas.start_cache(cache, True)
        except opencas.casadm.CasadmError as load_error:
            message = 'Unable to load cache {0} ({1}). Reason:\n{2}'.format(
                cache.cache_id, cache.device, load_error.result.stderr
            )
            eprint(message)
|
|
|
|
# Initial cache start
|
|
|
|
def add_core_recursive(core, config):
    """Add *core* to its cache, first adding any core it is stacked on.

    When ``core.device`` is itself an exported Open CAS device
    (``/dev/cas<cache_id>-<core_id>...``), the core providing that device is
    looked up in *config* and added first, so that multi-level
    configurations come up in dependency order.

    The function relies on two flags that the caller must initialise to
    ``False`` on every core before the first call:

    * ``core.added``  -- set once the core has been added successfully.
    * ``core.marked`` -- set only while the core is being processed; meeting
      a marked core again means the configuration contains a cycle.

    Args:
        core: core configuration object (needs ``device``, ``cache_id``,
            ``added`` and ``marked`` attributes).
        config: parsed configuration; ``config.caches[cache_id].cores[core_id]``
            is used to find the core backing a ``/dev/casX-Y`` device.

    Returns:
        ``True`` if adding this core, or any core it depends on, failed;
        ``False`` otherwise.

    Exits the process with status 3 when a dependency cycle is detected.
    """
    if core.added:
        return False

    if core.marked:
        eprint('Unable to add core {0} to cache {1}. Reason:\nRecursive core configuration!'
               .format(core.device, core.cache_id))
        exit(3)

    with_error = False
    core.marked = True
    try:
        # Cache and core ids may have several digits, so match \d+ rather
        # than a single \d; otherwise e.g. /dev/cas12-3 is not recognised as
        # a dependency and the core is added out of order.
        match = re.match(r'/dev/cas(\d+)-(\d+).*', core.device)
        if match:
            cache_id, core_id = match.groups()
            backing_core = config.caches[int(cache_id)].cores[int(core_id)]
            with_error = add_core_recursive(backing_core, config)

        try:
            opencas.add_core(core, False)
            core.added = True
        except opencas.casadm.CasadmError as e:
            eprint('Unable to add core {0} to cache {1}. Reason:\n{2}'
                   .format(core.device, core.cache_id, e.result.stderr))
            with_error = True
    finally:
        # The mark only means "currently on the recursion stack". Clearing
        # it here keeps a core whose add failed from being misreported as a
        # recursive configuration when it is reached again later.
        core.marked = False

    return with_error
|
|
|
|
def init(force):
    """Perform the initial setup described by /etc/opencas/opencas.conf.

    Steps, in order:

    1. Parse the configuration file (exit status 1 on failure).
    2. Unless *force* is truthy, check every cache device and refuse to
       continue (exit status 1) if one of them is reported both as a cache
       and as dirty. If the check itself fails, exit with casadm's exit code.
    3. Start and then configure every cache.
    4. Add every core, resolving core-on-core dependencies through
       ``add_core_recursive``.

    This function always terminates the process: status 0 on full success,
    2 if any start/configure/add step failed.

    Args:
        force: skip the dirty-data check; also passed as the third argument
            to ``opencas.start_cache``.
    """
    try:
        config = opencas.cas_config.from_file('/etc/opencas/opencas.conf')
    except Exception as parse_error:
        eprint(parse_error)
        eprint('Unable to parse config file.')
        exit(1)

    # Flipped to True by any non-fatal failure below; decides the exit status.
    failed = False

    if not force:
        for cache in config.caches.values():
            try:
                status = opencas.check_cache_device(cache.device)
                holds_dirty_data = (
                    status['Is cache'] == 'yes' and status['Cache dirty'] == 'yes'
                )
                if holds_dirty_data:
                    eprint('Unable to perform initial configuration.\n'
                           'One of cache devices contains dirty data.')
                    exit(1)
            except opencas.casadm.CasadmError as check_error:
                eprint('Unable to check status of device {0}. Reason:\n{1}'
                       .format(cache.device, check_error.result.stderr))
                exit(check_error.result.exit_code)

    for cache in config.caches.values():
        try:
            opencas.start_cache(cache, False, force)
        except opencas.casadm.CasadmError as start_error:
            eprint('Unable to start cache {0} ({1}). Reason:\n{2}'
                   .format(cache.cache_id, cache.device, start_error.result.stderr))
            failed = True

        # Configuration is attempted even when the start above failed.
        try:
            opencas.configure_cache(cache)
        except opencas.casadm.CasadmError as configure_error:
            eprint('Unable to configure cache {0} ({1}). Reason:\n{2}'
                   .format(cache.cache_id, cache.device, configure_error.result.stderr))
            failed = True

    # Reset the bookkeeping flags on all cores before adding any of them:
    # add_core_recursive may visit cores other than the one it was called on.
    for core in config.cores:
        core.added = False
        core.marked = False

    for core in config.cores:
        if add_core_recursive(core, config):
            failed = True

    exit(2 if failed else 0)
|
|
|
|
|
|
def settle(timeout, interval):
    """Wait for Open CAS devices to start and report the outcome.

    Delegates to ``opencas.wait_for_startup(timeout, interval)`` and treats
    its return value as the collection of devices that were not set up.

    This function always terminates the process:

    * status 0 when nothing is left uninitialized,
    * status 1 when waiting raised an exception or some devices are still
      missing (each one is listed on stderr).

    Args:
        timeout: forwarded to ``opencas.wait_for_startup`` (seconds,
            according to the command-line help).
        interval: forwarded to ``opencas.wait_for_startup`` (seconds,
            according to the command-line help).
    """
    try:
        missing = opencas.wait_for_startup(timeout, interval)
    except Exception as wait_error:
        eprint(wait_error)
        exit(1)

    if not missing:
        exit(0)

    eprint("Open CAS initialization failed. Couldn't set up all required devices")
    for entry in missing:
        line = "Couldn't add device {} as core {} in cache {}".format(
            entry.device, entry.core_id, entry.cache_id
        )
        eprint(line)

    exit(1)
|
|
|
|
|
|
# Stop - detach cores and stop caches
|
|
def stop(flush):
    """Stop the running Open CAS configuration via ``opencas.stop``.

    Args:
        flush: forwarded to ``opencas.stop``; set from the ``--flush``
            command-line switch ("Flush data before stopping").

    On failure the error is printed to stderr and the process exits with
    status 1, so callers (scripts, service managers) can detect that the
    stop did not succeed, consistent with how ``init`` and ``settle`` report
    failure.
    """
    try:
        opencas.stop(flush)
    except Exception as e:
        eprint(e)
        # Previously the error was only printed and the command still
        # exited 0, which hid the failure from the caller.
        exit(1)
|
|
|
|
|
|
# Command line arguments parsing
|
|
|
|
|
|
class cas:
    """Command-line front end of ``casctl``.

    Instantiating the class parses ``sys.argv`` and immediately runs the
    selected action (``init``, ``start``, ``settle`` or ``stop``) through the
    matching ``command_<action>`` method. With no arguments the help text is
    printed and nothing else happens.
    """

    def __init__(self):
        parser = argparse.ArgumentParser(prog="casctl")
        actions = parser.add_subparsers(title="actions")

        # Each sub-parser records its own name in ``command`` so the
        # dispatcher below can find the handler method.
        init_parser = actions.add_parser("init", help="Setup initial configuration")
        init_parser.set_defaults(command="init")
        init_parser.add_argument(
            "--force", action="store_true", help="Force cache start"
        )

        start_parser = actions.add_parser("start", help="Start cache configuration")
        start_parser.set_defaults(command="start")

        settle_parser = actions.add_parser(
            "settle", help="Wait for startup of devices"
        )
        settle_parser.set_defaults(command="settle")
        settle_parser.add_argument(
            "--timeout",
            action="store",
            type=int,
            default=270,
            help="How long should command wait [s]",
        )
        settle_parser.add_argument(
            "--interval",
            action="store",
            type=int,
            default=5,
            help="Polling interval [s]",
        )

        stop_parser = actions.add_parser("stop", help="Stop cache configuration")
        stop_parser.set_defaults(command="stop")
        stop_parser.add_argument(
            "--flush", action="store_true", help="Flush data before stopping"
        )

        cli_args = sys.argv[1:]
        if not cli_args:
            parser.print_help()
            return

        parsed = parser.parse_args(cli_args)
        handler = getattr(self, "command_" + parsed.command)
        handler(parsed)

    def command_init(self, args):
        """Run ``init`` with the value of ``--force``."""
        init(args.force)

    def command_start(self, args):
        """Run ``start``; it takes no options."""
        start()

    def command_settle(self, args):
        """Run ``settle`` with ``--timeout`` and ``--interval``."""
        settle(args.timeout, args.interval)

    def command_stop(self, args):
        """Run ``stop`` with the value of ``--flush``."""
        stop(args.flush)
|
|
|
|
# Script entry point: constructing ``cas`` parses the command line and runs
# the requested action.
if __name__ == "__main__":
    cas()
|