#!/usr/bin/python3
#
# Copyright(c) 2019-2021 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#
|
|
|
|
import logging
|
|
import sys
|
|
import argparse
|
|
import subprocess
|
|
import os
|
|
from pathlib import Path
|
|
from shutil import copy
|
|
|
|
import opencas
|
|
from upgrade_utils import (
|
|
yn_prompt,
|
|
Failure,
|
|
Success,
|
|
Warn,
|
|
Abort,
|
|
StateMachine,
|
|
UpgradeState,
|
|
insert_module,
|
|
remove_module,
|
|
get_device_sysfs_path,
|
|
get_device_schedulers,
|
|
set_device_scheduler,
|
|
drop_os_caches,
|
|
)
|
|
|
|
# Log of the upgrade procedure itself (attached as a file handler in main()).
LOG_FILE = "/var/log/opencas-upgrade/upgrade.log"
# Receives stdout/stderr of the ./configure, make and make install steps.
COMPILATION_LOG = "/var/log/opencas-upgrade/build.log"
# Temporary copy of the init config file, taken before the upgrade starts.
INIT_CONFIG_FILE_TMP_PATH = "/tmp/opencas_upgrade_backup.conf"

# Keys of the mapping returned by opencas.get_cas_version().
CAS_CACHE_KEY = "CAS Cache Kernel Module"
CAS_DISK_KEY = "CAS Disk Kernel Module"
CAS_CLI_KEY = "CAS CLI Utility"

# Lowest major version of the cas_disk module from which an upgrade is allowed.
CAS_DISK_MIN_VER = 20

# Root of the source tree: the parent of the directory holding this script.
# abspath() is required: when the script is started by a bare relative name,
# dirname(__file__) can be "" and the root would silently collapse to "/..".
OCL_BUILD_ROOT = f"{os.path.dirname(os.path.abspath(__file__))}/.."
|
|
|
|
|
|
class InitUpgrade(UpgradeState):
    """Entry state of the upgrade: confirm with the user and validate the current setup.

    The checks cover the detected CAS version, the status of the configured caches and the
    core pool. When they pass, the init config file (if present) is backed up to
    INIT_CONFIG_FILE_TMP_PATH.
    """

    log = "Performing initial checks"
    will_prompt = True

    class NotInstalled(Warn):
        """Result returned when the current CAS version cannot be detected."""

    def do_work(self):
        """Run the initial checks.

        Returns:
            Success when every check passes, Abort when the user declines the prompt,
            NotInstalled when opencas.get_cas_version() raises FileNotFoundError and
            Failure in every other case.
        """
        if self.state_machine.params["force"]:
            self.will_prompt = False
        elif yn_prompt("Proceed with upgrade procedure?") == "n":
            return Abort("User aborted")

        try:
            version = opencas.get_cas_version()
        except FileNotFoundError:
            return self.NotInstalled("Couldn't detect current CAS version")
        except Exception as e:
            return Failure(f"Failed to get current version of CAS {e}")

        # Malformed or incomplete version data must end in a Failure result rather than
        # in an unhandled exception escaping the state.
        try:
            cas_disk_major = int(version[CAS_DISK_KEY].split(".")[0])
            cli_version = version[CAS_CLI_KEY]
            cache_version = version[CAS_CACHE_KEY]
        except (KeyError, ValueError, AttributeError, TypeError) as e:
            return Failure(f"Unexpected CAS version information: {e!r}")

        # Although there shouldn't be any problem with upgrade from CAS 19.9 to newer, this
        # feature is not fully validated so it is disabled by default.
        if cas_disk_major < CAS_DISK_MIN_VER:
            return Failure("Minimal cas_disk version required to perform upgrade is 20.01!")

        if cli_version != cache_version:
            return Failure("Mismatch between CLI and cas_cache version")

        active_devices = opencas.get_devices_state()

        if any(device["status"] != "Running" for device in active_devices["caches"].values()):
            return Failure("Incomplete configuration. Run casadm -L to review CAS state!")

        if active_devices["core_pool"]:
            return Failure("Incomplete configuration. Run casadm -L to review CAS state!")

        if os.path.isfile(opencas.cas_config.default_location):
            try:
                copy(opencas.cas_config.default_location, INIT_CONFIG_FILE_TMP_PATH)
            except IOError:
                return Failure("Could not save init config file backup")
        else:
            logging.info("Init config file not found")

        return Success()
|
|
|
|
|
|
class BuildCas(UpgradeState):
    """Configure and compile Open CAS inside OCL_BUILD_ROOT."""

    log = "Compiling Open CAS"

    def do_work(self):
        """Run ./configure and then make, sending their output to COMPILATION_LOG.

        Returns:
            Failure for the first step that exits with a non-zero status, Success when
            both steps finish cleanly.
        """
        # (log message, command line, message of the Failure returned on non-zero exit)
        steps = (
            (
                "Running ./configure for CAS",
                ["./configure"],
                f"Configuration of Open CAS failed. Build log: {COMPILATION_LOG}",
            ),
            (
                "Compiling CAS",
                ["make", "-j"],
                f"Compilation of Open CAS failed. Build log: {COMPILATION_LOG}",
            ),
        )

        with open(COMPILATION_LOG, "w") as build_log:
            for announcement, command, failure_text in steps:
                logging.info(announcement)
                finished = subprocess.run(
                    command, cwd=OCL_BUILD_ROOT, stdout=build_log, stderr=build_log
                )
                if finished.returncode:
                    return Failure(failure_text)

        return Success()
|
|
|
|
|
|
class RestoreInitConfig(UpgradeState):
    """Copy the backed-up init config file back to its default location."""

    log = "Restore original init config file"

    def do_work(self):
        """Restore the backup stored at INIT_CONFIG_FILE_TMP_PATH and delete it afterwards.

        Returns:
            Success when the backup was restored or when no backup exists (a warning is
            logged in that case), Failure when copying the backup raised IOError.
        """
        if os.path.isfile(INIT_CONFIG_FILE_TMP_PATH):
            try:
                copy(INIT_CONFIG_FILE_TMP_PATH, opencas.cas_config.default_location)
            except IOError:
                logging.error(
                    f"Failed to restore original init config file from "
                    f"{INIT_CONFIG_FILE_TMP_PATH}. Configuration has to be restored manually."
                )
                return Failure()

            # The backup is no longer needed once it has been copied back.
            os.remove(INIT_CONFIG_FILE_TMP_PATH)
        else:
            logging.warning("Init config backup file not found")

        return Success()
|
|
|
|
|
|
class InstallCas(UpgradeState):
    """Install the freshly built Open CAS files with ``make install``."""

    log = "Installing new Open CAS files"

    def do_work(self):
        """Run ``make install`` in OCL_BUILD_ROOT, appending its output to COMPILATION_LOG.

        Returns:
            Success when make exits with status 0, Failure otherwise.
        """
        command = ["make", "install"]
        with open(COMPILATION_LOG, "a") as install_log:
            finished = subprocess.run(
                command, cwd=OCL_BUILD_ROOT, stdout=install_log, stderr=install_log
            )

        if not finished.returncode:
            return Success()

        return Failure(f"Installation of Open CAS failed. Installation log: {COMPILATION_LOG}")
|
|
|
|
|
|
class InsertModule(UpgradeState):
    """Base state that loads a kernel module through insert_module().

    Subclasses select the module and the insertion options by overriding ``module_path``
    and ``options``.
    """

    # Module built in the source tree, as opposed to one already installed on the system.
    module_path = f"{OCL_BUILD_ROOT}/modules/cas_cache/cas_cache.ko"
    # Keyword arguments forwarded to insert_module().
    options = {"installed": False}

    def do_work(self):
        """Insert ``module_path`` with ``options``.

        Returns:
            Success when insert_module() returns, Failure carrying the reason when it
            raises any exception.
        """
        try:
            insert_module(self.module_path, **self.options)
        except Exception as error:
            return Failure(f"Couldn't load module {self.module_path}. Reason: {error}")
        else:
            return Success()
|
|
|
|
|
|
class InsertNewModule(InsertModule):
    """Insert the newly built module, using the path and options inherited from InsertModule."""

    log = "Try to insert new caching module"
|
|
|
|
|
|
class DryRun(InsertModule):
    """Insert the new module with ``dry_run`` set, then remove it again."""

    log = "Perform dry run to check upgrade data integrity"
    options = {"installed": False, "dry_run": 1}

    def do_work(self):
        """Perform the dry-run insertion followed by removal of the module.

        Returns:
            The Success result of the insertion when both the insertion and the removal
            work, Failure when either of them fails.
        """
        result = super().do_work()
        if not isinstance(result, Success):
            # The returned Failure carries only a generic message, so keep the underlying
            # reason in the log instead of dropping it.
            logging.error(f"Dry run insertion failed: {result}")
            return Failure("Dry run failed")

        logging.info(
            "Insert with dry_run set succeeded. Removing module to attempt final insertion."
        )
        try:
            remove_module("cas_cache")
        except Exception as e:
            return Failure(f"Could not unload module. Reason: {e}")

        return result
|
|
|
|
|
|
class InsertInstalledModule(InsertModule):
    """Re-insert the cas_cache module by name with the ``installed`` option enabled."""

    log = "Restore old cas_cache module"
    # A module name rather than a path to a .ko file in the source tree.
    module_path = "cas_cache"
    options = {"installed": True}
|
|
|
|
|
|
class PrepareForUpgrade(UpgradeState):
    """Ask casadm to switch CAS into upgrade mode."""

    log = "Preparing Open CAS for upgrade"

    def do_work(self):
        """Call opencas.casadm.start_upgrade().

        Returns:
            Success when the call returns, Failure wrapping the CasadmError when it raises
            one.
        """
        logging.info("Switching CAS to upgrade mode")
        try:
            opencas.casadm.start_upgrade()
        except opencas.casadm.CasadmError as error:
            return Failure(error)
        else:
            return Success()
|
|
|
|
|
|
class RemoveModule(UpgradeState):
    """Unload the cas_cache kernel module."""

    log = "Removing cas_cache module"

    def do_work(self):
        """Remove the ``cas_cache`` module.

        Returns:
            Success when remove_module() returns, Failure carrying the reason when any
            exception is raised.
        """
        module_name = "cas_cache"
        try:
            logging.info("Removing cas_cache module")
            remove_module(module_name)
        except Exception as error:
            return Failure(f"Failed to remove cas_cache module. Reason: {error}")
        else:
            return Success()
|
|
|
|
|
|
class SetSchedulersToNoop(UpgradeState):
    """Switch the scheduler of every core device to ``noop`` (or ``none``)."""

    log = "Setting core devices schedulers to noop"

    def do_work(self):
        """Record the current scheduler of each core device and switch it where possible.

        The schedulers found before switching are stored in
        ``self.state_machine.schedulers``, keyed by the device sysfs path.

        Returns:
            Success, also for devices that are skipped.
        """
        cores = opencas.get_devices_state()["cores"].values()
        # A set, so a sysfs device shared by several cores is handled only once.
        sysfs_paths = {get_device_sysfs_path(core["device"]) for core in cores}

        self.state_machine.schedulers = {}
        for sysfs_path in sysfs_paths:
            active, offered = get_device_schedulers(sysfs_path)
            self.state_machine.schedulers[sysfs_path] = active

            if active in ("noop", "none"):
                logging.info(f"Core {sysfs_path} already uses {active} scheduler. Skipping.")
                continue

            # "noop" is preferred; "none" is used only when "noop" is not offered.
            replacement = next((name for name in ("noop", "none") if name in offered), None)
            if replacement is None:
                logging.info(f"No appropriate scheduler available for {sysfs_path}. Skipping.")
                continue

            logging.info(f"Switching scheduler for {sysfs_path}: {active} => {replacement}")
            set_device_scheduler(sysfs_path, replacement)

        return Success()
|
|
|
|
|
|
class RestoreCoreSchedulers(UpgradeState):
    """Put back the schedulers recorded in ``self.state_machine.schedulers``."""

    log = "Restoring core devices schedulers"

    def do_work(self):
        """Restore every recorded scheduler other than ``noop`` and ``none``.

        Returns:
            Success once all recorded devices have been processed.
        """
        for sysfs_path, saved in self.state_machine.schedulers.items():
            active = get_device_schedulers(sysfs_path)[0]

            if saved not in ("noop", "none"):
                logging.info(f"Switching scheduler for {sysfs_path}: {active} => {saved}")
                set_device_scheduler(sysfs_path, saved)
                continue

            # The device was recorded with noop/none, so there is nothing to restore.
            logging.info(f"Device {sysfs_path} already uses {saved} scheduler. Skipping.")

        return Success()
|
|
|
|
|
|
class DropCaches(UpgradeState):
    """Drop the OS caches through drop_os_caches()."""

    log = "Drop OS caches to ensure memory availability"

    def do_work(self):
        """Drop the caches and report the outcome as a result object.

        Returns:
            Success when drop_os_caches() returns, Failure carrying the reason when it
            raises any exception.
        """
        logging.info("Dropping slab and page caches using procfs")
        try:
            drop_os_caches()
        except Exception as e:
            # Report the problem as a Failure result, like the other states do, so the
            # state machine can follow its Failure transition instead of being interrupted.
            return Failure(f"Failed to drop OS caches. Reason: {e}")

        return Success()
|
|
|
|
|
|
class InstallStateMachine(StateMachine):
    """State machine of a regular installation: BuildCas followed by InstallCas."""

    transition_map = {
        # A successful build is followed by the installation.
        BuildCas: {Success: InstallCas},
        # InstallCas has no successor state.
        InstallCas: {"default": None},
        "default": None,
    }
|
|
|
|
|
|
class RegularInstall(UpgradeState):
    """Fallback state that performs a regular installation through InstallStateMachine."""

    log = "Installing Open CAS"
    will_prompt = True

    def do_work(self):
        """Confirm with the user, unless forced, and run the regular installation.

        Returns:
            Abort when the user declines, otherwise the result of InstallStateMachine.run().
        """
        # Honour the "force" parameter the same way InitUpgrade does, so a forced
        # (non-interactive) run is not stopped by this prompt.
        if self.state_machine.params["force"]:
            self.will_prompt = False
        elif (
            yn_prompt("Previous CAS installation not detected. Perform regular installation?")
            == "n"
        ):
            return Abort("User aborted")

        return InstallStateMachine(BuildCas).run()
|
|
|
|
|
|
class UpgradeStateMachine(StateMachine):
    """
    This class implements whole CAS in-flight upgrade procedure.

    Transitions, as encoded in ``transition_map`` (END means the procedure stops; a result
    without an explicit entry is resolved through the "default" entries, the lookup itself
    being implemented by StateMachine):

        State                  on Success             on Failure
        ---------------------  ---------------------  ---------------------
        InitUpgrade            BuildCas               END
          (NotInstalled)       RegularInstall
        RegularInstall         END                    END
        BuildCas               SetSchedulersToNoop    END
        SetSchedulersToNoop    PrepareForUpgrade      END
        PrepareForUpgrade      RemoveModule           RestoreCoreSchedulers
        RemoveModule           DropCaches             RestoreCoreSchedulers
        DropCaches             DryRun                 RestoreCoreSchedulers
        DryRun                 InsertNewModule        InsertInstalledModule
        InsertNewModule        InstallCas             InsertInstalledModule
        InstallCas             RestoreInitConfig      InsertInstalledModule
        InsertInstalledModule  RestoreInitConfig      RestoreInitConfig
        RestoreInitConfig      RestoreCoreSchedulers  RestoreCoreSchedulers
        RestoreCoreSchedulers  END                    END
    """

    transition_map = {
        # Initial checks; fall back to a regular installation when CAS is not installed.
        InitUpgrade: {Success: BuildCas, InitUpgrade.NotInstalled: RegularInstall},
        RegularInstall: {"default": None},
        # Build and preparation steps.
        BuildCas: {Success: SetSchedulersToNoop},
        SetSchedulersToNoop: {Success: PrepareForUpgrade},
        # From here on a failure leads to restoring the core schedulers.
        PrepareForUpgrade: {Success: RemoveModule, Failure: RestoreCoreSchedulers},
        RemoveModule: {Success: DropCaches, Failure: RestoreCoreSchedulers},
        DropCaches: {Success: DryRun, Failure: RestoreCoreSchedulers},
        # From here on a failure leads to re-inserting the installed module.
        DryRun: {Success: InsertNewModule, Failure: InsertInstalledModule},
        InsertNewModule: {Success: InstallCas, Failure: InsertInstalledModule},
        InstallCas: {Success: RestoreInitConfig, Failure: InsertInstalledModule},
        # Restoration steps follow regardless of the result.
        InsertInstalledModule: {"default": RestoreInitConfig},
        RestoreInitConfig: {"default": RestoreCoreSchedulers},
        RestoreCoreSchedulers: {"default": None},
        "default": None,
    }
|
|
|
|
|
|
def start(args):
    """Run the in-flight upgrade and report the outcome on stdout.

    Args:
        args: parsed command line arguments; only ``args.force`` is read.

    Returns:
        0 when the state machine finishes with a Success result, 1 otherwise.
    """
    logging.info(">>> Starting OpenCAS upgrade procedure")
    outcome = UpgradeStateMachine(InitUpgrade, force=args.force).run()

    succeeded = isinstance(outcome, Success)
    if succeeded:
        print("Upgrade completed successfully!")
    else:
        print(f"Upgrade failed. Reason: {outcome}")

    print(f"Full upgrade log: {LOG_FILE}")

    return 0 if succeeded else 1
|
|
|
|
|
|
def main():
    """Set up file logging, parse the command line and dispatch the selected action.

    Log records of level DEBUG and above are written to LOG_FILE; its parent directory is
    created with mode 0o700 when missing.

    Returns:
        1 when no arguments were given (the help text is printed instead), otherwise the
        value returned by the handler of the selected action.
    """
    Path(LOG_FILE).parent.mkdir(mode=0o700, parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s [%(levelname)-5.5s] %(message)s",
        handlers=[logging.FileHandler(LOG_FILE)],
    )

    parser = argparse.ArgumentParser(prog=__file__)
    subparsers = parser.add_subparsers(title="actions")

    parser_start = subparsers.add_parser(
        "start", help="Upgrade Open CAS in flight to current version"
    )
    parser_start.add_argument("--force", action="store_true", help="Skip prompts")
    parser_start.set_defaults(func=start)

    # Without any argument there is no action to dispatch to, so show the usage instead.
    if not sys.argv[1:]:
        parser.print_help()
        return 1

    args = parser.parse_args(sys.argv[1:])
    return args.func(args)
|
|
|
|
|
|
# Script entry point: the value returned by main() becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|