#!/usr/bin/python3
#
# Copyright(c) 2019-2020 Intel Corporation
# SPDX-License-Identifier: BSD-3-Clause-Clear
#
"""In-flight upgrade of Open CAS, implemented as a chain of upgrade states.

Each step is an ``UpgradeState`` subclass whose ``do_work()`` returns a result
object (``Success``, ``Failure``, ``Warn``, ``Abort``).  A ``StateMachine``
subclass maps (state, result type) pairs to the next state through its
``transition_map``.
"""

import logging
import sys
import argparse
import subprocess
import os
from pathlib import Path
from shutil import copy

import opencas
from upgrade_utils import (
    yn_prompt,
    Failure,
    Success,
    Warn,
    Abort,
    StateMachine,
    UpgradeState,
    insert_module,
    remove_module,
    get_device_sysfs_path,
    get_device_schedulers,
    set_device_scheduler,
    drop_os_caches,
)

LOG_FILE = "/var/log/opencas-upgrade/upgrade.log"
COMPILATION_LOG = "/var/log/opencas-upgrade/build.log"
INIT_CONFIG_FILE_TMP_PATH = "/tmp/opencas_upgrade_backup.conf"

# Keys of the mapping returned by opencas.get_cas_version().
CAS_CACHE_KEY = "CAS Cache Kernel Module"
CAS_DISK_KEY = "CAS Disk Kernel Module"
CAS_CLI_KEY = "CAS CLI Utility"

# Lowest major version of cas_disk from which the upgrade is allowed.
CAS_DISK_MIN_VER = 20

# Parent of the directory holding this script.  The path is anchored on the
# absolute location of the file: with a bare relative __file__ (for example
# "upgrade.py") os.path.dirname() returns "" and the result would be "/..",
# i.e. the filesystem root.
OCL_BUILD_ROOT = f"{os.path.dirname(os.path.abspath(__file__))}/.."


class InitUpgrade(UpgradeState):
    """Ask for confirmation, validate the installed CAS and back up its config."""

    log = "Performing initial checks"
    # Read by the UpgradeState machinery in upgrade_utils; cleared below when
    # the upgrade was started with force=True and no question is asked.
    will_prompt = True

    class NotInstalled(Warn):
        """Result returned when the current CAS version cannot be detected."""

    def do_work(self):
        """Run the pre-upgrade checks.

        Returns:
            Abort if the user declines, NotInstalled if
            opencas.get_cas_version() raises FileNotFoundError, Failure if any
            check fails or the config backup cannot be written, Success
            otherwise.
        """
        if self.state_machine.params["force"]:
            self.will_prompt = False
        elif yn_prompt("Proceed with upgrade procedure?") == "n":
            return Abort("User aborted")

        try:
            version = opencas.get_cas_version()
        except FileNotFoundError:
            return self.NotInstalled("Couldn't detect current CAS version")
        except Exception as e:
            return Failure(f"Failed to get current version of CAS {e}")

        # Although there shouldn't be any problem with upgrade from CAS 19.9
        # to newer, this feature is not fully validated so it is disabled by
        # default.
        if int(version[CAS_DISK_KEY].split(".")[0]) < CAS_DISK_MIN_VER:
            return Failure("Minimal cas_disk version required to perform upgrade is 20.01!")

        if version[CAS_CLI_KEY] != version[CAS_CACHE_KEY]:
            return Failure("Mismatch between CLI and cas_cache version")

        active_devices = opencas.get_devices_state()

        # Every cache has to be in "Running" state and the core pool has to be
        # empty before the upgrade may start.
        if any(device["status"] != "Running" for device in active_devices["caches"].values()):
            return Failure("Incomplete configuration. Run casadm -L to review CAS state!")

        if len(active_devices["core_pool"]) != 0:
            return Failure("Incomplete configuration. Run casadm -L to review CAS state!")

        # Keep a copy of the init config; RestoreInitConfig copies it back.
        if os.path.isfile(opencas.cas_config.default_location):
            try:
                copy(opencas.cas_config.default_location, INIT_CONFIG_FILE_TMP_PATH)
            except IOError:
                return Failure("Could not save init config file backup")
        else:
            logging.info("Init config file not found")

        return Success()


class BuildCas(UpgradeState):
    """Configure and compile Open CAS in OCL_BUILD_ROOT."""

    log = "Compiling Open CAS"

    def do_work(self):
        """Run ``./configure`` and ``make -j``, writing output to COMPILATION_LOG.

        The log file is opened in "w" mode, so a previous build log is
        overwritten.  Returns Failure when either command exits non-zero.
        """
        with open(COMPILATION_LOG, "w") as build_log:
            logging.info("Running ./configure for CAS")
            p = subprocess.run(
                ["./configure"], cwd=OCL_BUILD_ROOT, stdout=build_log, stderr=build_log
            )
            if p.returncode:
                return Failure(
                    f"Configuration of Open CAS failed. Build log: {COMPILATION_LOG}"
                )

            logging.info("Compiling CAS")
            p = subprocess.run(
                ["make", "-j"], cwd=OCL_BUILD_ROOT, stdout=build_log, stderr=build_log
            )
            if p.returncode:
                return Failure(f"Compilation of Open CAS failed. Build log: {COMPILATION_LOG}")

        return Success()


class RestoreInitConfig(UpgradeState):
    """Copy the init config backup made by InitUpgrade back into place."""

    log = "Restore original init config file"

    def do_work(self):
        """Restore the config and delete the backup.

        A missing backup is not an error: a warning is logged and Success is
        returned.  Failure is returned when the copy raises IOError.
        """
        if not os.path.isfile(INIT_CONFIG_FILE_TMP_PATH):
            logging.warning("Init config backup file not found")
            return Success()

        try:
            copy(INIT_CONFIG_FILE_TMP_PATH, opencas.cas_config.default_location)
        except IOError:
            logging.error(
                "Failed to restore original init config file from "
                f"{INIT_CONFIG_FILE_TMP_PATH}. Configuration has to be restored manually."
            )
            return Failure()

        os.remove(INIT_CONFIG_FILE_TMP_PATH)

        return Success()


class InstallCas(UpgradeState):
    """Install the freshly built Open CAS with ``make install``."""

    log = "Installing new Open CAS files"

    def do_work(self):
        """Run ``make install`` in OCL_BUILD_ROOT, appending output to COMPILATION_LOG."""
        with open(COMPILATION_LOG, "a") as install_log:
            p = subprocess.run(
                ["make", "install"], cwd=OCL_BUILD_ROOT, stdout=install_log, stderr=install_log
            )

            if p.returncode:
                return Failure(
                    f"Installation of Open CAS failed. Installation log: {COMPILATION_LOG}"
                )

        return Success()


class InsertModule(UpgradeState):
    """Base state loading a module with ``insert_module(module_path, **options)``.

    Subclasses override ``module_path`` and ``options`` to select what is loaded.
    """

    module_path = f"{OCL_BUILD_ROOT}/modules/cas_cache/cas_cache.ko"
    options = {"installed": False}

    def do_work(self):
        """Load the module; any exception is converted into a Failure."""
        try:
            insert_module(self.module_path, **self.options)
        except Exception as e:
            return Failure(f"Couldn't load module {self.module_path}. Reason: {e}")

        return Success()


class InsertNewModule(InsertModule):
    """Insert the newly built cas_cache module (inherited defaults)."""

    log = "Try to insert new caching module"


class DryRun(InsertModule):
    """Insert the new module with ``dry_run=1`` and unload it again."""

    log = "Perform dry run to check upgrade data integrity"
    options = {"installed": False, "dry_run": 1}

    def do_work(self):
        """Perform the dry-run insertion, then remove the module.

        Returns Failure when the insertion does not succeed or the module
        cannot be unloaded; otherwise the Success from the insertion.
        """
        result = super().do_work()

        if not isinstance(result, Success):
            return Failure("Dry run failed")

        try:
            logging.info(
                "Insert with dry_run set succeeded. Removing module to attempt final insertion."
            )
            remove_module("cas_cache")
        except Exception as e:
            return Failure(f"Could not unload module. Reason: {e}")

        return result


class InsertInstalledModule(InsertModule):
    """Fallback state: insert "cas_cache" with ``installed=True``."""

    log = "Restore old cas_cache module"
    module_path = "cas_cache"
    options = {"installed": True}


class PrepareForUpgrade(UpgradeState):
    """Call ``opencas.casadm.start_upgrade()`` to switch CAS to upgrade mode."""

    log = "Preparing Open CAS for upgrade"

    def do_work(self):
        """Return Failure wrapping the CasadmError if the switch fails."""
        try:
            logging.info("Switching CAS to upgrade mode")
            opencas.casadm.start_upgrade()
        except opencas.casadm.CasadmError as e:
            return Failure(e)

        return Success()


class RemoveModule(UpgradeState):
    """Unload the cas_cache module."""

    log = "Removing cas_cache module"

    def do_work(self):
        """Call ``remove_module("cas_cache")``; any exception becomes a Failure."""
        try:
            logging.info("Removing cas_cache module")
            remove_module("cas_cache")
        except Exception as e:
            return Failure(f"Failed to remove cas_cache module. Reason: {e}")

        return Success()


class SetSchedulersToNoop(UpgradeState):
    """Switch the scheduler of every core device to "noop" (or "none")."""

    log = "Setting core devices schedulers to noop"

    def do_work(self):
        """Record the current scheduler of each core device, then switch it.

        The original schedulers are stored in ``self.state_machine.schedulers``
        (sysfs path -> scheduler name) so RestoreCoreSchedulers can put them
        back.  "noop" is preferred and "none" is the fallback; devices offering
        neither are left untouched.
        """
        # A set: several cores may resolve to the same sysfs device.
        unique_core_sysfs_devices = {
            get_device_sysfs_path(core["device"])
            for core in opencas.get_devices_state()["cores"].values()
        }

        self.state_machine.schedulers = {}
        for core_sysfs_path in unique_core_sysfs_devices:
            current, available = get_device_schedulers(core_sysfs_path)

            self.state_machine.schedulers[core_sysfs_path] = current

            if current in ["noop", "none"]:
                logging.info(f"Core {core_sysfs_path} already uses {current} scheduler. Skipping.")
            elif "noop" in available:
                logging.info(f"Switching scheduler for {core_sysfs_path}: {current} => noop")
                set_device_scheduler(core_sysfs_path, "noop")
            elif "none" in available:
                logging.info(f"Switching scheduler for {core_sysfs_path}: {current} => none")
                set_device_scheduler(core_sysfs_path, "none")
            else:
                logging.info(f"No appropriate scheduler available for {core_sysfs_path}. Skipping.")

        return Success()


class RestoreCoreSchedulers(UpgradeState):
    """Put back the schedulers recorded by SetSchedulersToNoop."""

    log = "Restoring core devices schedulers"

    def do_work(self):
        """Restore each saved scheduler unless the device already uses it."""
        for core_sysfs_path, scheduler in self.state_machine.schedulers.items():
            current = get_device_schedulers(core_sysfs_path)[0]
            # Compare against the scheduler actually in effect now, not just
            # the saved name, so the "already uses" message is always true and
            # devices that were never switched are not rewritten.
            if current == scheduler:
                logging.info(
                    f"Device {core_sysfs_path} already uses {scheduler} scheduler. Skipping."
                )
            else:
                logging.info(f"Switching scheduler for {core_sysfs_path}: {current} => {scheduler}")
                set_device_scheduler(core_sysfs_path, scheduler)

        return Success()


class DropCaches(UpgradeState):
    """Call ``drop_os_caches()`` before the new module is inserted."""

    log = "Drop OS caches to ensure memory availability"

    def do_work(self):
        """Drop the OS caches and report Success."""
        logging.info("Dropping slab and page caches using procfs")
        drop_os_caches()

        return Success()


class InstallStateMachine(StateMachine):
    """Plain installation: BuildCas followed, on success, by InstallCas."""

    transition_map = {
        BuildCas: {Success: InstallCas},
        InstallCas: {"default": None},
        "default": None,
    }


class RegularInstall(UpgradeState):
    """Offer a regular installation when no previous CAS was detected."""

    log = "Installing Open CAS"
    will_prompt = True

    def do_work(self):
        """Ask the user, then run InstallStateMachine and return its result."""
        if (
            yn_prompt("Previous CAS installation not detected. Perform regular installation?")
            == "n"
        ):
            return Abort("User aborted")

        return InstallStateMachine(BuildCas).run()


class UpgradeStateMachine(StateMachine):
    """
    This class implements whole CAS in-flight upgrade procedure.

        +-------------+      not      +---------------+
        |InitUpgrade  +-------------->+RegularInstall |
        +------+------+   installed   |               |
               |                      |  +---------+  |
               v                      |  |BuildCas |  |
        +------+------+ fail          |  +----+----+  |
        |BuildCas     +------+        |       |       |
        +------+------+      |        |       v       |
               |             |        |  +----+----+  |
               v             |        |  |Install  |  |
        +------+------+ fail |        |  +---------+  |
        |SetToNoop    +------+        |               |
        +------+------+      |        +-------+-------+
               |             |                |
               v             +----------------+
        +------+------+ fail                  |
        |PrepareForUpg+--------------------+  |
        +------+------+                    |  |
               |                           |  |
               v                           |  |
        +------+------+ fail               |  |
        |RemoveModule +--------------------+  |
        +------+------+                    |  |
               |                           |  |
               v                           |  |
        +------+------+ fail               |  |
        |DropCaches   +--------------------+  |
        +------+------+                    |  |
               |                           |  |
               v                           |  |
        +------+------+ fail               |  |
        |DryRun       +--------+           |  |
        +------+------+        |           |  |
               |               |           |  |
               v               |           |  |
        +------+------+ fail   |           |  |
        |InsertNew    +--------+           |  |
        +------+------+        |           |  |
               |               |           |  |
               v               |           |  |
        +------+------+ fail   |           |  |
        |InstallCas   +--------+           |  |
        +------+------+        v           |  |
               |        +------+--------+  |  |
               |        |InsertInstalled|  |  |
               v        +------+--------+  |  |
        +------+------+        |           |  |
        |RestoreConfig+<-------+           |  |
        +------+------+                    |  |
               |                           |  |
               v                           |  |
        +------+------+                    |  |
        |RestoreSched +<-------------------+  |
        +------+------+                       v
               |                         +----+----+
               +------------------------>+   END   |
                                         +---------+
    """

    # Maps a state to {result type: next state}.  Entries are written with a
    # "default" fallback; a next state of None is the END node of the diagram.
    transition_map = {
        InitUpgrade: {Success: BuildCas, InitUpgrade.NotInstalled: RegularInstall},
        RegularInstall: {"default": None},
        BuildCas: {Success: SetSchedulersToNoop},
        SetSchedulersToNoop: {Success: PrepareForUpgrade},
        PrepareForUpgrade: {Success: RemoveModule, Failure: RestoreCoreSchedulers},
        RemoveModule: {Success: DropCaches, Failure: RestoreCoreSchedulers},
        DropCaches: {Success: DryRun, Failure: RestoreCoreSchedulers},
        DryRun: {Success: InsertNewModule, Failure: InsertInstalledModule},
        InsertNewModule: {Success: InstallCas, Failure: InsertInstalledModule},
        InstallCas: {Success: RestoreInitConfig, Failure: InsertInstalledModule},
        InsertInstalledModule: {"default": RestoreInitConfig},
        RestoreInitConfig: {"default": RestoreCoreSchedulers},
        RestoreCoreSchedulers: {"default": None},
        "default": None,
    }


def start(args):
    """Handler of the ``start`` sub-command: run the upgrade and print the outcome.

    Args:
        args: parsed command-line namespace; only ``args.force`` is read.
    """
    logging.info(">>> Starting OpenCAS upgrade procedure")
    s = UpgradeStateMachine(InitUpgrade, force=args.force)
    result = s.run()

    if not isinstance(result, Success):
        print(f"Upgrade failed. Reason: {result}")
    else:
        print("Upgrade completed successfully!")

    print(f"Full upgrade log: {LOG_FILE}")


def main():
    """Set up file logging, parse the command line and dispatch the action."""
    Path(LOG_FILE).parent.mkdir(mode=0o700, parents=True, exist_ok=True)

    # All log records go to LOG_FILE; no console handler is installed here.
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s [%(levelname)-5.5s] %(message)s",
        handlers=[logging.FileHandler(LOG_FILE)],
    )

    parser = argparse.ArgumentParser(prog=__file__)
    subparsers = parser.add_subparsers(title="actions")

    parser_start = subparsers.add_parser(
        "start", help="Upgrade Open CAS in flight to current version"
    )
    parser_start.add_argument("--force", action="store_true", help="Skip prompts")
    parser_start.set_defaults(func=start)

    # Without any argument there is no action to dispatch: show usage instead.
    if not sys.argv[1:]:
        parser.print_help()
        return

    args = parser.parse_args(sys.argv[1:])
    args.func(args)


if __name__ == "__main__":
    main()