From e5947f1ee289fc32c6cdece55f14ca199bd57b74 Mon Sep 17 00:00:00 2001 From: Alexey Avramov Date: Tue, 7 May 2019 02:47:28 +0900 Subject: [PATCH] test --- trash/n10 | 3020 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3020 insertions(+) create mode 100755 trash/n10 diff --git a/trash/n10 b/trash/n10 new file mode 100755 index 0000000..ada86b7 --- /dev/null +++ b/trash/n10 @@ -0,0 +1,3020 @@ +#!/usr/bin/env python3 +"""A daemon that prevents OOM in Linux systems.""" + +import os +from ctypes import CDLL +from time import sleep, time +from operator import itemgetter +from sys import stdout, stderr, argv, exit, version +from re import search +from sre_constants import error as invalid_re +from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP + + +start_time = time() + + +help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG] + +optional arguments: + -h, --help show this help message and exit + -v, --version print version + -t, --test print some tests + -p, --print-proc-table + print table of processes with their badness values + -c CONFIG, --config CONFIG + path to the config file, default values: + ./nohang.conf, /etc/nohang/nohang.conf""" + + +SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK']) + +SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE']) + +conf_err_mess = 'Invalid config. Exit.' 
+ +sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP] + +sig_dict = { + SIGKILL: 'SIGKILL', + SIGINT: 'SIGINT', + SIGQUIT: 'SIGQUIT', + SIGHUP: 'SIGHUP', + SIGTERM: 'SIGTERM' +} + +self_pid = str(os.getpid()) + +self_uid = os.geteuid() + +if self_uid == 0: + root = True +else: + root = False + + +if os.path.exists('./nohang_notify_helper'): + notify_helper_path = './nohang_notify_helper' +else: + notify_helper_path = '/usr/sbin/nohang_notify_helper' + + +victim_dict = dict() + + + +victim_id = None +actions_time_dict = dict() +actions_time_dict['action_handled'] = [time(), victim_id] +# print(actions_time_dict) + + + +# will store corrective actions stat +stat_dict = dict() + + +separate_log = False # will be overwritten after parse config + + +def find_cgroup_indexes(): + """ Find cgroup-line positions in /proc/*/cgroup file. + """ + + cgroup_v1_index = None + cgroup_v2_index = None + + with open('/proc/self/cgroup') as f: + for index, line in enumerate(f): + if ':name=' in line: + cgroup_v1_index = index + if line.startswith('0::'): + cgroup_v2_index = index + + return cgroup_v1_index, cgroup_v2_index + + +cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes() + + +########################################################################## + +# define functions + +''' +def self_rss(): + """ + """ + return pid_to_status(self_pid)[5] + + +def print_self_rss(): + """ + """ + log('Self RSS: {} MiB'.format(self_rss())) +''' + + +def pid_to_rss(pid): + try: + rss = int(rline1( + '/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE + except IndexError: + rss = None + except FileNotFoundError: + rss = None + except ProcessLookupError: + rss = None + return rss + + +def pid_to_vm_size(pid): + try: + vm_size = int(rline1( + '/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE + except IndexError: + vm_size = None + except FileNotFoundError: + vm_size = None + except ProcessLookupError: + vm_size = None + return vm_size + + + + + + + + +def 
signal_handler(signum, frame): + """ + """ + for i in sig_list: + signal(i, signal_handler_inner) + log('Signal handler called with the {} signal '.format( + sig_dict[signum])) + update_stat_dict_and_print(None) + log('Exit') + exit() + + +def signal_handler_inner(signum, frame): + """ + """ + log('Signal handler called with the {} signal (ignored) '.format( + sig_dict[signum])) + + +def exe(cmd): + """ + """ + log('Execute the command: {}'.format(cmd)) + t0 = time() + write_self_oom_score_adj(self_oom_score_adj_max) + err = os.system(cmd) + write_self_oom_score_adj(self_oom_score_adj_min) + dt = time() - t0 + log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3))) + return err + + +def write(path, string): + """ + """ + with open(path, 'w') as f: + f.write(string) + + +def write_self_oom_score_adj(new_value): + """ + """ + if root: + write('/proc/self/oom_score_adj', new_value) + + +self_oom_score_adj_min = '-600' +self_oom_score_adj_max = '-6' + + +write_self_oom_score_adj(self_oom_score_adj_min) + + +def valid_re(reg_exp): + """Validate regular expression. 
+ """ + try: + search(reg_exp, '') + except invalid_re: + log('Invalid config: invalid regexp: {}'.format(reg_exp)) + exit(1) + + +def func_print_proc_table(): + """ + """ + print_proc_table = True + find_victim(print_proc_table) + exit() + + +def log(*msg): + """ + """ + try: + print(*msg) + except OSError: + sleep(0.01) + if separate_log: + try: + info(*msg) + except OSError: + sleep(0.01) + + +def print_version(): + """ + """ + try: + v = rline1('/etc/nohang/version') + except FileNotFoundError: + v = None + if v is None: + print('Nohang unknown version') + else: + print('Nohang ' + v) + exit() + + +def test(): + """ + """ + print('\n(This option is not ready to use!)\n') + + print(version) + print(argv) + + hr = '==================================' + print(hr) + print("uptime()") + print(uptime()) + + print(hr) + print("os.uname()") + print(os.uname()) + + print(hr) + print("pid_to_starttime('self')") + print(pid_to_starttime('self')) + + print(hr) + print("get_victim_id('self')") + print(get_victim_id('self')) + + print(hr) + print("errprint('test')") + print(errprint('test')) + + print(hr) + print("mlockall()") + print(mlockall()) + + print(hr) + print("pid_to_state('2')") + print(pid_to_state('2')) + + exit() + + +def pid_to_cgroup_v1(pid): + """ + """ + cgroup_v1 = '' + try: + with open('/proc/' + pid + '/cgroup') as f: + for index, line in enumerate(f): + if index == cgroup_v1_index: + cgroup_v1 = '/' + line.partition('/')[2][:-1] + return cgroup_v1 + except FileNotFoundError: + return '' + + +def pid_to_cgroup_v2(pid): + """ + """ + cgroup_v2 = '' + try: + with open('/proc/' + pid + '/cgroup') as f: + for index, line in enumerate(f): + if index == cgroup_v2_index: + cgroup_v2 = line[3:-1] + return cgroup_v2 + except FileNotFoundError: + return '' + + +def pid_to_starttime(pid): + """ handle FNF error! 
+ """ + try: + starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[ + 2].split(' ')[20] + + except UnicodeDecodeError: + # print('LOL') + with open('/proc/' + pid + '/stat', 'rb') as f: + starttime = f.read().decode('utf-8', 'ignore').rpartition( + ')')[2].split(' ')[20] + + return float(starttime) / SC_CLK_TCK + + +def get_victim_id(pid): + """victim_id is starttime + pid""" + try: + return rline1('/proc/' + pid + '/stat').rpartition( + ')')[2].split(' ')[20] + '_pid' + pid + except FileNotFoundError: + return '' + except ProcessLookupError: + return '' + + +def pid_to_state(pid): + """ Handle FNF error! (BTW it already handled in find_victim_info()) + """ + return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1] + + +def pid_to_name(pid): + """ + """ + try: + with open('/proc/' + pid + '/comm', 'rb') as f: + return f.read().decode('utf-8', 'ignore')[:-1] + except FileNotFoundError: + return '' + except ProcessLookupError: + return '' + + +def pid_to_ppid(pid): + """ + """ + try: + with open('/proc/' + pid + '/status') as f: + for n, line in enumerate(f): + if n is ppid_index: + return line.split('\t')[1].strip() + except FileNotFoundError: + return '' + except ProcessLookupError: + return '' + except UnicodeDecodeError: + with open('/proc/' + pid + '/status', 'rb') as f: + f_list = f.read().decode('utf-8', 'ignore').split('\n') + for i in range(len(f_list)): + if i is ppid_index: + return f_list[i].split('\t')[1] + + +def pid_to_ancestry(pid, max_ancestry_depth=1): + """ + """ + if max_ancestry_depth == 1: + ppid = pid_to_ppid(pid) + pname = pid_to_name(ppid) + return '\n PPID: {} ({})'.format(ppid, pname) + if max_ancestry_depth == 0: + return '' + anc_list = [] + for i in range(max_ancestry_depth): + ppid = pid_to_ppid(pid) + pname = pid_to_name(ppid) + anc_list.append((ppid, pname)) + if ppid == '1': + break + pid = ppid + a = '' + for i in anc_list: + a = a + ' <= PID {} ({})'.format(i[0], i[1]) + return '\n Ancestry: ' + a[4:] + + +def 
pid_to_cmdline(pid): + """ + Get process cmdline by pid. + + pid: str pid of required process + returns string cmdline + """ + try: + with open('/proc/' + pid + '/cmdline') as f: + return f.read().replace('\x00', ' ').rstrip() + except FileNotFoundError: + return '' + + +def pid_to_environ(pid): + """ + Get process environ by pid. + + pid: str pid of required process + returns string environ + """ + try: + with open('/proc/' + pid + '/environ') as f: + return f.read().replace('\x00', ' ').rstrip() + except FileNotFoundError: + return '' + + +def pid_to_realpath(pid): + try: + return os.path.realpath('/proc/' + pid + '/exe') + except FileNotFoundError: + return '' + + +def pid_to_uid(pid): + """return euid""" + try: + with open('/proc/' + pid + '/status') as f: + for n, line in enumerate(f): + if n is uid_index: + return line.split('\t')[2] + except UnicodeDecodeError: + with open('/proc/' + pid + '/status', 'rb') as f: + f_list = f.read().decode('utf-8', 'ignore').split('\n') + return f_list[uid_index].split('\t')[2] + except FileNotFoundError: + return '' + + +def pid_to_badness(pid): + """Find and modify badness (if it needs).""" + + try: + + oom_score = int(rline1('/proc/' + pid + '/oom_score')) + badness = oom_score + + if decrease_oom_score_adj: + oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj')) + if badness > oom_score_adj_max and oom_score_adj > 0: + badness = badness - oom_score_adj + oom_score_adj_max + + if regex_matching: + name = pid_to_name(pid) + for re_tup in processname_re_list: + if search(re_tup[1], name) is not None: + badness += int(re_tup[0]) + + if re_match_cgroup_v1: + cgroup_v1 = pid_to_cgroup_v1(pid) + for re_tup in cgroup_v1_re_list: + if search(re_tup[1], cgroup_v1) is not None: + badness += int(re_tup[0]) + + if re_match_cgroup_v2: + cgroup_v2 = pid_to_cgroup_v2(pid) + for re_tup in cgroup_v2_re_list: + if search(re_tup[1], cgroup_v2) is not None: + badness += int(re_tup[0]) + + if re_match_realpath: + realpath = 
pid_to_realpath(pid) + for re_tup in realpath_re_list: + if search(re_tup[1], realpath) is not None: + badness += int(re_tup[0]) + + if re_match_cmdline: + cmdline = pid_to_cmdline(pid) + for re_tup in cmdline_re_list: + if search(re_tup[1], cmdline) is not None: + badness += int(re_tup[0]) + + if re_match_environ: + environ = pid_to_environ(pid) + for re_tup in environ_re_list: + if search(re_tup[1], environ) is not None: + badness += int(re_tup[0]) + + if re_match_uid: + uid = pid_to_uid(pid) + for re_tup in uid_re_list: + if search(re_tup[1], uid) is not None: + badness += int(re_tup[0]) + + if forbid_negative_badness: + if badness < 0: + badness = 0 + + return badness, oom_score + + except FileNotFoundError: + return None, None + except ProcessLookupError: + return None, None + + +def pid_to_status(pid): + """ + """ + + try: + + with open('/proc/' + pid + '/status') as f: + + for n, line in enumerate(f): + + if n is 0: + name = line.split('\t')[1][:-1] + + if n is state_index: + state = line.split('\t')[1][0] + continue + + if n is ppid_index: + ppid = line.split('\t')[1][:-1] + continue + + if n is uid_index: + uid = line.split('\t')[2] + continue + + if n is vm_size_index: + vm_size = kib_to_mib(int(line.split('\t')[1][:-4])) + continue + + if n is vm_rss_index: + vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) + continue + + if n is vm_swap_index: + vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) + break + + return name, state, ppid, uid, vm_size, vm_rss, vm_swap + + except UnicodeDecodeError: + return pid_to_status_unicode(pid) + + except FileNotFoundError: + return None + + except ProcessLookupError: + return None + + except ValueError: + return None + + +def pid_to_status_unicode(pid): + """ + """ + try: + + with open('/proc/' + pid + '/status', 'rb') as f: + f_list = f.read().decode('utf-8', 'ignore').split('\n') + + for i in range(len(f_list)): + + if i is 0: + name = f_list[i].split('\t')[1] + + if i is state_index: + state = 
f_list[i].split('\t')[1][0] + + if i is ppid_index: + ppid = f_list[i].split('\t')[1] + + if i is uid_index: + uid = f_list[i].split('\t')[2] + + if i is vm_size_index: + vm_size = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + if i is vm_rss_index: + vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) + + if i is vm_swap_index: + vm_swap = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) + + return name, state, ppid, uid, vm_size, vm_rss, vm_swap + + except FileNotFoundError: + return None + + except ProcessLookupError: + return None + + except ValueError: + return None + + +########################################################################## + + +def uptime(): + """ + """ + return float(rline1('/proc/uptime').split(' ')[0]) + + +def errprint(*text): + """ + """ + print(*text, file=stderr, flush=True) + + +def mlockall(): + """Lock all memory to prevent swapping nohang process.""" + + MCL_CURRENT = 1 + MCL_FUTURE = 2 + MCL_ONFAULT = 4 + + libc = CDLL('libc.so.6', use_errno=True) + + result = libc.mlockall( + MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT + ) + if result != 0: + result = libc.mlockall( + MCL_CURRENT | MCL_FUTURE + ) + if result != 0: + log('WARNING: cannot lock all memory') + else: + pass + # log('All memory locked with MCL_CURRENT | MCL_FUTURE') + else: + pass + # log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT') + + +def update_stat_dict_and_print(key): + """ + """ + + if key is not None: + + if key not in stat_dict: + + stat_dict.update({key: 1}) + + else: + + new_value = stat_dict[key] + 1 + stat_dict.update({key: new_value}) + + if print_total_stat: + + stats_msg = 'Total stat (what happened in the last {}):'.format( + format_time(time() - start_time)) + + for i in stat_dict: + stats_msg += '\n {}: {}'.format(i, stat_dict[i]) + + log(stats_msg) + + +def find_psi_metrics_value(psi_path, psi_metrics): + """ + """ + + if psi_support: + + if psi_metrics == 'some_avg10': + return float(rline1(psi_path).split(' 
')[1].split('=')[1]) + if psi_metrics == 'some_avg60': + return float(rline1(psi_path).split(' ')[2].split('=')[1]) + if psi_metrics == 'some_avg300': + return float(rline1(psi_path).split(' ')[3].split('=')[1]) + + if psi_metrics == 'full_avg10': + with open(psi_path) as f: + psi_list = f.readlines() + return float(psi_list[1].split(' ')[1].split('=')[1]) + if psi_metrics == 'full_avg60': + with open(psi_path) as f: + psi_list = f.readlines() + return float(psi_list[1].split(' ')[2].split('=')[1]) + if psi_metrics == 'full_avg300': + with open(psi_path) as f: + psi_list = f.readlines() + return float(psi_list[1].split(' ')[3].split('=')[1]) + + +def check_mem_and_swap(): + """find mem_available, swap_total, swap_free""" + with open('/proc/meminfo') as f: + for n, line in enumerate(f): + if n is 2: + mem_available = int(line.split(':')[1][:-4]) + continue + if n is swap_total_index: + swap_total = int(line.split(':')[1][:-4]) + continue + if n is swap_free_index: + swap_free = int(line.split(':')[1][:-4]) + break + return mem_available, swap_total, swap_free + + +def check_zram(): + """find MemUsedZram""" + disksize_sum = 0 + mem_used_total_sum = 0 + + for dev in os.listdir('/sys/block'): + if dev.startswith('zram'): + stat = zram_stat(dev) + disksize_sum += int(stat[0]) + mem_used_total_sum += int(stat[1]) + + # Means that when setting zram disksize = 1 GiB available memory + # decrease by 0.0042 GiB. + # Found experimentally, requires clarification with different kernaels and + # architectures. + # On small disk drives (up to gigabyte) it can be more, up to 0.0045. + # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should + # be 0.001: + # ("zram uses about 0.1% of the size of the disk" + # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt), + # but this statement contradicts the experimental data. + # ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize + # Found experimentally. 
+ ZRAM_DISKSIZE_FACTOR = 0.0042 + + return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0 + + +def format_time(t): + """ + """ + t = int(t) + if t < 60: + return '{} sec'.format(t) + elif t >= 60 and t < 3600: + m = t // 60 + s = t % 60 + return '{} min {} sec'.format(m, s) + else: + h = t // 3600 + s0 = t - h * 3600 + m = s0 // 60 + s = s0 % 60 + return '{} h {} min {} sec'.format(h, m, s) + + +def string_to_float_convert_test(string): + """Try to interprete string values as floats.""" + try: + return float(string) + except ValueError: + return None + + +def string_to_int_convert_test(string): + """Try to interpret string values as integers.""" + try: + return int(string) + except ValueError: + return None + + +def conf_parse_string(param): + """ + Get string parameters from the config dict. + + param: config_dict key + returns config_dict[param].strip() + """ + if param in config_dict: + return config_dict[param].strip() + else: + errprint('All the necessary parameters must be in the config') + errprint('There is no "{}" parameter in the config'.format(param)) + exit(1) + + +def conf_parse_bool(param): + """ + Get bool parameters from the config_dict. + + param: config_dicst key + returns bool + """ + if param in config_dict: + param_str = config_dict[param] + if param_str == 'True': + return True + elif param_str == 'False': + return False + else: + errprint('Invalid value of the "{}" parameter.'.format(param)) + errprint('Valid values are True and False.') + errprint('Exit') + exit(1) + else: + errprint('All the necessary parameters must be in the config') + errprint('There is no "{}" parameter in the config'.format(param)) + exit(1) + + +def rline1(path): + """read 1st line from path.""" + try: + with open(path) as f: + for line in f: + return line[:-1] + except UnicodeDecodeError: + with open(path, 'rb') as f: + return f.read(999).decode( + 'utf-8', 'ignore').split('\n')[0] # use partition()! 
+ + +def kib_to_mib(num): + """Convert KiB values to MiB values.""" + return round(num / 1024.0) + + +def percent(num): + """Interprete num as percentage.""" + return round(num * 100, 1) + + +def just_percent_mem(num): + """convert num to percent and justify""" + return str(round(num * 100, 1)).rjust(4, ' ') + + +def just_percent_swap(num): + """ + """ + return str(round(num * 100, 1)).rjust(5, ' ') + + +def human(num, lenth): + """Convert KiB values to MiB values with right alignment""" + return str(round(num / 1024)).rjust(lenth, ' ') + + +def zram_stat(zram_id): + """ + Get zram state. + + zram_id: str zram block-device id + returns bytes diskcize, str mem_used_total + """ + try: + disksize = rline1('/sys/block/' + zram_id + '/disksize') + except FileNotFoundError: + return '0', '0' + if disksize == ['0\n']: + return '0', '0' + try: + mm_stat = rline1('/sys/block/' + zram_id + '/mm_stat').split(' ') + mm_stat_list = [] + for i in mm_stat: + if i != '': + mm_stat_list.append(i) + mem_used_total = mm_stat_list[2] + except FileNotFoundError: + mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total') + return disksize, mem_used_total # BYTES, str + + +def send_notify_warn(): + """ + Look for process with maximum 'badness' and warn user with notification. + (implement Low memory warnings) + """ + log('Warning threshold exceeded') + + if check_warning_exe: + exe(warning_exe) + + else: + + title = 'Low memory' + + body = 'MemAvail: {}%\nSwapFree: {}%'.format( + round(mem_available / mem_total * 100), + round(swap_free / (swap_total + 0.1) * 100) + ) + + send_notification(title, body) + + +def send_notify(signal, name, pid): + """ + Notificate about OOM Preventing. 
+ + signal: key for notify_sig_dict + name: str process name + pid: str process pid + """ + + # wait for memory release after corrective action + # may be useful if free memory was about 0 immediately after + # corrective action + sleep(0.05) + + title = 'Freeze prevention' + body = '{} [{}] {}'.format( + notify_sig_dict[signal], + pid, + name.replace( + # symbol '&' can break notifications in some themes, + # therefore it is replaced by '*' + '&', '*' + ) + ) + + send_notification(title, body) + + +def send_notify_etc(pid, name, command): + """ + Notificate about OOM Preventing. + + command: str command that will be executed + name: str process name + pid: str process pid + """ + title = 'Freeze prevention' + body = 'Victim is [{}] {}\nExecute the co' \ + 'mmand:\n{}'.format( + pid, name.replace('&', '*'), command.replace('&', '*')) + + send_notification(title, body) + + +def send_notification(title, body): + """ + """ + split_by = '#' * 16 + + t000 = time() + + path_to_cache = '/dev/shm/nohang_notify_cache_uid{}_time{}'.format( + str(self_uid), t000 + ) + + text = '{}{}{}'.format(title, split_by, body) + + try: + with open(path_to_cache, 'w') as f: + f.write(text) + os.chmod(path_to_cache, 0o600) + except OSError: + log('OSError while send notification ' + '(No space left on device: /dev/shm)') + return None + + cmd = '{} --uid {} --time {} &'.format(notify_helper_path, self_uid, t000) + + exe(cmd) + + +def get_pid_list(): + """ + Find pid list expect kthreads and zombies + """ + pid_list = [] + for pid in os.listdir('/proc'): + if os.path.exists('/proc/' + pid + '/exe') is True: + pid_list.append(pid) + return pid_list + + +pid_list = get_pid_list() + + +def get_non_decimal_pids(): + """ + """ + non_decimal_list = [] + for pid in pid_list: + if pid[0].isdecimal() is False: + non_decimal_list.append(pid) + return non_decimal_list + + +def find_victim(_print_proc_table): + """ + Find the process with highest badness and its badness adjustment + Return pid and 
badness + """ + + ft1 = time() + + pid_list = get_pid_list() + + pid_list.remove(self_pid) + + if '1' in pid_list: + pid_list.remove('1') + + non_decimal_list = get_non_decimal_pids() + + for i in non_decimal_list: + if i in pid_list: + pid_list.remove(i) + + pid_badness_list = [] + + if _print_proc_table: + + if extra_table_info == 'None': + extra_table_title = '' + + elif extra_table_info == 'cgroup_v1': + extra_table_title = 'CGroup_v1' + + elif extra_table_info == 'cgroup_v2': + extra_table_title = 'CGroup_v2' + + elif extra_table_info == 'cmdline': + extra_table_title = 'cmdline' + + elif extra_table_info == 'environ': + extra_table_title = 'environ' + + elif extra_table_info == 'realpath': + extra_table_title = 'realpath' + + elif extra_table_info == 'All': + extra_table_title = '[CGroup] [CmdLine] [RealPath]' + else: + extra_table_title = '' + + hr = '#' * 115 + + log(hr) + log('# PID PPID badness oom_score oom_score_adj e' + 'UID S VmSize VmRSS VmSwap Name {}'.format( + extra_table_title)) + log('#------- ------- ------- --------- ------------- -------' + '--- - ------ ----- ------ --------------- --------') + + for pid in pid_list: + + badness = pid_to_badness(pid)[0] + + if badness is None: + continue + + if _print_proc_table: + + try: + oom_score = rline1('/proc/' + pid + '/oom_score') + oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') + except FileNotFoundError: + continue + + if pid_to_status(pid) is None: + continue + else: + (name, state, ppid, uid, vm_size, vm_rss, + vm_swap) = pid_to_status(pid) + + if extra_table_info == 'None': + extra_table_line = '' + + elif extra_table_info == 'cgroup_v1': + extra_table_line = pid_to_cgroup_v1(pid) + + elif extra_table_info == 'cgroup_v2': + extra_table_line = pid_to_cgroup_v2(pid) + + elif extra_table_info == 'cmdline': + extra_table_line = pid_to_cmdline(pid) + + elif extra_table_info == 'environ': + extra_table_line = pid_to_environ(pid) + + elif extra_table_info == 'realpath': + extra_table_line = 
pid_to_realpath(pid) + + elif extra_table_info == 'All': + extra_table_line = '[CG: {}] [CL: {}] [RP: {}]'.format( + pid_to_cgroup_v1(pid), + pid_to_cmdline(pid), + pid_to_realpath(pid) + ) + else: + extra_table_line = '' + + log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format( + pid.rjust(7), + ppid.rjust(7), + str(badness).rjust(7), + oom_score.rjust(9), + oom_score_adj.rjust(13), + uid.rjust(10), + state, + str(vm_size).rjust(6), + str(vm_rss).rjust(5), + str(vm_swap).rjust(6), + name.ljust(15), + extra_table_line + ) + ) + + pid_badness_list.append((pid, badness)) + + real_proc_num = len(pid_badness_list) + + # Make list of (pid, badness) tuples, sorted by 'badness' values + # print(pid_badness_list) + pid_tuple_list = sorted( + pid_badness_list, + key=itemgetter(1), + reverse=True + )[0] + + pid = pid_tuple_list[0] + + # Get maximum 'badness' value + victim_badness = pid_tuple_list[1] + victim_name = pid_to_name(pid) + + if _print_proc_table: + log(hr) + + log('Found {} processes with existing /proc/[pid]/exe'.format( + real_proc_num)) + + log( + 'Process with highest badness (found in {} ms):\n PID: {}, Na' + 'me: {}, badness: {}'.format( + round((time() - ft1) * 1000), + pid, + victim_name, + victim_badness + ) + ) + + return pid, victim_badness, victim_name + + +def find_victim_info(pid, victim_badness, name): + """ + """ + status0 = time() + + try: + + with open('/proc/' + pid + '/status') as f: + + for n, line in enumerate(f): + + if n is state_index: + state = line.split('\t')[1].rstrip() + continue + + if n is ppid_index: + ppid = line.split('\t')[1] + continue + + if n is uid_index: + uid = line.split('\t')[2] + continue + + if n is vm_size_index: + vm_size = kib_to_mib(int(line.split('\t')[1][:-4])) + continue + + if n is vm_rss_index: + vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) + continue + + if detailed_rss: + + if n is anon_index: + anon_rss = kib_to_mib( + int(line.split('\t')[1][:-4])) + continue + + if n is file_index: + file_rss = 
kib_to_mib( + int(line.split('\t')[1][:-4])) + continue + + if n is shmem_index: + shmem_rss = kib_to_mib( + int(line.split('\t')[1][:-4])) + continue + + if n is vm_swap_index: + vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) + break + + cmdline = pid_to_cmdline(pid) + oom_score = rline1('/proc/' + pid + '/oom_score') + oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') + + except FileNotFoundError: + log('The victim died in the search process: FileNotFoundError') + update_stat_dict_and_print( + 'The victim died in the search process: FileNotFoundError') + return None + except ProcessLookupError: + log('The victim died in the search process: ProcessLookupError') + update_stat_dict_and_print( + 'The victim died in the search process: ProcessLookupError') + return None + except UnicodeDecodeError: + + with open('/proc/' + pid + '/status', 'rb') as f: + f_list = f.read().decode('utf-8', 'ignore').split('\n') + + for i in range(len(f_list)): + + if i is state_index: + state = f_list[i].split('\t')[1].rstrip() + + if i is ppid_index: + ppid = f_list[i].split('\t')[1] + + if i is uid_index: + uid = f_list[i].split('\t')[2] + + if i is vm_size_index: + vm_size = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + if i is vm_rss_index: + vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) + + if detailed_rss: + + if i is anon_index: + anon_rss = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + if i is file_index: + file_rss = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + if i is shmem_index: + shmem_rss = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + if i is vm_swap_index: + vm_swap = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + cmdline = pid_to_cmdline(pid) + oom_score = rline1('/proc/' + pid + '/oom_score') + oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') + + except IndexError: + log('The victim died in the search process: IndexError') + update_stat_dict_and_print( + 'The victim died in the search process: 
IndexError') + return None + except ValueError: + log('The victim died in the search process: ValueError') + update_stat_dict_and_print( + 'The victim died in the search process: ValueError') + return None + except FileNotFoundError: + log('The victim died in the search process: FileNotFoundError') + update_stat_dict_and_print( + 'The victim died in the search process: FileNotFoundError') + return None + except ProcessLookupError: + log('The victim died in the search process: ProcessLookupError') + update_stat_dict_and_print( + 'The victim died in the search process: ProcessLookupError') + return None + + len_vm = len(str(vm_size)) + + try: + realpath = os.path.realpath('/proc/' + pid + '/exe') + victim_lifetime = format_time(uptime() - pid_to_starttime(pid)) + victim_cgroup_v1 = pid_to_cgroup_v1(pid) + victim_cgroup_v2 = pid_to_cgroup_v2(pid) + + except FileNotFoundError: + print('The victim died in the search process: FileNotFoundError') + update_stat_dict_and_print( + 'The victim died in the search process: FileNotFoundError') + return None + + ancestry = pid_to_ancestry(pid, max_ancestry_depth) + + if detailed_rss: + detailed_rss_info = ' (' \ + 'Anon: {} MiB, ' \ + 'File: {} MiB, ' \ + 'Shmem: {} MiB)'.format( + anon_rss, + file_rss, + shmem_rss) + else: + detailed_rss_info = '' + + victim_info = 'Victim information (found in {} ms):' \ + '\n Name: {}' \ + '\n State: {}' \ + '\n PID: {}' \ + '{}' \ + '\n EUID: {}' \ + '\n badness: {}, ' \ + 'oom_score: {}, ' \ + 'oom_score_adj: {}' \ + '\n VmSize: {} MiB' \ + '\n VmRSS: {} MiB {}' \ + '\n VmSwap: {} MiB' \ + '\n CGroup_v1: {}' \ + '\n CGroup_v2: {}' \ + '\n Realpath: {}' \ + '\n Cmdline: {}' \ + '\n Lifetime: {}'.format( + round((time() - status0) * 1000), + name, + state, + pid, + ancestry, + uid, + victim_badness, + oom_score, + oom_score_adj, + vm_size, + str(vm_rss).rjust(len_vm), + detailed_rss_info, + str(vm_swap).rjust(len_vm), + victim_cgroup_v1, + victim_cgroup_v2, + realpath, + cmdline, + 
def _log_memory_status(moment):
    """Log the current MemAvailable and SwapFree values.

    Args:
        moment: 'before' or 'after'; inserted into the log message.

    Returns:
        Tuple (mem_available, swap_free) as returned by
        check_mem_and_swap(), i.e. in the units that are compared with
        the *_kb thresholds.
    """
    mem_available, _, swap_free = check_mem_and_swap()
    log('Memory status {} implementing a corrective act'
        'ion:\n MemAvailable'
        ': {} MiB, SwapFree: {} MiB'.format(
            moment,
            round(int(mem_available) / 1024.0, 1),
            round(int(swap_free) / 1024.0, 1)))
    return mem_available, swap_free


def implement_corrective_action(signal):
    """Find the victim with the highest badness and act on it.

    Either runs a matching soft-action command or sends SIGTERM/SIGKILL
    to the victim, then logs the outcome and updates the statistics.

    Args:
        signal: SIGTERM or SIGKILL requested by the caller. It may be
            escalated to SIGKILL inside the function.

    Returns:
        None.

    The function reads the module-level ``time0`` (start of the current
    response) and the parsed config values, and it updates
    ``actions_time_dict`` and ``victim_dict``.
    """
    # A SIGTERM-level action is skipped (after sleeping over_sleep) until
    # min_delay_after_sigterm seconds have passed since the last action
    # that the victim appeared to handle.
    if signal is SIGTERM:
        dt = time() - actions_time_dict['action_handled'][0]
        if dt < min_delay_after_sigterm:
            sleep(over_sleep)
            return None

    pid, victim_badness, name = find_victim(print_proc_table)

    if victim_badness >= min_badness:

        if print_victim_info:
            log(find_victim_info(pid, victim_badness, name))

        # The thresholds may have been crossed while the victim was being
        # searched for (the search can take hundreds of milliseconds).
        mem_available, swap_free = _log_memory_status('before')

        if (mem_available <= mem_min_sigkill_kb and
                swap_free <= swap_min_sigkill_kb):
            log('Hard threshold exceeded')
            signal = SIGKILL

        victim_id = get_victim_id(pid)

        # Escalate to SIGKILL for a victim that has already been signalled
        # and is still alive after max_post_sigterm_victim_lifetime sec.
        if signal is SIGTERM and victim_id in victim_dict:
            if (time() - victim_dict[victim_id] >
                    max_post_sigterm_victim_lifetime):
                signal = SIGKILL

        # Matching with re to customize corrective actions.
        soft_match = False

        if soft_actions and signal is SIGTERM:
            name = pid_to_name(pid)
            cgroup_v1 = pid_to_cgroup_v1(pid)
            service = ''
            cgroup_v1_tail = cgroup_v1.rpartition('/')[2]
            if cgroup_v1_tail.endswith('.service'):
                service = cgroup_v1_tail
            for unit, regexp, command in soft_actions_list:
                u = name if unit == 'name' else cgroup_v1
                if search(regexp, u) is not None:
                    log("Regexp '{}' matches with {} '{}'".format(
                        regexp, unit, u))
                    soft_match = True
                    break

        if soft_match:

            _log_memory_status('before')

            cmd = command.replace(
                '$PID', pid).replace(
                '$NAME', pid_to_name(pid)).replace(
                '$SERVICE', service)

            exit_status = exe(cmd)
            response_time = time() - time0

            # TODO: as with the default action, check here whether the
            # victim still exists, how it reacted and when it died, and
            # update the action timestamps.
            log('Implement a corrective act'
                'ion:\n Run the command: {}'
                '\n Exit status: {}; total response '
                'time: {} ms'.format(
                    cmd,
                    exit_status,
                    round(response_time * 1000)))

            update_stat_dict_and_print("Run the command '{}'".format(cmd))

            if gui_notifications:
                # NOTE(review): $SERVICE is not expanded in the text passed
                # to the notification, unlike in the executed command.
                send_notify_etc(
                    pid,
                    name,
                    command.replace('$PID', pid).replace(
                        '$NAME', pid_to_name(pid)))

        else:

            # Default action: deliver the signal.
            # Stays None if the attempt fails before the message is built.
            preventing_oom_message = None

            try:
                os.kill(int(pid), signal)
                kill_timestamp = time()
                response_time = kill_timestamp - time0

                # Poll for up to 0.01 sec to see whether the victim is gone.
                while True:
                    exe_exists = os.path.exists('/proc/{}/exe'.format(pid))
                    rss = pid_to_rss(pid)
                    dt = time() - kill_timestamp
                    if not exe_exists or rss == 0 or dt > 0.01:
                        break
                    sleep(0.001)

                if dt > 0.01:
                    # Timer expired: the victim is still there, so it
                    # seems to be handling the signal.
                    actions_time_dict['action_handled'] = [
                        time(), get_victim_id(pid)]
                    if victim_id not in victim_dict:
                        victim_dict.update({victim_id: time()})
                else:
                    log('Process exited (VmRSS = 0) in {} sec'.format(
                        round(dt, 5)))

                if signal is SIGKILL or not exe_exists or rss == 0:
                    # Wait until the pid is gone from /proc.
                    # NOTE(review): rss matters little once the path is
                    # gone; checking pid existence would be enough. This
                    # loop has no timeout - confirm it cannot spin on a
                    # process that never disappears.
                    while True:
                        sleep(0.001)
                        if pid_to_rss(pid) is None:
                            break
                    kill_duration = time() - kill_timestamp
                    log('The victim died in {} sec'.format(
                        round(kill_duration, 3)))

                    _log_memory_status('after')

                send_result = 'total response time: {} ms'.format(
                    round(response_time * 1000))

                preventing_oom_message = 'Implement a corrective action:' \
                    '\n Send {} to the victim; {}'.format(
                        sig_dict[signal], send_result)

                key = 'Send {} to {}'.format(sig_dict[signal], name)

                if signal is SIGKILL and post_kill_exe != '':
                    cmd = post_kill_exe.replace('$PID', pid).replace(
                        '$NAME', pid_to_name(pid))
                    log('Execute post_kill_exe')
                    exe(cmd)

                if gui_notifications:
                    send_notify(signal, name, pid)

            except FileNotFoundError:
                key = 'FileNotFoundError (the victim died in the se' \
                    'arch process): '
            except ProcessLookupError:
                key = 'ProcessLookupError (the victim died in the se' \
                    'arch process): '

            # Nothing is logged when the attempt failed before the
            # message could be built.
            if preventing_oom_message is not None:
                log(preventing_oom_message)

            update_stat_dict_and_print(key)

    else:

        response_time = time() - time0
        log('victim badness {} < min_b'
            'adness {}; nothing to do; response time: {} ms'.format(
                victim_badness,
                min_badness,
                round(response_time * 1000)))

        update_stat_dict_and_print('victim badness < min_badness')

        # Sleep here to limit output spam; a process with a bigger
        # badness may still appear at any moment.
        sleep(over_sleep)

    # TODO: update the action time only for a victim that did not respond
    # to the soft action, not on every kill; the victim id is stored next
    # to the action time for that purpose.

    print('##################################################################')
def sleep_after_check_mem():
    """Sleep until the next memory check.

    With stable_sleep the period is always min_sleep. Otherwise it is the
    estimated time needed to reach the nearest threshold at the configured
    fill rates, clamped to the range [min_sleep, max_sleep].
    """
    if stable_sleep:
        if print_sleep_periods:
            log('Sleep {} sec'.format(min_sleep))
        sleep(min_sleep)
        return None

    # Distance from the current values to the nearest (largest) threshold,
    # never negative.
    mem_point = mem_available - max(mem_min_sigkill_kb, mem_min_sigterm_kb)
    swap_point = swap_free - max(swap_min_sigkill_kb, swap_min_sigterm_kb)
    swap_point = max(swap_point, 0)
    mem_point = max(mem_point, 0)

    # Time needed to consume each reserve at the configured rate.
    t_mem = mem_point / rate_mem
    t_swap = swap_point / rate_swap
    t_zram = max((mem_total * 0.8 - mem_used_zram) / rate_zram, 0)

    # Take the shorter of the two scenarios, then clamp it.
    t = min(t_mem + t_swap, t_mem + t_zram)
    if t > max_sleep:
        t = max_sleep
    elif t < min_sleep:
        t = min_sleep

    if print_sleep_periods:
        rounded = [round(x, 2) for x in (t, t_mem, t_swap, t_zram)]
        log('Sleep {} sec (t_mem={}, t_swap={}, t_zram={})'.format(*rounded))

    try:
        stdout.flush()
    except OSError:
        pass

    sleep(t)
def calculate_percent(arg_key):
    """Convert a memory threshold from the config to KiB, MiB and percent.

    The config value must end with '%' (a share of mem_total) or with 'M'
    (MiB). A missing key, unknown units, a non-float number or a value
    out of range terminates the program with exit code 1.

    Args:
        arg_key: str, key of the threshold in config_dict.

    Returns:
        Tuple (mem_min_kb, mem_min_mb, mem_min_percent).
    """
    if arg_key not in config_dict:
        log('{} not in config\nExit'.format(arg_key))
        exit(1)

    mem_min = config_dict[arg_key]

    if mem_min.endswith('%'):
        # Drop the percent sign, then check that the rest is a float.
        mem_min_percent = string_to_float_convert_test(mem_min[:-1].strip())
        if mem_min_percent is None:
            errprint('Invalid {} value, not float\nExit'.format(arg_key))
            exit(1)
        if mem_min_percent < 0 or mem_min_percent > 100:
            errprint(
                '{}, as percents value, out of ran'
                'ge [0; 100]\nExit'.format(arg_key))
            exit(1)

        mem_min_kb = mem_min_percent / 100 * mem_total
        mem_min_mb = round(mem_min_kb / 1024)

    elif mem_min.endswith('M'):
        mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip())
        if mem_min_mb is None:
            errprint('Invalid {} value, not float\nExit'.format(arg_key))
            exit(1)
        # Same lower bound as for the percent form.
        if mem_min_mb < 0:
            errprint('{} value can not be negative\nExit'.format(arg_key))
            exit(1)
        mem_min_kb = mem_min_mb * 1024
        if mem_min_kb > mem_total:
            errprint(
                '{} value can not be greater then MemT'
                'otal ({} MiB)\nExit'.format(
                    arg_key, round(mem_total / 1024)))
            exit(1)
        mem_min_percent = mem_min_kb / mem_total * 100

    else:
        log('Invalid {} units in config.\n Exit'.format(arg_key))
        exit(1)

    return mem_min_kb, mem_min_mb, mem_min_percent
argv[1] == '-t': + test() + elif argv[1] == '--print-proc-table' or argv[1] == '-p': + print_proc_table_flag = True + if os.path.exists('./nohang.conf'): + config = os.getcwd() + '/nohang.conf' + else: + config = '/etc/nohang/nohang.conf' + else: + errprint('Unknown option: {}'.format(argv[1])) + exit(1) + +elif len(argv) == 3: + if argv[1] == '--config' or argv[1] == '-c': + config = argv[2] + else: + errprint('Unknown option: {}'.format(argv[1])) + exit(1) + +else: + errprint('Invalid CLI input: too many options') + exit(1) + + +########################################################################## + + +# find mem_total +# find positions of SwapFree and SwapTotal in /proc/meminfo + +with open('/proc/meminfo') as f: + mem_list = f.readlines() + +mem_list_names = [] +for s in mem_list: + mem_list_names.append(s.split(':')[0]) + +if mem_list_names[2] != 'MemAvailable': + errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied') + # exit(1) + +swap_total_index = mem_list_names.index('SwapTotal') +swap_free_index = swap_total_index + 1 + +mem_total = int(mem_list[0].split(':')[1][:-4]) + +# Get names from /proc/*/status to be able to get VmRSS and VmSwap values + +with open('/proc/self/status') as file: + status_list = file.readlines() + +status_names = [] +for s in status_list: + status_names.append(s.split(':')[0]) + +ppid_index = status_names.index('PPid') +vm_size_index = status_names.index('VmSize') +vm_rss_index = status_names.index('VmRSS') +vm_swap_index = status_names.index('VmSwap') +uid_index = status_names.index('Uid') +state_index = status_names.index('State') + + +try: + anon_index = status_names.index('RssAnon') + file_index = status_names.index('RssFile') + shmem_index = status_names.index('RssShmem') + detailed_rss = True + # print(detailed_rss, 'detailed_rss') +except ValueError: + detailed_rss = False + # print('It is not Linux 4.5+') + +########################################################################## + + +log('Config: ' + 
# Parse the config file into config_dict and the lists of regexp rules.

# dictionary with config options
config_dict = dict()

processname_re_list = []
cmdline_re_list = []
environ_re_list = []
uid_re_list = []
cgroup_v1_re_list = []
cgroup_v2_re_list = []
realpath_re_list = []

soft_actions_list = []


# separator for optional parameters (that starts with @)
opt_separator = '///'

# '@MARKER regexp /// command' lines and the unit stored with the rule.
soft_action_markers = (
    ('@SOFT_ACTION_RE_NAME', 'name'),
    ('@SOFT_ACTION_RE_CGROUP_V1', 'cgroup_v1'),
)

# '@MARKER badness_adj /// regexp' lines and the list each one fills.
badness_re_markers = (
    ('@PROCESSNAME_RE', processname_re_list),
    ('@CMDLINE_RE', cmdline_re_list),
    ('@UID_RE', uid_re_list),
    ('@CGROUP_V1_RE', cgroup_v1_re_list),
    ('@CGROUP_V2_RE', cgroup_v2_re_list),
    ('@REALPATH_RE', realpath_re_list),
    ('@ENVIRON_RE', environ_re_list),
)

try:
    with open(config) as f:

        for line in f:

            is_soft_action = line.startswith(
                tuple(marker for marker, unit in soft_action_markers))

            # Comments, empty lines and indented lines are ignored.
            # NOTE(review): the other '@..._RE' lines are also stored here
            # as keys, so two such lines with identical text before '='
            # are reported as a duplication - confirm this is intended.
            if (not line.startswith(('#', '\n', '\t', ' ')) and
                    not is_soft_action):
                key, sep, value = line.partition('=')
                key = key.strip()
                value = value.strip()

                if key in config_dict:
                    log('ERROR: config key duplication: {}'.format(key))
                    exit(1)
                config_dict[key] = value

            for marker, unit in soft_action_markers:
                if line.startswith(marker):
                    rule = line.partition(marker)[2].partition(
                        opt_separator)
                    soft_re = rule[0].strip()
                    valid_re(soft_re)
                    soft_actions_list.append(
                        (unit, soft_re, rule[2].strip()))

            for marker, target_list in badness_re_markers:
                if line.startswith(marker):
                    rule = line.partition(marker)[2].strip(
                        ' \n').partition(opt_separator)
                    badness_adj = rule[0].strip(' ')
                    reg_exp = rule[2].strip(' ')
                    valid_re(reg_exp)
                    target_list.append((badness_adj, reg_exp))

except (PermissionError, UnicodeDecodeError, IsADirectoryError,
        IndexError, FileNotFoundError) as err:
    # The exception class name is printed in front of the common message.
    errprint(type(err).__name__, conf_err_mess)
    exit(1)


# A matching mode is enabled only if the config has rules for it.
if processname_re_list == []:
    regex_matching = False
else:
    regex_matching = True


if cmdline_re_list == []:
    re_match_cmdline = False
else:
    re_match_cmdline = True


if uid_re_list == []:
    re_match_uid = False
else:
    re_match_uid = True
# A matching mode is enabled only if the config has rules for it.
re_match_environ = environ_re_list != []
re_match_realpath = realpath_re_list != []
re_match_cgroup_v1 = cgroup_v1_re_list != []
re_match_cgroup_v2 = cgroup_v2_re_list != []

# Soft actions are enabled only if at least one rule was configured.
soft_actions = soft_actions_list != []

##########################################################################

# Extract the boolean options from the config dictionary.
psi_debug = conf_parse_bool('psi_debug')
print_total_stat = conf_parse_bool('print_total_stat')
print_proc_table = conf_parse_bool('print_proc_table')
forbid_negative_badness = conf_parse_bool('forbid_negative_badness')
print_victim_info = conf_parse_bool('print_victim_info')
print_config = conf_parse_bool('print_config')
print_mem_check_results = conf_parse_bool('print_mem_check_results')
print_sleep_periods = conf_parse_bool('print_sleep_periods')
gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
gui_notifications = conf_parse_bool('gui_notifications')
decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
ignore_psi = conf_parse_bool('ignore_psi')

# Each threshold is stored as three values: KiB, MiB and percent.
mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent = \
    calculate_percent('mem_min_sigterm')

mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent = \
    calculate_percent('mem_min_sigkill')

zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent = \
    calculate_percent('zram_max_sigterm')

zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent = \
    calculate_percent('zram_max_sigkill')
zram_max_sigkill_mb, zram_max_sigkill_percent + ) = calculate_percent('zram_max_sigkill') + +(mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent + ) = calculate_percent('mem_min_warnings') + +(zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent + ) = calculate_percent('zram_max_warnings') + + +if 'rate_mem' in config_dict: + rate_mem = string_to_float_convert_test(config_dict['rate_mem']) + if rate_mem is None: + errprint('Invalid rate_mem value, not float\nExit') + exit(1) + if rate_mem <= 0: + errprint('rate_mem MUST be > 0\nExit') + exit(1) +else: + errprint('rate_mem not in config\nExit') + exit(1) + + +if 'rate_swap' in config_dict: + rate_swap = string_to_float_convert_test(config_dict['rate_swap']) + if rate_swap is None: + errprint('Invalid rate_swap value, not float\nExit') + exit(1) + if rate_swap <= 0: + errprint('rate_swap MUST be > 0\nExit') + exit(1) +else: + errprint('rate_swap not in config\nExit') + exit(1) + + +if 'rate_zram' in config_dict: + rate_zram = string_to_float_convert_test(config_dict['rate_zram']) + if rate_zram is None: + errprint('Invalid rate_zram value, not float\nExit') + exit(1) + if rate_zram <= 0: + errprint('rate_zram MUST be > 0\nExit') + exit(1) +else: + errprint('rate_zram not in config\nExit') + exit(1) + + +if 'swap_min_sigterm' in config_dict: + swap_min_sigterm = config_dict['swap_min_sigterm'] +else: + errprint('swap_min_sigterm not in config\nExit') + exit(1) + + +if 'swap_min_sigkill' in config_dict: + swap_min_sigkill = config_dict['swap_min_sigkill'] +else: + errprint('swap_min_sigkill not in config\nExit') + exit(1) + + +if 'min_delay_after_sigterm' in config_dict: + min_delay_after_sigterm = string_to_float_convert_test( + config_dict['min_delay_after_sigterm']) + if min_delay_after_sigterm is None: + errprint('Invalid min_delay_after_sigterm value, not float\nExit') + exit(1) + if min_delay_after_sigterm < 0: + errprint('min_delay_after_sigterm must be positiv\nExit') + exit(1) 
+else: + errprint('min_delay_after_sigterm not in config\nExit') + exit(1) + + +if 'psi_post_action_delay' in config_dict: + psi_post_action_delay = string_to_float_convert_test( + config_dict['psi_post_action_delay']) + if psi_post_action_delay is None: + errprint('Invalid psi_post_action_delay value, not float\nExit') + exit(1) + if psi_post_action_delay < 0: + errprint('psi_post_action_delay must be positive\nExit') + exit(1) +else: + errprint('psi_post_action_delay not in config\nExit') + exit(1) + + +if 'sigkill_psi_threshold' in config_dict: + sigkill_psi_threshold = string_to_float_convert_test( + config_dict['sigkill_psi_threshold']) + if sigkill_psi_threshold is None: + errprint('Invalid sigkill_psi_threshold value, not float\nExit') + exit(1) + if sigkill_psi_threshold < 0 or sigkill_psi_threshold > 100: + errprint('sigkill_psi_threshold must be in the range [0; 100]\nExit') + exit(1) +else: + errprint('sigkill_psi_threshold not in config\nExit') + exit(1) + + +if 'sigterm_psi_threshold' in config_dict: + sigterm_psi_threshold = string_to_float_convert_test( + config_dict['sigterm_psi_threshold']) + if sigterm_psi_threshold is None: + errprint('Invalid sigterm_psi_threshold value, not float\nExit') + exit(1) + if sigterm_psi_threshold < 0 or sigterm_psi_threshold > 100: + errprint('sigterm_psi_threshold must be in the range [0; 100]\nExit') + exit(1) +else: + errprint('sigterm_psi_threshold not in config\nExit') + exit(1) + + +if 'min_badness' in config_dict: + min_badness = string_to_int_convert_test( + config_dict['min_badness']) + if min_badness is None: + errprint('Invalid min_badness value, not integer\nExit') + exit(1) + if min_badness < 0 or min_badness > 1000: + errprint('Invalud min_badness value\nExit') + exit(1) +else: + errprint('min_badness not in config\nExit') + exit(1) + + +if 'oom_score_adj_max' in config_dict: + oom_score_adj_max = string_to_int_convert_test( + config_dict['oom_score_adj_max']) + if oom_score_adj_max is None: + 
# max_ancestry_depth: required integer option, must be >= 1.
if 'max_ancestry_depth' in config_dict:
    max_ancestry_depth = string_to_int_convert_test(
        config_dict['max_ancestry_depth'])
    # Check the value that was just parsed, so that a non-integer value
    # is reported here and never reaches the comparison below.
    if max_ancestry_depth is None:
        errprint('Invalid max_ancestry_depth value, not integer\nExit')
        exit(1)
    if max_ancestry_depth < 1:
        errprint('Invalud max_ancestry_depth value\nExit')
        exit(1)
else:
    errprint('max_ancestry_depth is not in config\nExit')
    exit(1)
is not in config\nExit') + exit(1) + + +if 'psi_path' in config_dict: + psi_path = config_dict['psi_path'] +else: + errprint('psi_path is not in config\nExit') + exit(1) + + +if 'psi_metrics' in config_dict: + psi_metrics = config_dict['psi_metrics'] +else: + errprint('psi_metrics is not in config\nExit') + exit(1) + + +if 'warning_exe' in config_dict: + warning_exe = config_dict['warning_exe'] + if warning_exe != '': + check_warning_exe = True + else: + check_warning_exe = False +else: + errprint('warning_exe is not in config\nExit') + exit(1) + + +if 'extra_table_info' in config_dict: + extra_table_info = config_dict['extra_table_info'] + if (extra_table_info != 'None' and + extra_table_info != 'cgroup_v1' and + extra_table_info != 'cgroup_v2' and + extra_table_info != 'cmdline' and + extra_table_info != 'environ' and + extra_table_info != 'realpath' and + extra_table_info != 'All'): + + errprint('Invalid config: invalid extra_table_info value\nExit') + exit(1) +else: + errprint('Invalid config: extra_table_info is not in config\nExit') + exit(1) + + +separate_log = conf_parse_bool('separate_log') + +if separate_log: + + import logging + from logging import basicConfig + from logging import info + + log_dir = '/var/log/nohang' + + try: + os.mkdir(log_dir) + except PermissionError: + print('ERROR: can not create log dir') + except FileExistsError: + pass + + logfile = log_dir + '/nohang.log' + + try: + with open(logfile, 'a') as f: + pass + except FileNotFoundError: + print('ERROR: log FileNotFoundError') + except PermissionError: + print('ERROR: log PermissionError') + + try: + basicConfig( + filename=logfile, + level=logging.INFO, + format="%(asctime)s: %(message)s") + except PermissionError: + errprint('ERROR: Permission denied: {}'.format(logfile)) + except FileNotFoundError: + errprint('ERROR: FileNotFoundError: {}'.format(logfile)) + + +if 'min_mem_report_interval' in config_dict: + min_mem_report_interval = string_to_float_convert_test( + 
# Required relation between the sleep periods:
# over_sleep <= min_sleep <= max_sleep.
if max_sleep < min_sleep:
    errprint(
        'min_sleep value must not exceed max_sleep value.\nExit'
    )
    exit(1)


if min_sleep < over_sleep:
    errprint(
        'over_sleep value must not exceed min_sleep value.\nExit'
    )
    exit(1)


# Equal bounds leave nothing to calculate: the period is constant.
stable_sleep = max_sleep == min_sleep
def get_swap_threshold_tuple(string):
    """Parse a swap threshold written in the config as 'N%' or 'NM'.

    Args:
        string: str, config value ending with '%' or 'M'.

    Returns:
        (percent, True) for a percent value from the range [0; 100], or
        (value * 1024, False) for a non-negative value given with 'M'.

    Invalid input terminates the program with exit code 1.
    """
    number = string[:-1]

    if string.endswith('%'):
        if string_to_float_convert_test(number) is None:
            errprint('somewhere swap unit is not float_%')
            exit(1)

        percent_value = float(number.strip())
        if percent_value < 0 or percent_value > 100:
            errprint('invalid value, must be from the range[0; 100] %')
            exit(1)

        return percent_value, True

    if string.endswith('M'):
        if string_to_float_convert_test(number) is None:
            errprint('somewhere swap unit is not float_M')
            exit(1)

        abs_value = float(number.strip()) * 1024
        if abs_value < 0:
            errprint('invalid unit in config (negative value)')
            exit(1)

        return abs_value, False

    errprint(
        'Invalid config file. There are invalid units somewhere\nExit')
    exit(1)
The prevention of killing innocent victims\n') + print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm)) + print('min_badness: {}'.format(min_badness)) + + print('decrease_oom_score_adj: {}'.format( + decrease_oom_score_adj + )) + if decrease_oom_score_adj: + print('oom_score_adj_max: {}'.format(oom_score_adj_max)) + + print('\n4. Impact on the badness of processes via matching their' + ' names, cmdlines ir UIDs with regular expressions\n') + + print('(todo)') + + print('\n5. The execution of a specific command instead of sen' + 'ding the\nSIGTERM signal\n') + + print('\n6. GUI notifications:\n- OOM prevention results and\n- low m' + 'emory warnings\n') + print('gui_notifications: {}'.format(gui_notifications)) + + print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings)) + if gui_low_memory_warnings: + print('min_time_between_warnings: {}'.format( + min_time_between_warnings)) + + print('mem_min_warnings: {} MiB, {} %'.format( + round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1))) + + print('swap_min_warnings: {}'.format(swap_min_warnings)) + + print('zram_max_warnings: {} MiB, {} %'.format( + round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1))) + + print('\n7. 
Output verbosity\n') + print('print_config: {}'.format(print_config)) + print('print_mem_check_results: {}'.format(print_mem_check_results)) + print('print_sleep_periods: {}\n'.format(print_sleep_periods)) + + +########################################################################## + + +# for calculating the column width when printing mem and zram +mem_len = len(str(round(mem_total / 1024.0))) + +if gui_notifications: + notify_sig_dict = {SIGKILL: 'Killing', + SIGTERM: 'Terminating'} + + +# convert rates from MiB/s to KiB/s +rate_mem = rate_mem * 1024 +rate_swap = rate_swap * 1024 +rate_zram = rate_zram * 1024 + + +warn_time_now = 0 +warn_time_delta = 1000 +warn_timer = 0 + + +########################################################################## + + +if not root: + log('WARNING: effective UID != 0; euid={}; processes with other e' + 'uids will be invisible for nohang'.format(self_uid)) + + +# Try to lock all memory + +mlockall() + +########################################################################## + + +# print_self_rss() + + +log('Monitoring has started!') + +stdout.flush() + +########################################################################## + +psi_avg_string = '' # will be overwritten if PSI monitoring enabled + + +if psi_support and not ignore_psi: + psi_t0 = time() + + +if print_mem_check_results: + + # to find delta mem + wt2 = 0 + new_mem = 0 + + # init mem report interval + report0 = 0 + + +# handle signals +for i in sig_list: + signal(i, signal_handler) + + +while True: + + if psi_support and not ignore_psi: + + psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics) + + if print_mem_check_results: + psi_avg_string = 'PSI avg value: {} | '.format( + str(psi_avg_value).rjust(6)) + + if psi_avg_value >= sigkill_psi_threshold: + sigkill_psi_exceeded = True + else: + sigkill_psi_exceeded = False + + if psi_avg_value >= sigterm_psi_threshold: + sigterm_psi_exceeded = True + else: + sigterm_psi_exceeded = False + + if time() - psi_t0 
>= psi_post_action_delay: + psi_post_action_delay_exceeded = True + else: + psi_post_action_delay_exceeded = False + + if psi_debug: + log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps' + 'i_post_action_delay_exceeded: {}'.format( + sigterm_psi_exceeded, + sigkill_psi_exceeded, + psi_post_action_delay_exceeded)) + + if sigkill_psi_exceeded and psi_post_action_delay_exceeded: + time0 = time() + mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \ + 'old ({})'.format( + psi_avg_value, sigkill_psi_threshold) + + implement_corrective_action(SIGKILL) + + psi_t0 = time() + continue + + if sigterm_psi_exceeded and psi_post_action_delay_exceeded: + time0 = time() + mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \ + 'shold ({})'.format(psi_avg_value, sigterm_psi_threshold) + + implement_corrective_action(SIGTERM) + + psi_t0 = time() + continue + + mem_available, swap_total, swap_free = check_mem_and_swap() + + # if swap_min_sigkill is set in percent + if swap_kill_is_percent: + swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0 + + if swap_term_is_percent: + swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0 + + if swap_warn_is_percent: + swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0 + + mem_used_zram = check_zram() + + if print_mem_check_results: + + wt1 = time() + + delta = (mem_available + swap_free) - new_mem + + t_cycle = wt1 - wt2 + + report_delta = wt1 - report0 + + if report_delta >= min_mem_report_interval: + + mem_report = True + new_mem = mem_available + swap_free + + report0 = wt1 + + else: + mem_report = False + + wt2 = time() + + if mem_report: + + speed = delta / 1024.0 / report_delta + speed_info = ' | dMem: {} M/s'.format( + str(round(speed)).rjust(5) + ) + + # Calculate 'swap-column' width + swap_len = len(str(round(swap_total / 1024.0))) + + # Output available mem sizes + if swap_total == 0 and mem_used_zram == 0: + log('{}MemAvail: {} M, {} %{}'.format( + psi_avg_string, + 
human(mem_available, mem_len), + just_percent_mem(mem_available / mem_total), + speed_info + ) + ) + + elif swap_total > 0 and mem_used_zram == 0: + log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format( + psi_avg_string, + human(mem_available, mem_len), + just_percent_mem(mem_available / mem_total), + human(swap_free, swap_len), + just_percent_swap(swap_free / (swap_total + 0.1)), + speed_info + ) + ) + + else: + log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem' + 'UsedZram: {} M, {} %{}'.format( + psi_avg_string, + human(mem_available, mem_len), + just_percent_mem(mem_available / mem_total), + human(swap_free, swap_len), + just_percent_swap(swap_free / (swap_total + 0.1)), + human(mem_used_zram, mem_len), + just_percent_mem(mem_used_zram / mem_total), + speed_info + ) + ) + + if swap_total > swap_min_sigkill_kb: + swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1)) + else: + swap_sigkill_pc = '-' + + if swap_total > swap_min_sigterm_kb: + swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1)) + else: + swap_sigterm_pc = '-' + + # MEM SWAP KILL + if (mem_available <= mem_min_sigkill_kb and + swap_free <= swap_min_sigkill_kb): + time0 = time() + + mem_info = 'Hard threshold exceeded\nMemory status that requ' \ + 'ires corrective actions:' \ + '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ + 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ + 'p_min_sigkill [{} MiB, {} %]'.format( + kib_to_mib(mem_available), + percent(mem_available / mem_total), + kib_to_mib(mem_min_sigkill_kb), + percent(mem_min_sigkill_kb / mem_total), + kib_to_mib(swap_free), + percent(swap_free / (swap_total + 0.1)), + kib_to_mib(swap_min_sigkill_kb), + swap_sigkill_pc) + + implement_corrective_action(SIGKILL) + + psi_t0 = time() + continue + + # ZRAM KILL + if mem_used_zram >= zram_max_sigkill_kb: + time0 = time() + + mem_info = 'Hard threshold exceeded\nMemory status that requir' \ + 'es corrective actions:' \ + '\n MemUsedZram [{} MiB, {} 
%] >= zram_max_sig' \ + 'kill [{} MiB, {} %]'.format( + kib_to_mib(mem_used_zram), + percent(mem_used_zram / mem_total), + kib_to_mib(zram_max_sigkill_kb), + percent(zram_max_sigkill_kb / mem_total)) + + implement_corrective_action(SIGKILL) + + psi_t0 = time() + continue + + # MEM SWAP TERM + if mem_available <= mem_min_sigterm_kb and \ + swap_free <= swap_min_sigterm_kb: + + time0 = time() + + mem_info = 'Soft threshold exceeded\nMemory status that requi' \ + 'res corrective actions:' \ + '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ + 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ + 'p_min_sigterm [{} MiB, {} %]'.format( + kib_to_mib(mem_available), + percent(mem_available / mem_total), + kib_to_mib(mem_min_sigterm_kb), + round(mem_min_sigterm_percent, 1), + kib_to_mib(swap_free), + percent(swap_free / (swap_total + 0.1)), + kib_to_mib(swap_min_sigterm_kb), + swap_sigterm_pc) + + implement_corrective_action(SIGTERM) + + psi_t0 = time() + continue + + # ZRAM TERM + if mem_used_zram >= zram_max_sigterm_kb: + time0 = time() + + mem_info = 'Soft threshold exceeded\nMemory status that requ' \ + 'ires corrective actions:' \ + '\n MemUsedZram [{} MiB, {} %] >= ' \ + 'zram_max_sigterm [{} M, {} %]'.format( + kib_to_mib(mem_used_zram), + percent(mem_used_zram / mem_total), + kib_to_mib(zram_max_sigterm_kb), + percent(zram_max_sigterm_kb / mem_total)) + + implement_corrective_action(SIGTERM) + + psi_t0 = time() + continue + + # LOW MEMORY WARNINGS + if gui_low_memory_warnings: + + if mem_available <= mem_min_warnings_kb and \ + swap_free <= swap_min_warnings_kb + 0.1 or \ + mem_used_zram >= zram_max_warnings_kb: + warn_time_delta = time() - warn_time_now + warn_time_now = time() + warn_timer += warn_time_delta + if warn_timer > min_time_between_warnings: + send_notify_warn() + warn_timer = 0 + + + + + # SLEEP BETWEEN MEM CHECKS + sleep_after_check_mem() + + + + + + + + + +