From 9b0b15db5b6219acf6a0688e749c653a9dea70e1 Mon Sep 17 00:00:00 2001 From: Alexey Avramov Date: Tue, 30 Apr 2019 18:33:34 +0900 Subject: [PATCH] fix style --- nohang | 191 +-- nohang.conf | 2 +- oom-sort | 14 +- trash/nohang 0.2 rc1 | 2946 ++++++++++++++++++++++++++++++++++++++++++ trash/oom-trigger | 6 +- trash/thanatolog | 7 +- trash/x01 | 124 ++ 7 files changed, 3103 insertions(+), 187 deletions(-) create mode 100755 trash/nohang 0.2 rc1 create mode 100755 trash/x01 diff --git a/nohang b/nohang index 66de8d0..9ffc02d 100755 --- a/nohang +++ b/nohang @@ -164,23 +164,15 @@ def log(*msg): print(*msg) except OSError: sleep(0.01) - # print('OSError in print(*msg)') - if separate_log: - # need fix: TypeError: not all arguments converted during string - # formatting - try: info(*msg) except OSError: sleep(0.01) - # print('OSError in info(*msg)') def print_version(): """ - сначала пытаться получ версию прямо из гита - вариант для неустановленых, - для тех, кто еще не запускал make install """ try: v = rline1('/etc/nohang/version') @@ -196,6 +188,7 @@ def print_version(): def test(): """ """ + print('\n(This option is not ready to use!)\n') print(version) print(argv) @@ -229,30 +222,9 @@ def test(): print("pid_to_state('2')") print(pid_to_state('2')) - ''' - print(hr) - print("update_stat_dict_and_print('key')") - print(update_stat_dict_and_print('key')) - - print(hr) - print("psi_mem_some_avg_total()") - print(psi_mem_some_avg_total()) - - print(hr) - print("psi_mem_some_avg10()") - print(psi_mem_some_avg10()) - - - - ''' - - print(hr) exit() -########################################################################## - - def pid_to_cgroup_v1(pid): """ """ @@ -672,12 +644,6 @@ def find_psi_metrics_value(psi_path, psi_metrics): return float(psi_list[1].split(' ')[3].split('=')[1]) -def check_mem(): - """find mem_available""" - # исправить название фции - return int(rline1('/proc/meminfo').split(':')[1][:-4]) - - def check_mem_and_swap(): """find mem_available, swap_total, swap_free""" with open('/proc/meminfo') as f: @@ -864,53 +830,6 @@ def send_notify_warn(): Look for process with maximum 'badness' and warn user with notification. (implement Low memory warnings) """ - - ''' - # find process with max badness - fat_tuple = find_victim() - pid = fat_tuple[0] - name = pid_to_name(pid) - - if mem_used_zram > 0: - low_mem_percent = '{}% {}% {}%'.format( - round(mem_available / mem_total * 100), - round(swap_free / (swap_total + 0.1) * 100), - round(mem_used_zram / mem_total * 100)) - elif swap_free > 0: - low_mem_percent = '{}% {}%'.format( - round(mem_available / mem_total * 100), - round(swap_free / (swap_total + 0.1) * 100)) - else: - low_mem_percent = '{}%'.format( - round(mem_available / mem_total * 100)) - - # title = 'Low memory: {}'.format(low_mem_percent) - title = 'Low memory' - ''' - - ''' - body2 = 'Next victim: {}[{}]'.format( - name.replace( - # symbol '&' can break notifications in some themes, - # therefore it is replaced by '*' - '&', '*'), - pid - ) - ''' - - ''' - body = 'MemAvail: {}%\nSwapFree: {}%'.format( - round(mem_available / mem_total * 100), - round(swap_free / (swap_total + 0.1) * 100)) - - if root: # If nohang was started by root - # send notification to all active users with special script - notify_helper(title, body) - else: # Or by regular user - # send notification to user that runs this nohang - notify_send_wait(title, body) - ''' - log('Warning threshold exceeded') if check_warning_exe: @@ -1058,7 +977,7 @@ def find_victim(_print_proc_table): non_decimal_list = get_non_decimal_pids() for i in non_decimal_list: - if i in pid_list: # ???????????????????????????????????????????? + if i in pid_list: pid_list.remove(i) pid_badness_list = [] @@ -1091,9 +1010,11 @@ def find_victim(_print_proc_table): hr = '#' * 115 log(hr) - log('# PID PPID badness oom_score oom_score_adj eUID S VmSize VmRSS VmSwap Name {}'.format( - extra_table_title)) - log('#------- ------- ------- --------- ------------- ---------- - ------ ----- ------ --------------- --------') + log('# PID PPID badness oom_score oom_score_adj e' + 'UID S VmSize VmRSS VmSwap Name {}'.format( + extra_table_title)) + log('#------- ------- ------- --------- ------------- -------' + '--- - ------ ----- ------ --------------- --------') for pid in pid_list: @@ -1327,8 +1248,6 @@ def find_victim_info(pid, victim_badness, name): 'The victim died in the search process: ProcessLookupError') return None - # print((time() - status0) * 1000, 'status time') - len_vm = len(str(vm_size)) try: @@ -1343,9 +1262,7 @@ def find_victim_info(pid, victim_badness, name): 'The victim died in the search process: FileNotFoundError') return None - # te1 = time() ancestry = pid_to_ancestry(pid, max_ancestry_depth) - # print((time() - te1) * 1000, 'ms, ancestry') if detailed_rss: detailed_rss_info = ' (' \ @@ -1397,7 +1314,7 @@ def find_victim_info(pid, victim_badness, name): return victim_info -# для дедупликации уведомлений +# for warnings deduplication dick = dict() dick['v'] = [1, 2, 3, time()] @@ -1436,19 +1353,12 @@ def implement_corrective_action(signal): soft_match = False if soft_actions and signal is SIGTERM: - # если мягкий порог И список мягких не пуст: - # итерируемся по списку, ища мэтчинги. Есть совпадения - выполн - # команду и выход из цикла. name = pid_to_name(pid) cgroup_v1 = pid_to_cgroup_v1(pid) service = '' cgroup_v1_tail = cgroup_v1.rpartition('/')[2] - # log(cgroup_v1_tail) if cgroup_v1_tail.endswith('.service'): service = cgroup_v1_tail - # print('$SERVICE:', [service]) - # print('ИЩЕМ СОВПАДЕНИЯ ДЛЯ МЯГКИХ ДЕЙСТВИЙ') - # итерируемся по списку кортежей for i in soft_actions_list: unit = i[0] if unit == 'name': @@ -1457,10 +1367,9 @@ def implement_corrective_action(signal): u = cgroup_v1 regexp = i[1] command = i[2] - # print([u, regexp, command]) if search(regexp, u) is not None: - log("Regexp '{}' matches with {} '{}'".format(regexp, unit, u)) - # print('СОВПАДЕНИЕ НАЙДЕНО') + log("Regexp '{}' matches with {} '{}'".format( + regexp, unit, u)) soft_match = True break @@ -1552,10 +1461,6 @@ def implement_corrective_action(signal): exe(cmd) if gui_notifications: - - # min delay after same notification - # все не так. От этого вообще пол дедупликация . терминация - # один раз покажется при любом раскладе. delay_after_same_notify = 1 x = dick['v'] @@ -1567,11 +1472,7 @@ def implement_corrective_action(signal): # print(y[3] - x[3]) if x[0] == y[0] and x[1] == y[1] and x[2] == y[2]: - # print('совпадение имени, пид, сигнала') - - # сохр в словаре первре совпавшее время dt = y[3] - x[3] - # print(dt, 'dt') if dt < delay_after_same_notify: notif = False @@ -1669,7 +1570,7 @@ def sleep_after_check_mem(): try: stdout.flush() - except OSError: # OSError: [Errno 105] No buffer space available + except OSError: pass sleep(t) @@ -1741,8 +1642,6 @@ def calculate_percent(arg_key): print_proc_table_flag = False -# print(len(argv), argv) - if len(argv) == 1: if os.path.exists('./nohang.conf'): config = os.getcwd() + '/nohang.conf' @@ -1887,8 +1786,6 @@ try: if etc: - # это остаток строки без первого ключа. Содержит: регулярка /// - # команда a = line.partition('@SOFT_ACTION_RE_NAME')[ 2].partition(opt_separator) @@ -1901,14 +1798,10 @@ try: zzz = (a1, a2, a3) - # print(zzz) - soft_actions_list.append(zzz) if etc2: - # это остаток строки без первого ключа. Содержит: регулярка /// - # команда a = line.partition('@SOFT_ACTION_RE_CGROUP_V1')[ 2].partition(opt_separator) @@ -1921,8 +1814,6 @@ try: zzz = (a1, a2, a3) - # print(zzz) - soft_actions_list.append(zzz) if line.startswith('@PROCESSNAME_RE'): @@ -2040,16 +1931,16 @@ else: re_match_cgroup_v2 = True -# print(processname_re_list) -# print(cmdline_re_list) -# print(uid_re_list) -# print(environ_re_list) -# print(realpath_re_list) -# print(cgroup_v1_re_list) -# print(cgroup_v2_re_list) +print(processname_re_list) +print(cmdline_re_list) +print(uid_re_list) +print(environ_re_list) +print(realpath_re_list) +print(cgroup_v1_re_list) +print(cgroup_v2_re_list) -# print(soft_actions_list) +print(soft_actions_list) if soft_actions_list == []: soft_actions = False @@ -2077,21 +1968,6 @@ gui_notifications = conf_parse_bool('gui_notifications') decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj') ignore_psi = conf_parse_bool('ignore_psi') - -# regex_matching = conf_parse_bool('regex_matching') -# re_match_cmdline = conf_parse_bool('re_match_cmdline') -# re_match_uid = conf_parse_bool('re_match_uid') -# re_match_cgroup_v1 = conf_parse_bool('re_match_cgroup_v1') -# re_match_cgroup_v2 = conf_parse_bool('re_match_cgroup_v2') -# re_match_realpath = conf_parse_bool('re_match_realpath') -# re_match_environ = conf_parse_bool('re_match_environ') - - -# if regex_matching or re_match_cmdline or re_match_uid or re_match_cgroup -# or re_match_realpath: -# from re import search -# from sre_constants import error as invalid_re - (mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent ) = calculate_percent('mem_min_sigterm') @@ -2469,9 +2345,6 @@ psi_support = os.path.exists(psi_path) # Get KiB levels if it's possible. -# получ кб. если не кб - то процент. Если процент - находим кб ниже на -# основе полученного своптотал и процентов. - def get_swap_threshold_tuple(string): # re (Num %, True) or (Num KiB, False) @@ -2570,7 +2443,6 @@ if print_config: print('min_delay_after_sigkill: {}'.format(min_delay_after_sigkill)) print('min_badness: {}'.format(min_badness)) - # False (OK) - OK не нужен когда фолс print('decrease_oom_score_adj: {}'.format( decrease_oom_score_adj )) @@ -2633,14 +2505,11 @@ warn_timer = 0 ########################################################################## - - if not root: log('WARNING: effective UID != 0; euid={}; processes with other e' 'uids will be invisible for nohang'.format(self_uid)) - # Try to lock all memory mlockall() @@ -2648,13 +2517,9 @@ mlockall() ########################################################################## - print_self_rss() -# if print_proc_table: -# find_victim(print_proc_table) - log('Monitoring has started!') stdout.flush() @@ -2739,10 +2604,6 @@ while True: mem_available, swap_total, swap_free = check_mem_and_swap() - # print(mem_available, swap_total, swap_free) - - # если метры - получаем киб выше и сразу. см. - # if swap_min_sigkill is set in percent if swap_kill_is_percent: swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0 @@ -2753,9 +2614,6 @@ while True: if swap_warn_is_percent: swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0 - # в общем случае для работы нужны килобайты. Если в процентах задано - - # находим КБ тут, после получения своптотал. - mem_used_zram = check_zram() if print_mem_check_results: @@ -2825,8 +2683,7 @@ while True: ) ) - # если swap_min_sigkill задан в абсолютной величине и Swap_total = 0 - if swap_total > swap_min_sigkill_kb: # If swap_min_sigkill is absolute + if swap_total > swap_min_sigkill_kb: swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1)) else: swap_sigkill_pc = '-' @@ -2834,14 +2691,8 @@ while True: if swap_total > swap_min_sigterm_kb: swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1)) else: - - # печатать так: SwapTotal = 0, ignore swapspace swap_sigterm_pc = '-' - # это для печати меминфо. Все переработать нахрен. - - # далее пошла проверка превышения порогов - # MEM SWAP KILL if (mem_available <= mem_min_sigkill_kb and swap_free <= swap_min_sigkill_kb): @@ -2898,8 +2749,6 @@ while True: kib_to_mib(mem_available), percent(mem_available / mem_total), kib_to_mib(mem_min_sigterm_kb), - # percent(mem_min_sigterm_kb / mem_total), - # ОКРУГЛЯТЬ НА МЕСТЕ ВЫШЕ (или не выше, хз) round(mem_min_sigterm_percent, 1), kib_to_mib(swap_free), percent(swap_free / (swap_total + 0.1)), diff --git a/nohang.conf b/nohang.conf index a368572..d1b3271 100644 --- a/nohang.conf +++ b/nohang.conf @@ -142,7 +142,7 @@ min_badness = 20 Valid values are non-negative floating-point numbers. -min_delay_after_sigterm = 0.2 +min_delay_after_sigterm = 1 min_delay_after_sigkill = 1 Valid values are True and False. diff --git a/oom-sort b/oom-sort index e9accaf..4703fbd 100755 --- a/oom-sort +++ b/oom-sort @@ -8,7 +8,7 @@ from operator import itemgetter from os import listdir from argparse import ArgumentParser -"""#######################################################################79""" +########################################################################## # define funtcions @@ -63,7 +63,7 @@ def get_max_pid_len(): return len(line.strip()) -"""#######################################################################79""" +########################################################################## sort_dict = { @@ -78,7 +78,7 @@ sort_dict = { } -"""#######################################################################79""" +########################################################################## # parse CLI args @@ -129,7 +129,7 @@ if sort_by not in sort_dict: exit() -"""#######################################################################79""" +########################################################################## # find VmRSS, VmSwap and UID positions in /proc/*/status for further # searching positions of UID, VmRSS and VmSwap in each process @@ -146,7 +146,7 @@ vm_rss_index = status_names.index('VmRSS') vm_swap_index = status_names.index('VmSwap') -"""#######################################################################79""" +########################################################################## # get sorted list with pid, oom_score, oom_score_adj, cmdline # get status units: name, uid, rss, swap @@ -187,7 +187,7 @@ oom_list_sorted = sorted( oom_list, key=itemgetter(int(sort_dict[sort_by])), reverse=True) -"""#######################################################################79""" +########################################################################## # find width of columns @@ -206,7 +206,7 @@ if max_vm_rss_len < 5: max_vm_rss_len = 5 -"""#######################################################################79""" +########################################################################## # print output diff --git a/trash/nohang 0.2 rc1 b/trash/nohang 0.2 rc1 new file mode 100755 index 0000000..66de8d0 --- /dev/null +++ b/trash/nohang 0.2 rc1 @@ -0,0 +1,2946 @@ +#!/usr/bin/env python3 +"""A daemon that prevents OOM in Linux systems.""" + +import os +from ctypes import CDLL +from time import sleep, time +from operator import itemgetter +from sys import stdout, stderr, argv, exit, version +from signal import (signal, + SIGKILL, SIGTERM, SIGINT, SIGQUIT, + SIGHUP, SIGABRT, SIGSEGV, SIGBUS) +from re import search +from sre_constants import error as invalid_re + +start_time = time() + + +help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG] + +optional arguments: + -h, --help show this help message and exit + -v, --version print version + -t, --test print some tests + -p, --print-proc-table + print table of processes with their badness values + -c CONFIG, --config CONFIG + path to the config file, default values: + ./nohang.conf, /etc/nohang/nohang.conf""" + + +SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK']) + +conf_err_mess = 'Invalid config. Exit.' + +sig_dict = {SIGKILL: 'SIGKILL', + SIGTERM: 'SIGTERM'} + +self_pid = str(os.getpid()) + +self_uid = os.geteuid() + +if self_uid == 0: + root = True +else: + root = False + + +if os.path.exists('./nohang_notify_helper'): + notify_helper_path = './nohang_notify_helper' +else: + notify_helper_path = '/usr/sbin/nohang_notify_helper' + + +victim_dict = dict() + + +# will store corrective actions stat +stat_dict = dict() + + +separate_log = False # will be overwritten after parse config + + +def find_cgroup_indexes(): + """ Find cgroup-line positions in /proc/*/cgroup file. + """ + + cgroup_v1_index = None + cgroup_v2_index = None + + with open('/proc/self/cgroup') as f: + for index, line in enumerate(f): + if ':name=' in line: + cgroup_v1_index = index + if line.startswith('0::'): + cgroup_v2_index = index + + return cgroup_v1_index, cgroup_v2_index + + +cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes() + + +########################################################################## + +# define functions + + +def self_rss(): + """ + """ + return pid_to_status(self_pid)[5] + + +def print_self_rss(): + """ + """ + log('Self RSS: {} MiB'.format(self_rss())) + + +def signal_handler(signum, frame): + log('Got signal {}'.format(signum)) + update_stat_dict_and_print(None) + log('Exit') + exit() + + +def write(path, string): + """ + """ + with open(path, 'w') as f: + f.write(string) + + +def write_self_oom_score_adj(new_value): + """ + """ + if root: + write('/proc/self/oom_score_adj', new_value) + + +self_oom_score_adj_min = '-600' +self_oom_score_adj_max = '-6' + + +write_self_oom_score_adj(self_oom_score_adj_min) + + +def exe(cmd): + """ + """ + log('Execute the command: {}'.format(cmd)) + t0 = time() + write_self_oom_score_adj(self_oom_score_adj_max) + err = os.system(cmd) + write_self_oom_score_adj(self_oom_score_adj_min) + dt = time() - t0 + log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3))) + return err + + +def valid_re(reg_exp): + """Validate regular expression. + """ + try: + search(reg_exp, '') + except invalid_re: + log('Invalid config: invalid regexp: {}'.format(reg_exp)) + exit(1) + + +def func_print_proc_table(): + """ + """ + print_proc_table = True + find_victim(print_proc_table) + exit() + + +def log(*msg): + """ + """ + try: + print(*msg) + except OSError: + sleep(0.01) + # print('OSError in print(*msg)') + + if separate_log: + # need fix: TypeError: not all arguments converted during string + # formatting + + try: + info(*msg) + except OSError: + sleep(0.01) + # print('OSError in info(*msg)') + + +def print_version(): + """ + сначала пытаться получ версию прямо из гита - вариант для неустановленых, + для тех, кто еще не запускал make install + """ + try: + v = rline1('/etc/nohang/version') + except FileNotFoundError: + v = None + if v is None: + print('Nohang unknown version') + else: + print('Nohang ' + v) + exit() + + +def test(): + """ + """ + + print(version) + print(argv) + + hr = '==================================' + print(hr) + print("uptime()") + print(uptime()) + + print(hr) + print("os.uname()") + print(os.uname()) + + print(hr) + print("pid_to_starttime('self')") + print(pid_to_starttime('self')) + + print(hr) + print("get_victim_id('self')") + print(get_victim_id('self')) + + print(hr) + print("errprint('test')") + print(errprint('test')) + + print(hr) + print("mlockall()") + print(mlockall()) + + print(hr) + print("pid_to_state('2')") + print(pid_to_state('2')) + + ''' + print(hr) + print("update_stat_dict_and_print('key')") + print(update_stat_dict_and_print('key')) + + print(hr) + print("psi_mem_some_avg_total()") + print(psi_mem_some_avg_total()) + + print(hr) + print("psi_mem_some_avg10()") + print(psi_mem_some_avg10()) + + + + ''' + + print(hr) + exit() + + +########################################################################## + + +def pid_to_cgroup_v1(pid): + """ + """ + cgroup_v1 = '' + try: + with open('/proc/' + pid + '/cgroup') as f: + for index, line in enumerate(f): + if index == cgroup_v1_index: + cgroup_v1 = '/' + line.partition('/')[2][:-1] + return cgroup_v1 + except FileNotFoundError: + return '' + + +def pid_to_cgroup_v2(pid): + """ + """ + cgroup_v2 = '' + try: + with open('/proc/' + pid + '/cgroup') as f: + for index, line in enumerate(f): + if index == cgroup_v2_index: + cgroup_v2 = line[3:-1] + return cgroup_v2 + except FileNotFoundError: + return '' + + +def pid_to_starttime(pid): + """ handle FNF error! + """ + try: + starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[ + 2].split(' ')[20] + + except UnicodeDecodeError: + # print('LOL') + with open('/proc/' + pid + '/stat', 'rb') as f: + starttime = f.read().decode('utf-8', 'ignore').rpartition( + ')')[2].split(' ')[20] + + return float(starttime) / SC_CLK_TCK + + +def get_victim_id(pid): + """victim_id is starttime + pid""" + try: + return rline1('/proc/' + pid + '/stat').rpartition( + ')')[2].split(' ')[20] + pid + except FileNotFoundError: + return '' + + +def pid_to_state(pid): + """ Handle FNF error! (BTW it already handled in find_victim_info()) + """ + return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1] + + +def pid_to_name(pid): + """ + """ + try: + with open('/proc/' + pid + '/comm', 'rb') as f: + return f.read().decode('utf-8', 'ignore')[:-1] + except FileNotFoundError: + return '' + except ProcessLookupError: + return '' + + +def pid_to_ppid(pid): + """ + """ + try: + with open('/proc/' + pid + '/status') as f: + for n, line in enumerate(f): + if n is ppid_index: + return line.split('\t')[1].strip() + except FileNotFoundError: + return '' + except ProcessLookupError: + return '' + except UnicodeDecodeError: + with open('/proc/' + pid + '/status', 'rb') as f: + f_list = f.read().decode('utf-8', 'ignore').split('\n') + for i in range(len(f_list)): + if i is ppid_index: + return f_list[i].split('\t')[1] + + +def pid_to_ancestry(pid, max_ancestry_depth=1): + """ + """ + if max_ancestry_depth == 1: + ppid = pid_to_ppid(pid) + pname = pid_to_name(ppid) + return '\n PPID: {} ({})'.format(ppid, pname) + if max_ancestry_depth == 0: + return '' + anc_list = [] + for i in range(max_ancestry_depth): + ppid = pid_to_ppid(pid) + pname = pid_to_name(ppid) + anc_list.append((ppid, pname)) + if ppid == '1': + break + pid = ppid + a = '' + for i in anc_list: + a = a + ' <= PID {} ({})'.format(i[0], i[1]) + return '\n Ancestry: ' + a[4:] + + +def pid_to_cmdline(pid): + """ + Get process cmdline by pid. + + pid: str pid of required process + returns string cmdline + """ + try: + with open('/proc/' + pid + '/cmdline') as f: + return f.read().replace('\x00', ' ').rstrip() + except FileNotFoundError: + return '' + + +def pid_to_environ(pid): + """ + Get process environ by pid. + + pid: str pid of required process + returns string environ + """ + try: + with open('/proc/' + pid + '/environ') as f: + return f.read().replace('\x00', ' ').rstrip() + except FileNotFoundError: + return '' + + +def pid_to_realpath(pid): + try: + return os.path.realpath('/proc/' + pid + '/exe') + except FileNotFoundError: + return '' + + +def pid_to_uid(pid): + """return euid""" + try: + with open('/proc/' + pid + '/status') as f: + for n, line in enumerate(f): + if n is uid_index: + return line.split('\t')[2] + except UnicodeDecodeError: + with open('/proc/' + pid + '/status', 'rb') as f: + f_list = f.read().decode('utf-8', 'ignore').split('\n') + return f_list[uid_index].split('\t')[2] + except FileNotFoundError: + return '' + + +def pid_to_badness(pid): + """Find and modify badness (if it needs).""" + + try: + + oom_score = int(rline1('/proc/' + pid + '/oom_score')) + badness = oom_score + + if decrease_oom_score_adj: + oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj')) + if badness > oom_score_adj_max and oom_score_adj > 0: + badness = badness - oom_score_adj + oom_score_adj_max + + if regex_matching: + name = pid_to_name(pid) + for re_tup in processname_re_list: + if search(re_tup[1], name) is not None: + badness += int(re_tup[0]) + + if re_match_cgroup_v1: + cgroup_v1 = pid_to_cgroup_v1(pid) + for re_tup in cgroup_v1_re_list: + if search(re_tup[1], cgroup_v1) is not None: + badness += int(re_tup[0]) + + if re_match_cgroup_v2: + cgroup_v2 = pid_to_cgroup_v2(pid) + for re_tup in cgroup_v2_re_list: + if search(re_tup[1], cgroup_v2) is not None: + badness += int(re_tup[0]) + + if re_match_realpath: + realpath = pid_to_realpath(pid) + for re_tup in realpath_re_list: + if search(re_tup[1], realpath) is not None: + badness += int(re_tup[0]) + + if re_match_cmdline: + cmdline = pid_to_cmdline(pid) + for re_tup in cmdline_re_list: + if search(re_tup[1], cmdline) is not None: + badness += int(re_tup[0]) + + if re_match_environ: + environ = pid_to_environ(pid) + for re_tup in environ_re_list: + if search(re_tup[1], environ) is not None: + badness += int(re_tup[0]) + + if re_match_uid: + uid = pid_to_uid(pid) + for re_tup in uid_re_list: + if search(re_tup[1], uid) is not None: + badness += int(re_tup[0]) + + if forbid_negative_badness: + if badness < 0: + badness = 0 + + return badness, oom_score + + except FileNotFoundError: + return None, None + except ProcessLookupError: + return None, None + + +def pid_to_status(pid): + """ + """ + + try: + + with open('/proc/' + pid + '/status') as f: + + for n, line in enumerate(f): + + if n is 0: + name = line.split('\t')[1][:-1] + + if n is state_index: + state = line.split('\t')[1][0] + continue + + if n is ppid_index: + ppid = line.split('\t')[1][:-1] + continue + + if n is uid_index: + uid = line.split('\t')[2] + continue + + if n is vm_size_index: + vm_size = kib_to_mib(int(line.split('\t')[1][:-4])) + continue + + if n is vm_rss_index: + vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) + continue + + if n is vm_swap_index: + vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) + break + + return name, state, ppid, uid, vm_size, vm_rss, vm_swap + + except UnicodeDecodeError: + return pid_to_status_unicode(pid) + + except FileNotFoundError: + return None + + except ProcessLookupError: + return None + + except ValueError: + return None + + +def pid_to_status_unicode(pid): + """ + """ + try: + + with open('/proc/' + pid + '/status', 'rb') as f: + f_list = f.read().decode('utf-8', 'ignore').split('\n') + + for i in range(len(f_list)): + + if i is 0: + name = f_list[i].split('\t')[1] + + if i is state_index: + state = f_list[i].split('\t')[1][0] + + if i is ppid_index: + ppid = f_list[i].split('\t')[1] + + if i is uid_index: + uid = f_list[i].split('\t')[2] + + if i is vm_size_index: + vm_size = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + if i is vm_rss_index: + vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) + + if i is vm_swap_index: + vm_swap = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) + + return name, state, ppid, uid, vm_size, vm_rss, vm_swap + + except FileNotFoundError: + return None + + except ProcessLookupError: + return None + + except ValueError: + return None + + +########################################################################## + + +def uptime(): + """ + """ + return float(rline1('/proc/uptime').split(' ')[0]) + + +def errprint(*text): + """ + """ + print(*text, file=stderr, flush=True) + + +def mlockall(): + """Lock all memory to prevent swapping nohang process.""" + + MCL_CURRENT = 1 + MCL_FUTURE = 2 + MCL_ONFAULT = 4 + + libc = CDLL('libc.so.6', use_errno=True) + + result = libc.mlockall( + MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT + ) + if result != 0: + result = libc.mlockall( + MCL_CURRENT | MCL_FUTURE + ) + if result != 0: + log('WARNING: cannot lock all memory') + else: + log('All memory locked with MCL_CURRENT | MCL_FUTURE') + else: + log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT') + + +def update_stat_dict_and_print(key): + """ + """ + + if key is not None: + + if key not in stat_dict: + + stat_dict.update({key: 1}) + + else: + + new_value = stat_dict[key] + 1 + stat_dict.update({key: new_value}) + + if print_total_stat: + + stats_msg = 'Total stat (what happened in the last {}):'.format( + format_time(time() - start_time)) + + for i in stat_dict: + stats_msg += '\n {}: {}'.format(i, stat_dict[i]) + + log(stats_msg) + + +def find_psi_metrics_value(psi_path, psi_metrics): + """ + """ + + if psi_support: + + if psi_metrics == 'some_avg10': + return float(rline1(psi_path).split(' ')[1].split('=')[1]) + if psi_metrics == 'some_avg60': + return float(rline1(psi_path).split(' ')[2].split('=')[1]) + if psi_metrics == 'some_avg300': + return float(rline1(psi_path).split(' ')[3].split('=')[1]) + + if psi_metrics == 'full_avg10': + with open(psi_path) as f: + psi_list = f.readlines() + return float(psi_list[1].split(' ')[1].split('=')[1]) + if psi_metrics == 'full_avg60': + with open(psi_path) as f: + psi_list = f.readlines() + return float(psi_list[1].split(' ')[2].split('=')[1]) + if psi_metrics == 'full_avg300': + with open(psi_path) as f: + psi_list = f.readlines() + return float(psi_list[1].split(' ')[3].split('=')[1]) + + +def check_mem(): + """find mem_available""" + # исправить название фции + return int(rline1('/proc/meminfo').split(':')[1][:-4]) + + +def check_mem_and_swap(): + """find mem_available, swap_total, swap_free""" + with open('/proc/meminfo') as f: + for n, line in enumerate(f): + if n is 2: + mem_available = int(line.split(':')[1][:-4]) + continue + if n is swap_total_index: + swap_total = int(line.split(':')[1][:-4]) + continue + if n is swap_free_index: + swap_free = int(line.split(':')[1][:-4]) + break + return mem_available, swap_total, swap_free + + +def check_zram(): + """find MemUsedZram""" + disksize_sum = 0 + mem_used_total_sum = 0 + + for dev in os.listdir('/sys/block'): + if dev.startswith('zram'): + stat = zram_stat(dev) + disksize_sum += int(stat[0]) + mem_used_total_sum += int(stat[1]) + + # Means that when setting zram disksize = 1 GiB available memory + # decrease by 0.0042 GiB. + # Found experimentally, requires clarification with different kernaels and + # architectures. + # On small disk drives (up to gigabyte) it can be more, up to 0.0045. + # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should + # be 0.001: + # ("zram uses about 0.1% of the size of the disk" + # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt), + # but this statement contradicts the experimental data. + # ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize + # Found experimentally. + ZRAM_DISKSIZE_FACTOR = 0.0042 + + return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0 + + +def format_time(t): + """ + """ + t = int(t) + if t < 60: + return '{} sec'.format(t) + elif t >= 60 and t < 3600: + m = t // 60 + s = t % 60 + return '{} min {} sec'.format(m, s) + else: + h = t // 3600 + s0 = t - h * 3600 + m = s0 // 60 + s = s0 % 60 + return '{} h {} min {} sec'.format(h, m, s) + + +def string_to_float_convert_test(string): + """Try to interprete string values as floats.""" + try: + return float(string) + except ValueError: + return None + + +def string_to_int_convert_test(string): + """Try to interpret string values as integers.""" + try: + return int(string) + except ValueError: + return None + + +def conf_parse_string(param): + """ + Get string parameters from the config dict. + + param: config_dict key + returns config_dict[param].strip() + """ + if param in config_dict: + return config_dict[param].strip() + else: + errprint('All the necessary parameters must be in the config') + errprint('There is no "{}" parameter in the config'.format(param)) + exit(1) + + +def conf_parse_bool(param): + """ + Get bool parameters from the config_dict. + + param: config_dicst key + returns bool + """ + if param in config_dict: + param_str = config_dict[param] + if param_str == 'True': + return True + elif param_str == 'False': + return False + else: + errprint('Invalid value of the "{}" parameter.'.format(param)) + errprint('Valid values are True and False.') + errprint('Exit') + exit(1) + else: + errprint('All the necessary parameters must be in the config') + errprint('There is no "{}" parameter in the config'.format(param)) + exit(1) + + +def rline1(path): + """read 1st line from path.""" + try: + with open(path) as f: + for line in f: + return line[:-1] + except UnicodeDecodeError: + # print('UDE rline1', path) + with open(path, 'rb') as f: + return f.read(999).decode( + 'utf-8', 'ignore').split('\n')[0] # use partition()! + + +def kib_to_mib(num): + """Convert KiB values to MiB values.""" + return round(num / 1024.0) + + +def percent(num): + """Interprete num as percentage.""" + return round(num * 100, 1) + + +def just_percent_mem(num): + """convert num to percent and justify""" + return str(round(num * 100, 1)).rjust(4, ' ') + + +def just_percent_swap(num): + """ + """ + return str(round(num * 100, 1)).rjust(5, ' ') + + +def human(num, lenth): + """Convert KiB values to MiB values with right alignment""" + return str(round(num / 1024)).rjust(lenth, ' ') + + +def zram_stat(zram_id): + """ + Get zram state. + + zram_id: str zram block-device id + returns bytes diskcize, str mem_used_total + """ + try: + disksize = rline1('/sys/block/' + zram_id + '/disksize') + except FileNotFoundError: + return '0', '0' + if disksize == ['0\n']: + return '0', '0' + try: + mm_stat = rline1('/sys/block/' + zram_id + '/mm_stat').split(' ') + mm_stat_list = [] + for i in mm_stat: + if i != '': + mm_stat_list.append(i) + mem_used_total = mm_stat_list[2] + except FileNotFoundError: + mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total') + return disksize, mem_used_total # BYTES, str + + +def send_notify_warn(): + """ + Look for process with maximum 'badness' and warn user with notification. + (implement Low memory warnings) + """ + + ''' + # find process with max badness + fat_tuple = find_victim() + pid = fat_tuple[0] + name = pid_to_name(pid) + + if mem_used_zram > 0: + low_mem_percent = '{}% {}% {}%'.format( + round(mem_available / mem_total * 100), + round(swap_free / (swap_total + 0.1) * 100), + round(mem_used_zram / mem_total * 100)) + elif swap_free > 0: + low_mem_percent = '{}% {}%'.format( + round(mem_available / mem_total * 100), + round(swap_free / (swap_total + 0.1) * 100)) + else: + low_mem_percent = '{}%'.format( + round(mem_available / mem_total * 100)) + + # title = 'Low memory: {}'.format(low_mem_percent) + title = 'Low memory' + ''' + + ''' + body2 = 'Next victim: {}[{}]'.format( + name.replace( + # symbol '&' can break notifications in some themes, + # therefore it is replaced by '*' + '&', '*'), + pid + ) + ''' + + ''' + body = 'MemAvail: {}%\nSwapFree: {}%'.format( + round(mem_available / mem_total * 100), + round(swap_free / (swap_total + 0.1) * 100)) + + if root: # If nohang was started by root + # send notification to all active users with special script + notify_helper(title, body) + else: # Or by regular user + # send notification to user that runs this nohang + notify_send_wait(title, body) + ''' + + log('Warning threshold exceeded') + + if check_warning_exe: + exe(warning_exe) + + else: + + title = 'Low memory' + + body = 'MemAvail: {}%\nSwapFree: {}%'.format( + round(mem_available / mem_total * 100), + round(swap_free / (swap_total + 0.1) * 100) + ) + + send_notification(title, body) + + +def send_notify(signal, name, pid): + """ + Notificate about OOM Preventing. + + signal: key for notify_sig_dict + name: str process name + pid: str process pid + """ + + # wait for memory release after corrective action + # may be useful if free memory was about 0 immediately after + # corrective action + sleep(0.05) + + title = 'Freeze prevention' + body = '{} [{}] {}'.format( + notify_sig_dict[signal], + pid, + name.replace( + # symbol '&' can break notifications in some themes, + # therefore it is replaced by '*' + '&', '*' + ) + ) + + send_notification(title, body) + + +def send_notify_etc(pid, name, command): + """ + Notificate about OOM Preventing. + + command: str command that will be executed + name: str process name + pid: str process pid + """ + title = 'Freeze prevention' + body = 'Victim is [{}] {}\nExecute the co' \ + 'mmand:\n{}'.format( + pid, name.replace('&', '*'), command.replace('&', '*')) + + send_notification(title, body) + + +def send_notification(title, body): + """ + """ + split_by = '#' * 16 + + t000 = time() + + path_to_cache = '/dev/shm/nohang_notify_cache_uid{}_time{}'.format( + str(self_uid), t000 + ) + + text = '{}{}{}'.format(title, split_by, body) + + try: + with open(path_to_cache, 'w') as f: + f.write(text) + os.chmod(path_to_cache, 0o600) + except OSError: + log('OSError while send notification ' + '(No space left on device: /dev/shm)') + return None + + cmd = '{} --uid {} --time {} &'.format(notify_helper_path, self_uid, t000) + + exe(cmd) + + +def sleep_after_send_signal(signal): + """ + Sleeping after signal was sent. + + signal: sent signal + """ + if signal is SIGKILL: + if print_sleep_periods: + log(' sleep {}'.format(min_delay_after_sigkill)) + sleep(min_delay_after_sigkill) + else: + if print_sleep_periods: + log('Sleep {} sec after implementing a corrective action'.format( + min_delay_after_sigterm)) + sleep(min_delay_after_sigterm) + + +def get_pid_list(): + """ + Find pid list expect kthreads and zombies + """ + pid_list = [] + for pid in os.listdir('/proc'): + if os.path.exists('/proc/' + pid + '/exe') is True: + pid_list.append(pid) + return pid_list + + +pid_list = get_pid_list() + + +def get_non_decimal_pids(): + """ + """ + non_decimal_list = [] + for pid in pid_list: + if pid[0].isdecimal() is False: + non_decimal_list.append(pid) + return non_decimal_list + + +def find_victim(_print_proc_table): + """ + Find the process with highest badness and its badness adjustment + Return pid and badness + """ + + ft1 = time() + + pid_list = get_pid_list() + + pid_list.remove(self_pid) + + if '1' in pid_list: + pid_list.remove('1') + + non_decimal_list = get_non_decimal_pids() + + for i in non_decimal_list: + if i in pid_list: # ???????????????????????????????????????????? + pid_list.remove(i) + + pid_badness_list = [] + + if _print_proc_table: + + if extra_table_info == 'None': + extra_table_title = '' + + elif extra_table_info == 'cgroup_v1': + extra_table_title = 'CGroup_v1' + + elif extra_table_info == 'cgroup_v2': + extra_table_title = 'CGroup_v2' + + elif extra_table_info == 'cmdline': + extra_table_title = 'cmdline' + + elif extra_table_info == 'environ': + extra_table_title = 'environ' + + elif extra_table_info == 'realpath': + extra_table_title = 'realpath' + + elif extra_table_info == 'All': + extra_table_title = '[CGroup] [CmdLine] [RealPath]' + else: + extra_table_title = '' + + hr = '#' * 115 + + log(hr) + log('# PID PPID badness oom_score oom_score_adj eUID S VmSize VmRSS VmSwap Name {}'.format( + extra_table_title)) + log('#------- ------- ------- --------- ------------- ---------- - ------ ----- ------ --------------- --------') + + for pid in pid_list: + + badness = pid_to_badness(pid)[0] + + if badness is None: + continue + + if _print_proc_table: + + try: + oom_score = rline1('/proc/' + pid + '/oom_score') + oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') + except FileNotFoundError: + continue + + if pid_to_status(pid) is None: + continue + else: + (name, state, ppid, uid, vm_size, vm_rss, + vm_swap) = pid_to_status(pid) + + if extra_table_info == 'None': + extra_table_line = '' + + elif extra_table_info == 'cgroup_v1': + extra_table_line = pid_to_cgroup_v1(pid) + + elif extra_table_info == 'cgroup_v2': + extra_table_line = pid_to_cgroup_v2(pid) + + elif extra_table_info == 'cmdline': + extra_table_line = pid_to_cmdline(pid) + + elif extra_table_info == 'environ': + extra_table_line = pid_to_environ(pid) + + elif extra_table_info == 'realpath': + extra_table_line = pid_to_realpath(pid) + + elif extra_table_info == 'All': + extra_table_line = '[CG: {}] [CL: {}] [RP: {}]'.format( + pid_to_cgroup_v1(pid), + pid_to_cmdline(pid), + pid_to_realpath(pid) + ) + else: + extra_table_line = '' + + log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format( + pid.rjust(7), + ppid.rjust(7), + str(badness).rjust(7), + oom_score.rjust(9), + oom_score_adj.rjust(13), + uid.rjust(10), + state, + str(vm_size).rjust(6), + str(vm_rss).rjust(5), + str(vm_swap).rjust(6), + name.ljust(15), + extra_table_line + ) + ) + + pid_badness_list.append((pid, badness)) + + real_proc_num = len(pid_badness_list) + + # Make list of (pid, badness) tuples, sorted by 'badness' values + # print(pid_badness_list) + pid_tuple_list = sorted( + pid_badness_list, + key=itemgetter(1), + reverse=True + )[0] + + pid = pid_tuple_list[0] + + # Get maximum 'badness' value + victim_badness = pid_tuple_list[1] + victim_name = pid_to_name(pid) + + if _print_proc_table: + log(hr) + + log('Found {} processes with existing realpaths'.format(real_proc_num)) + + log( + 'Process with highest badness (found in {} ms):\n PID: {}, Na' + 'me: {}, badness: {}'.format( + round((time() - ft1) * 1000), + pid, + victim_name, + victim_badness + ) + ) + + return pid, victim_badness, victim_name + + +def find_victim_info(pid, victim_badness, name): + """ + """ + status0 = time() + + try: + + with open('/proc/' + pid + '/status') as f: + + for n, line in enumerate(f): + + if n is state_index: + state = line.split('\t')[1].rstrip() + continue + + if n is ppid_index: + ppid = line.split('\t')[1] + continue + + if n is uid_index: + uid = line.split('\t')[2] + continue + + if n is vm_size_index: + vm_size = kib_to_mib(int(line.split('\t')[1][:-4])) + continue + + if n is vm_rss_index: + vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) + continue + + if detailed_rss: + + if n is anon_index: + anon_rss = kib_to_mib( + int(line.split('\t')[1][:-4])) + continue + + if n is file_index: + file_rss = kib_to_mib( + int(line.split('\t')[1][:-4])) + continue + + if n is shmem_index: + shmem_rss = kib_to_mib( + int(line.split('\t')[1][:-4])) + continue + + if n is vm_swap_index: + vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) + break + + cmdline = pid_to_cmdline(pid) + oom_score = rline1('/proc/' + pid + '/oom_score') + oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') + + except FileNotFoundError: + log('The victim died in the search process: FileNotFoundError') + update_stat_dict_and_print( + 'The victim died in the search process: FileNotFoundError') + return None + except ProcessLookupError: + log('The victim died in the search process: ProcessLookupError') + update_stat_dict_and_print( + 'The victim died in the search process: ProcessLookupError') + return None + except UnicodeDecodeError: + + with open('/proc/' + pid + '/status', 'rb') as f: + f_list = f.read().decode('utf-8', 'ignore').split('\n') + + for i in range(len(f_list)): + + if i is state_index: + state = f_list[i].split('\t')[1].rstrip() + + if i is ppid_index: + ppid = f_list[i].split('\t')[1] + + if i is uid_index: + uid = f_list[i].split('\t')[2] + + if i is vm_size_index: + vm_size = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + if i is vm_rss_index: + vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) + + if detailed_rss: + + if i is anon_index: + anon_rss = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + if i is file_index: + file_rss = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + if i is shmem_index: + shmem_rss = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + if i is vm_swap_index: + vm_swap = kib_to_mib( + int(f_list[i].split('\t')[1][:-3])) + + cmdline = pid_to_cmdline(pid) + oom_score = rline1('/proc/' + pid + '/oom_score') + oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') + + except IndexError: + log('The victim died in the search process: IndexError') + update_stat_dict_and_print( + 'The victim died in the search process: IndexError') + return None + except ValueError: + log('The victim died in the search process: ValueError') + update_stat_dict_and_print( + 'The victim died in the search process: ValueError') + return None + except FileNotFoundError: + log('The victim died in the search process: FileNotFoundError') + update_stat_dict_and_print( + 'The victim died in the search process: FileNotFoundError') + return None + except ProcessLookupError: + log('The victim died in the search process: ProcessLookupError') + update_stat_dict_and_print( + 'The victim died in the search process: ProcessLookupError') + return None + + # print((time() - status0) * 1000, 'status time') + + len_vm = len(str(vm_size)) + + try: + realpath = os.path.realpath('/proc/' + pid + '/exe') + victim_lifetime = format_time(uptime() - pid_to_starttime(pid)) + victim_cgroup_v1 = pid_to_cgroup_v1(pid) + victim_cgroup_v2 = pid_to_cgroup_v2(pid) + + except FileNotFoundError: + print('The victim died in the search process: FileNotFoundError') + update_stat_dict_and_print( + 'The victim died in the search process: FileNotFoundError') + return None + + # te1 = time() + ancestry = pid_to_ancestry(pid, max_ancestry_depth) + # print((time() - te1) * 1000, 'ms, ancestry') + + if detailed_rss: + detailed_rss_info = ' (' \ + 'Anon: {} MiB, ' \ + 'File: {} MiB, ' \ + 'Shmem: {} MiB)'.format( + anon_rss, + file_rss, + shmem_rss) + else: + detailed_rss_info = '' + + victim_info = 'Victim information (found in {} ms):' \ + '\n Name: {}' \ + '\n State: {}' \ + '\n PID: {}' \ + '{}' \ + '\n EUID: {}' \ + '\n badness: {}, ' \ + 'oom_score: {}, ' \ + 'oom_score_adj: {}' \ + '\n VmSize: {} MiB' \ + '\n VmRSS: {} MiB {}' \ + '\n VmSwap: {} MiB' \ + '\n CGroup_v1: {}' \ + '\n CGroup_v2: {}' \ + '\n Realpath: {}' \ + '\n Cmdline: {}' \ + '\n Lifetime: {}'.format( + round((time() - status0) * 1000), + name, + state, + pid, + ancestry, + uid, + victim_badness, + oom_score, + oom_score_adj, + vm_size, + str(vm_rss).rjust(len_vm), + detailed_rss_info, + str(vm_swap).rjust(len_vm), + victim_cgroup_v1, + victim_cgroup_v2, + realpath, + cmdline, + victim_lifetime) + + return victim_info + + +# для дедупликации уведомлений +dick = dict() +dick['v'] = [1, 2, 3, time()] + + +def implement_corrective_action(signal): + """ + Find victim with highest badness and send SIGTERM/SIGKILL + """ + + notif = True + + log(mem_info) + + pid, victim_badness, name = find_victim(print_proc_table) + + if victim_badness >= min_badness: + + if print_victim_info: + victim_info = find_victim_info(pid, victim_badness, name) + log(victim_info) + + # kill the victim if it doesn't respond to SIGTERM + if signal is SIGTERM: + victim_id = get_victim_id(pid) + if victim_id not in victim_dict: + victim_dict.update({victim_id: time()}) + else: + if time() - victim_dict[ + victim_id] > max_post_sigterm_victim_lifetime: + print( + '\nmax_post_sigterm_victim_lifetime excee' + 'ded: the victim will get SIGKILL' + ) + signal = SIGKILL + + soft_match = False + + if soft_actions and signal is SIGTERM: + # если мягкий порог И список мягких не пуст: + # итерируемся по списку, ища мэтчинги. Есть совпадения - выполн + # команду и выход из цикла. + name = pid_to_name(pid) + cgroup_v1 = pid_to_cgroup_v1(pid) + service = '' + cgroup_v1_tail = cgroup_v1.rpartition('/')[2] + # log(cgroup_v1_tail) + if cgroup_v1_tail.endswith('.service'): + service = cgroup_v1_tail + # print('$SERVICE:', [service]) + # print('ИЩЕМ СОВПАДЕНИЯ ДЛЯ МЯГКИХ ДЕЙСТВИЙ') + # итерируемся по списку кортежей + for i in soft_actions_list: + unit = i[0] + if unit == 'name': + u = name + else: + u = cgroup_v1 + regexp = i[1] + command = i[2] + # print([u, regexp, command]) + if search(regexp, u) is not None: + log("Regexp '{}' matches with {} '{}'".format(regexp, unit, u)) + # print('СОВПАДЕНИЕ НАЙДЕНО') + soft_match = True + break + + if soft_match: + + # todo: make new func + m = check_mem_and_swap() + ma = int(m[0]) / 1024.0 + sf = int(m[2]) / 1024.0 + log('Memory status before implementing a corrective act' + 'ion:\n MemAvailable' + ': {} MiB, SwapFree: {} MiB'.format( + round(ma, 1), round(sf, 1) + ) + ) + + cmd = command.replace( + '$PID', + pid).replace( + '$NAME', + pid_to_name(pid)).replace( + '$SERVICE', + service) + + exit_status = exe(cmd) + + exit_status = str(exit_status) + + response_time = time() - time0 + + etc_info = 'Implement a corrective act' \ + 'ion:\n Run the command: {}' \ + '\n Exit status: {}; total response ' \ + 'time: {} ms'.format( + cmd, + exit_status, + round(response_time * 1000)) + + log(etc_info) + + key = "Run the command '{}'".format(cmd) + update_stat_dict_and_print(key) + + if gui_notifications: + send_notify_etc( + pid, + name, + command.replace('$PID', pid).replace( + '$NAME', pid_to_name(pid))) + + else: + + try: + + mem_available, swap_total, swap_free = check_mem_and_swap() + + ma_mib = int(mem_available) / 1024.0 + sf_mib = int(swap_free) / 1024.0 + log('Memory status before implementing a corrective act' + 'ion:\n MemAvailable' + ': {} MiB, SwapFree: {} MiB'.format( + round(ma_mib, 1), round(sf_mib, 1) + ) + ) + + if (mem_available <= mem_min_sigkill_kb and + swap_free <= swap_min_sigkill_kb): + log('Hard threshold exceeded') + signal = SIGKILL + + os.kill(int(pid), signal) + response_time = time() - time0 + send_result = 'total response time: {} ms'.format( + round(response_time * 1000)) + + preventing_oom_message = 'Implement a corrective action:' \ + '\n Send {} to the victim; {}'.format( + sig_dict[signal], send_result) + + key = 'Send {} to {}'.format(sig_dict[signal], name) + + if signal is SIGKILL and post_kill_exe != '': + + cmd = post_kill_exe.replace('$PID', pid).replace( + '$NAME', pid_to_name(pid)) + + log('Execute post_kill_exe') + + exe(cmd) + + if gui_notifications: + + # min delay after same notification + # все не так. От этого вообще пол дедупликация . терминация + # один раз покажется при любом раскладе. + delay_after_same_notify = 1 + + x = dick['v'] + + dick['v'] = [signal, name, pid, time()] + + y = dick['v'] + + # print(y[3] - x[3]) + + if x[0] == y[0] and x[1] == y[1] and x[2] == y[2]: + # print('совпадение имени, пид, сигнала') + + # сохр в словаре первре совпавшее время + dt = y[3] - x[3] + # print(dt, 'dt') + if dt < delay_after_same_notify: + notif = False + + if notif: + send_notify(signal, name, pid) + + except FileNotFoundError: + response_time = time() - time0 + send_result = 'no such process; response time: {} ms'.format( + round(response_time * 1000)) + key = 'FileNotFoundError (the victim died in the se' \ + 'arch process): ' + except ProcessLookupError: + response_time = time() - time0 + send_result = 'no such process; response time: {} ms'.format( + round(response_time * 1000)) + key = 'ProcessLookupError (the victim died in the se' \ + 'arch process): ' + + try: + log(preventing_oom_message) + except UnboundLocalError: + preventing_oom_message = key + + update_stat_dict_and_print(key) + + else: + + response_time = time() - time0 + victim_badness_is_too_small = 'victim badness {} < min_b' \ + 'adness {}; nothing to do; response time: {} ms'.format( + victim_badness, + min_badness, + round(response_time * 1000)) + + log(victim_badness_is_too_small) + + # update stat_dict + key = 'victim badness < min_badness' + update_stat_dict_and_print(key) + + sleep_after_send_signal(signal) + + +def sleep_after_check_mem(): + """Specify sleep times depends on rates and avialable memory.""" + + if mem_min_sigkill_kb < mem_min_sigterm_kb: + mem_point = mem_available - mem_min_sigterm_kb + else: + mem_point = mem_available - mem_min_sigkill_kb + + if swap_min_sigkill_kb < swap_min_sigterm_kb: + swap_point = swap_free - swap_min_sigterm_kb + else: + swap_point = swap_free - swap_min_sigkill_kb + + if swap_point < 0: + swap_point = 0 + + if mem_point < 0: + mem_point = 0 + + t_mem = mem_point / rate_mem + t_swap = swap_point / rate_swap + t_zram = (mem_total * 0.8 - mem_used_zram) / rate_zram + if t_zram < 0: + t_zram = 0 + + t_mem_swap = t_mem + t_swap + t_mem_zram = t_mem + t_zram + + if t_mem_swap <= t_mem_zram: + t = t_mem_swap + else: + t = t_mem_zram + + if t > max_sleep_time: + t = max_sleep_time + elif t < min_sleep_time: + t = min_sleep_time + else: + pass + + if print_sleep_periods: + + log( + 'Sleep {} sec (t_mem={}, t_swap={}, t_zram={})'.format( + round(t, 2), + round(t_mem, 2), + round(t_swap, 2), + round(t_zram, 2) + ) + ) + + try: + stdout.flush() + except OSError: # OSError: [Errno 105] No buffer space available + pass + + sleep(t) + + +def calculate_percent(arg_key): + """ + parse conf dict + Calculate mem_min_KEY_percent. + + Try use this one) + arg_key: str key for config_dict + returns int mem_min_percent or NoneType if got some error + """ + + if arg_key in config_dict: + mem_min = config_dict[arg_key] + + if mem_min.endswith('%'): + # truncate percents, so we have a number + mem_min_percent = mem_min[:-1].strip() + # then 'float test' + mem_min_percent = string_to_float_convert_test(mem_min_percent) + if mem_min_percent is None: + errprint('Invalid {} value, not float\nExit'.format(arg_key)) + exit(1) + # Final validations... + if mem_min_percent < 0 or mem_min_percent > 100: + errprint( + '{}, as percents value, out of ran' + 'ge [0; 100]\nExit'.format(arg_key)) + exit(1) + + # mem_min_sigterm_percent is clean and valid float percentage. Can + # translate into Kb + mem_min_kb = mem_min_percent / 100 * mem_total + mem_min_mb = round(mem_min_kb / 1024) + + elif mem_min.endswith('M'): + mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip()) + if mem_min_mb is None: + errprint('Invalid {} value, not float\nExit'.format(arg_key)) + exit(1) + mem_min_kb = mem_min_mb * 1024 + if mem_min_kb > mem_total: + errprint( + '{} value can not be greater then MemT' + 'otal ({} MiB)\nExit'.format( + arg_key, round( + mem_total / 1024))) + exit(1) + mem_min_percent = mem_min_kb / mem_total * 100 + + else: + log('Invalid {} units in config.\n Exit'.format(arg_key)) + exit(1) + mem_min_percent = None + + else: + log('{} not in config\nExit'.format(arg_key)) + exit(1) + mem_min_percent = None + + return mem_min_kb, mem_min_mb, mem_min_percent + + +########################################################################## + + +print_proc_table_flag = False + +# print(len(argv), argv) + +if len(argv) == 1: + if os.path.exists('./nohang.conf'): + config = os.getcwd() + '/nohang.conf' + else: + config = '/etc/nohang/nohang.conf' + +elif len(argv) == 2: + if argv[1] == '--help' or argv[1] == '-h': + print(help_mess) + exit() + elif argv[1] == '--version' or argv[1] == '-v': + print_version() + elif argv[1] == '--test' or argv[1] == '-t': + test() + elif argv[1] == '--print-proc-table' or argv[1] == '-p': + print_proc_table_flag = True + if os.path.exists('./nohang.conf'): + config = os.getcwd() + '/nohang.conf' + else: + config = '/etc/nohang/nohang.conf' + else: + errprint('Unknown option: {}'.format(argv[1])) + exit(1) + +elif len(argv) == 3: + if argv[1] == '--config' or argv[1] == '-c': + config = argv[2] + else: + errprint('Unknown option: {}'.format(argv[1])) + exit(1) + +else: + errprint('Invalid CLI input: too many options') + exit(1) + + +########################################################################## + + +# find mem_total +# find positions of SwapFree and SwapTotal in /proc/meminfo + +with open('/proc/meminfo') as f: + mem_list = f.readlines() + +mem_list_names = [] +for s in mem_list: + mem_list_names.append(s.split(':')[0]) + +if mem_list_names[2] != 'MemAvailable': + errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied') + # exit(1) + +swap_total_index = mem_list_names.index('SwapTotal') +swap_free_index = swap_total_index + 1 + +mem_total = int(mem_list[0].split(':')[1][:-4]) + +# Get names from /proc/*/status to be able to get VmRSS and VmSwap values + +with open('/proc/self/status') as file: + status_list = file.readlines() + +status_names = [] +for s in status_list: + status_names.append(s.split(':')[0]) + +ppid_index = status_names.index('PPid') +vm_size_index = status_names.index('VmSize') +vm_rss_index = status_names.index('VmRSS') +vm_swap_index = status_names.index('VmSwap') +uid_index = status_names.index('Uid') +state_index = status_names.index('State') + + +try: + anon_index = status_names.index('RssAnon') + file_index = status_names.index('RssFile') + shmem_index = status_names.index('RssShmem') + detailed_rss = True + # print(detailed_rss, 'detailed_rss') +except ValueError: + detailed_rss = False + # print('It is not Linux 4.5+') + +########################################################################## + + +log('Config: ' + config) + + +########################################################################## + +# parsing the config with obtaining the parameters dictionary + +# conf_parameters_dict +# conf_restart_dict + +# dictionary with config options +config_dict = dict() + +processname_re_list = [] +cmdline_re_list = [] +environ_re_list = [] +uid_re_list = [] +cgroup_v1_re_list = [] +cgroup_v2_re_list = [] +realpath_re_list = [] + +soft_actions_list = [] + + +# separator for optional parameters (that starts with @) +opt_separator = '///' + + +# stupid conf parsing, need refactoring +try: + with open(config) as f: + + for line in f: + + a = line.startswith('#') + b = line.startswith('\n') + c = line.startswith('\t') + d = line.startswith(' ') + + etc = line.startswith('@SOFT_ACTION_RE_NAME') + etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1') + + if not a and not b and not c and not d and not etc and not etc2: + a = line.partition('=') + + key = a[0].strip() + value = a[2].strip() + + if key not in config_dict: + config_dict[key] = value + else: + log('ERROR: config key duplication: {}'.format(key)) + exit(1) + + if etc: + + # это остаток строки без первого ключа. Содержит: регулярка /// + # команда + a = line.partition('@SOFT_ACTION_RE_NAME')[ + 2].partition(opt_separator) + + a1 = 'name' + + a2 = a[0].strip() + valid_re(a2) + + a3 = a[2].strip() + + zzz = (a1, a2, a3) + + # print(zzz) + + soft_actions_list.append(zzz) + + if etc2: + + # это остаток строки без первого ключа. Содержит: регулярка /// + # команда + a = line.partition('@SOFT_ACTION_RE_CGROUP_V1')[ + 2].partition(opt_separator) + + a1 = 'cgroup_v1' + + a2 = a[0].strip() + valid_re(a2) + + a3 = a[2].strip() + + zzz = (a1, a2, a3) + + # print(zzz) + + soft_actions_list.append(zzz) + + if line.startswith('@PROCESSNAME_RE'): + a = line.partition( + '@PROCESSNAME_RE')[2].strip(' \n').partition(opt_separator) + badness_adj = a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + processname_re_list.append((badness_adj, reg_exp)) + + if line.startswith('@CMDLINE_RE'): + a = line.partition( + '@CMDLINE_RE')[2].strip(' \n').partition(opt_separator) + badness_adj = a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + cmdline_re_list.append((badness_adj, reg_exp)) + + if line.startswith('@UID_RE'): + a = line.partition( + '@UID_RE')[2].strip(' \n').partition(opt_separator) + badness_adj = a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + uid_re_list.append((badness_adj, reg_exp)) + + if line.startswith('@CGROUP_V1_RE'): + a = line.partition( + '@CGROUP_V1_RE')[2].strip(' \n').partition(opt_separator) + badness_adj = a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + cgroup_v1_re_list.append((badness_adj, reg_exp)) + + if line.startswith('@CGROUP_V2_RE'): + a = line.partition( + '@CGROUP_V2_RE')[2].strip(' \n').partition(opt_separator) + badness_adj = a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + cgroup_v2_re_list.append((badness_adj, reg_exp)) + + if line.startswith('@REALPATH_RE'): + a = line.partition( + '@REALPATH_RE')[2].strip(' \n').partition(opt_separator) + badness_adj = a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + realpath_re_list.append((badness_adj, reg_exp)) + + if line.startswith('@ENVIRON_RE'): + a = line.partition( + '@ENVIRON_RE')[2].strip(' \n').partition(opt_separator) + badness_adj = a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + environ_re_list.append((badness_adj, reg_exp)) + + +except PermissionError: + errprint('PermissionError', conf_err_mess) + exit(1) +except UnicodeDecodeError: + errprint('UnicodeDecodeError', conf_err_mess) + exit(1) +except IsADirectoryError: + errprint('IsADirectoryError', conf_err_mess) + exit(1) +except IndexError: + errprint('IndexError', conf_err_mess) + exit(1) +except FileNotFoundError: + errprint('FileNotFoundError', conf_err_mess) + exit(1) + + +if processname_re_list == []: + regex_matching = False +else: + regex_matching = True + + +if cmdline_re_list == []: + re_match_cmdline = False +else: + re_match_cmdline = True + + +if uid_re_list == []: + re_match_uid = False +else: + re_match_uid = True + + +if environ_re_list == []: + re_match_environ = False +else: + re_match_environ = True + + +if realpath_re_list == []: + re_match_realpath = False +else: + re_match_realpath = True + + +if cgroup_v1_re_list == []: + re_match_cgroup_v1 = False +else: + re_match_cgroup_v1 = True + +if cgroup_v2_re_list == []: + re_match_cgroup_v2 = False +else: + re_match_cgroup_v2 = True + + +# print(processname_re_list) +# print(cmdline_re_list) +# print(uid_re_list) +# print(environ_re_list) +# print(realpath_re_list) +# print(cgroup_v1_re_list) +# print(cgroup_v2_re_list) + + +# print(soft_actions_list) + +if soft_actions_list == []: + soft_actions = False +else: + soft_actions = True + +# print('soft_actions:', soft_actions) + +########################################################################## + + +# extracting parameters from the dictionary +# check for all necessary parameters +# validation of all parameters +psi_debug = conf_parse_bool('psi_debug') +print_total_stat = conf_parse_bool('print_total_stat') +print_proc_table = conf_parse_bool('print_proc_table') +forbid_negative_badness = conf_parse_bool('forbid_negative_badness') +print_victim_info = conf_parse_bool('print_victim_info') +print_config = conf_parse_bool('print_config') +print_mem_check_results = conf_parse_bool('print_mem_check_results') +print_sleep_periods = conf_parse_bool('print_sleep_periods') +gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings') +gui_notifications = conf_parse_bool('gui_notifications') +decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj') +ignore_psi = conf_parse_bool('ignore_psi') + + +# regex_matching = conf_parse_bool('regex_matching') +# re_match_cmdline = conf_parse_bool('re_match_cmdline') +# re_match_uid = conf_parse_bool('re_match_uid') +# re_match_cgroup_v1 = conf_parse_bool('re_match_cgroup_v1') +# re_match_cgroup_v2 = conf_parse_bool('re_match_cgroup_v2') +# re_match_realpath = conf_parse_bool('re_match_realpath') +# re_match_environ = conf_parse_bool('re_match_environ') + + +# if regex_matching or re_match_cmdline or re_match_uid or re_match_cgroup +# or re_match_realpath: +# from re import search +# from sre_constants import error as invalid_re + +(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent + ) = calculate_percent('mem_min_sigterm') + +(mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent + ) = calculate_percent('mem_min_sigkill') + +(zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent + ) = calculate_percent('zram_max_sigterm') + +(zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent + ) = calculate_percent('zram_max_sigkill') + +(mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent + ) = calculate_percent('mem_min_warnings') + +(zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent + ) = calculate_percent('zram_max_warnings') + + +if 'rate_mem' in config_dict: + rate_mem = string_to_float_convert_test(config_dict['rate_mem']) + if rate_mem is None: + errprint('Invalid rate_mem value, not float\nExit') + exit(1) + if rate_mem <= 0: + errprint('rate_mem MUST be > 0\nExit') + exit(1) +else: + errprint('rate_mem not in config\nExit') + exit(1) + + +if 'rate_swap' in config_dict: + rate_swap = string_to_float_convert_test(config_dict['rate_swap']) + if rate_swap is None: + errprint('Invalid rate_swap value, not float\nExit') + exit(1) + if rate_swap <= 0: + errprint('rate_swap MUST be > 0\nExit') + exit(1) +else: + errprint('rate_swap not in config\nExit') + exit(1) + + +if 'rate_zram' in config_dict: + rate_zram = string_to_float_convert_test(config_dict['rate_zram']) + if rate_zram is None: + errprint('Invalid rate_zram value, not float\nExit') + exit(1) + if rate_zram <= 0: + errprint('rate_zram MUST be > 0\nExit') + exit(1) +else: + errprint('rate_zram not in config\nExit') + exit(1) + + +if 'swap_min_sigterm' in config_dict: + swap_min_sigterm = config_dict['swap_min_sigterm'] +else: + errprint('swap_min_sigterm not in config\nExit') + exit(1) + + +if 'swap_min_sigkill' in config_dict: + swap_min_sigkill = config_dict['swap_min_sigkill'] +else: + errprint('swap_min_sigkill not in config\nExit') + exit(1) + + +if 'min_delay_after_sigterm' in config_dict: + min_delay_after_sigterm = string_to_float_convert_test( + config_dict['min_delay_after_sigterm']) + if min_delay_after_sigterm is None: + errprint('Invalid min_delay_after_sigterm value, not float\nExit') + exit(1) + if min_delay_after_sigterm < 0: + errprint('min_delay_after_sigterm must be positiv\nExit') + exit(1) +else: + errprint('min_delay_after_sigterm not in config\nExit') + exit(1) + + +if 'min_delay_after_sigkill' in config_dict: + min_delay_after_sigkill = string_to_float_convert_test( + config_dict['min_delay_after_sigkill']) + if min_delay_after_sigkill is None: + errprint('Invalid min_delay_after_sigkill value, not float\nExit') + exit(1) + if min_delay_after_sigkill < 0: + errprint('min_delay_after_sigkill must be positive\nExit') + exit(1) +else: + errprint('min_delay_after_sigkill not in config\nExit') + exit(1) + + +if 'psi_post_action_delay' in config_dict: + psi_post_action_delay = string_to_float_convert_test( + config_dict['psi_post_action_delay']) + if psi_post_action_delay is None: + errprint('Invalid psi_post_action_delay value, not float\nExit') + exit(1) + if psi_post_action_delay < 0: + errprint('psi_post_action_delay must be positive\nExit') + exit(1) +else: + errprint('psi_post_action_delay not in config\nExit') + exit(1) + + +if 'sigkill_psi_threshold' in config_dict: + sigkill_psi_threshold = string_to_float_convert_test( + config_dict['sigkill_psi_threshold']) + if sigkill_psi_threshold is None: + errprint('Invalid sigkill_psi_threshold value, not float\nExit') + exit(1) + if sigkill_psi_threshold < 0 or sigkill_psi_threshold > 100: + errprint('sigkill_psi_threshold must be in the range [0; 100]\nExit') + exit(1) +else: + errprint('sigkill_psi_threshold not in config\nExit') + exit(1) + + +if 'sigterm_psi_threshold' in config_dict: + sigterm_psi_threshold = string_to_float_convert_test( + config_dict['sigterm_psi_threshold']) + if sigterm_psi_threshold is None: + errprint('Invalid sigterm_psi_threshold value, not float\nExit') + exit(1) + if sigterm_psi_threshold < 0 or sigterm_psi_threshold > 100: + errprint('sigterm_psi_threshold must be in the range [0; 100]\nExit') + exit(1) +else: + errprint('sigterm_psi_threshold not in config\nExit') + exit(1) + + +if 'min_badness' in config_dict: + min_badness = string_to_int_convert_test( + config_dict['min_badness']) + if min_badness is None: + errprint('Invalid min_badness value, not integer\nExit') + exit(1) + if min_badness < 0 or min_badness > 1000: + errprint('Invalud min_badness value\nExit') + exit(1) +else: + errprint('min_badness not in config\nExit') + exit(1) + + +if 'oom_score_adj_max' in config_dict: + oom_score_adj_max = string_to_int_convert_test( + config_dict['oom_score_adj_max']) + if oom_score_adj_max is None: + errprint('Invalid oom_score_adj_max value, not integer\nExit') + exit(1) + if oom_score_adj_max < 0 or oom_score_adj_max > 1000: + errprint('Invalid oom_score_adj_max value\nExit') + exit(1) +else: + errprint('oom_score_adj_max not in config\nExit') + exit(1) + + +if 'min_time_between_warnings' in config_dict: + min_time_between_warnings = string_to_float_convert_test( + config_dict['min_time_between_warnings']) + if min_time_between_warnings is None: + errprint('Invalid min_time_between_warnings value, not float\nExit') + exit(1) + if min_time_between_warnings < 1 or min_time_between_warnings > 300: + errprint('min_time_between_warnings value out of range [1; 300]\nExit') + exit(1) +else: + errprint('min_time_between_warnings not in config\nExit') + exit(1) + + +if 'swap_min_warnings' in config_dict: + swap_min_warnings = config_dict['swap_min_warnings'] +else: + errprint('swap_min_warnings not in config\nExit') + exit(1) + + +if 'max_ancestry_depth' in config_dict: + max_ancestry_depth = string_to_int_convert_test( + config_dict['max_ancestry_depth']) + if min_badness is None: + errprint('Invalid max_ancestry_depth value, not integer\nExit') + exit(1) + if max_ancestry_depth < 1: + errprint('Invalud max_ancestry_depth value\nExit') + exit(1) +else: + errprint('max_ancestry_depth is not in config\nExit') + exit(1) + + +if 'max_post_sigterm_victim_lifetime' in config_dict: + max_post_sigterm_victim_lifetime = string_to_float_convert_test( + config_dict['max_post_sigterm_victim_lifetime']) + if max_post_sigterm_victim_lifetime is None: + errprint('Invalid max_post_sigterm_victim_lifetime val' + 'ue, not float\nExit') + exit(1) + if max_post_sigterm_victim_lifetime < 0: + errprint('max_post_sigterm_victim_lifetime must be non-n' + 'egative number\nExit') + exit(1) +else: + errprint('max_post_sigterm_victim_lifetime is not in config\nExit') + exit(1) + + +if 'post_kill_exe' in config_dict: + post_kill_exe = config_dict['post_kill_exe'] +else: + errprint('post_kill_exe is not in config\nExit') + exit(1) + + +if 'psi_path' in config_dict: + psi_path = config_dict['psi_path'] +else: + errprint('psi_path is not in config\nExit') + exit(1) + + +if 'psi_metrics' in config_dict: + psi_metrics = config_dict['psi_metrics'] +else: + errprint('psi_metrics is not in config\nExit') + exit(1) + + +if 'warning_exe' in config_dict: + warning_exe = config_dict['warning_exe'] + if warning_exe != '': + check_warning_exe = True + else: + check_warning_exe = False +else: + errprint('warning_exe is not in config\nExit') + exit(1) + + +if 'extra_table_info' in config_dict: + extra_table_info = config_dict['extra_table_info'] + if (extra_table_info != 'None' and + extra_table_info != 'cgroup_v1' and + extra_table_info != 'cgroup_v2' and + extra_table_info != 'cmdline' and + extra_table_info != 'environ' and + extra_table_info != 'realpath' and + extra_table_info != 'All'): + + errprint('Invalid config: invalid extra_table_info value\nExit') + exit(1) +else: + errprint('Invalid config: extra_table_info is not in config\nExit') + exit(1) + + +separate_log = conf_parse_bool('separate_log') + +if separate_log: + + import logging + from logging import basicConfig + from logging import info + + log_dir = '/var/log/nohang' + + try: + os.mkdir(log_dir) + except PermissionError: + print('ERROR: can not create log dir') + except FileExistsError: + pass + + logfile = log_dir + '/nohang.log' + + try: + with open(logfile, 'a') as f: + pass + except FileNotFoundError: + print('ERROR: log FileNotFoundError') + except PermissionError: + print('ERROR: log PermissionError') + + try: + basicConfig( + filename=logfile, + level=logging.INFO, + format="%(asctime)s: %(message)s") + except PermissionError: + errprint('ERROR: Permission denied: {}'.format(logfile)) + except FileNotFoundError: + errprint('ERROR: FileNotFoundError: {}'.format(logfile)) + + +if 'min_mem_report_interval' in config_dict: + min_mem_report_interval = string_to_float_convert_test( + config_dict['min_mem_report_interval']) + if min_mem_report_interval is None: + errprint('Invalid min_mem_report_interval value, not float\nExit') + exit(1) + if min_mem_report_interval < 0: + errprint('min_mem_report_interval must be non-negative number\nExit') + exit(1) +else: + errprint('min_mem_report_interval is not in config\nExit') + exit(1) + + +if 'max_sleep_time' in config_dict: + max_sleep_time = string_to_float_convert_test( + config_dict['max_sleep_time']) + if max_sleep_time is None: + errprint('Invalid max_sleep_time value, not float\nExit') + exit(1) + if max_sleep_time <= 0: + errprint('max_sleep_time must be positive number\nExit') + exit(1) +else: + errprint('max_sleep_time is not in config\nExit') + exit(1) + + +if 'min_sleep_time' in config_dict: + min_sleep_time = string_to_float_convert_test( + config_dict['min_sleep_time']) + if min_sleep_time is None: + errprint('Invalid min_sleep_time value, not float\nExit') + exit(1) + if min_sleep_time <= 0: + errprint('min_sleep_time must be positive number\nExit') + exit(1) +else: + errprint('min_sleep_time is not in config\nExit') + exit(1) + + +if max_sleep_time < min_sleep_time: + errprint( + 'max_sleep_time value must not exceed min_sleep_time value.\nExit' + ) + exit(1) + + +if print_proc_table_flag: + + if not root: + log('WARNING: effective UID != 0; euid={}; processes with other e' + 'uids will be invisible for nohang'.format(self_uid)) + + func_print_proc_table() + + +########################################################################## + + +psi_support = os.path.exists(psi_path) + + +########################################################################## + + +# Get KiB levels if it's possible. + +# получ кб. если не кб - то процент. Если процент - находим кб ниже на +# основе полученного своптотал и процентов. + + +def get_swap_threshold_tuple(string): + # re (Num %, True) or (Num KiB, False) + """Returns KiB value if abs val was set in config, or tuple with %""" + # return tuple with abs and bool: (abs %, True) or (abs MiB, False) + + if string.endswith('%'): + valid = string_to_float_convert_test(string[:-1]) + if valid is None: + errprint('somewhere swap unit is not float_%') + exit(1) + + value = float(string[:-1].strip()) + if value < 0 or value > 100: + errprint('invalid value, must be from the range[0; 100] %') + exit(1) + + return value, True + + elif string.endswith('M'): + valid = string_to_float_convert_test(string[:-1]) + if valid is None: + errprint('somewhere swap unit is not float_M') + exit(1) + + value = float(string[:-1].strip()) * 1024 + if value < 0: + errprint('invalid unit in config (negative value)') + exit(1) + + return value, False + + else: + errprint( + 'Invalid config file. There are invalid units somewhere\nExit') + exit(1) + + +swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm) +swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill) +swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings) + + +swap_term_is_percent = swap_min_sigterm_tuple[1] +if swap_term_is_percent: + swap_min_sigterm_percent = swap_min_sigterm_tuple[0] +else: + swap_min_sigterm_kb = swap_min_sigterm_tuple[0] + + +swap_kill_is_percent = swap_min_sigkill_tuple[1] +if swap_kill_is_percent: + swap_min_sigkill_percent = swap_min_sigkill_tuple[0] +else: + swap_min_sigkill_kb = swap_min_sigkill_tuple[0] + + +swap_warn_is_percent = swap_min_warnings_tuple[1] +if swap_warn_is_percent: + swap_min_warnings_percent = swap_min_warnings_tuple[0] +else: + swap_min_warnings_kb = swap_min_warnings_tuple[0] + + +########################################################################## + +# outdated section, need fixes + +if print_config: + + print( + '\n1. Memory levels to respond to as an OOM threat\n[display' + 'ing these options need fix]\n') + + print('mem_min_sigterm: {} MiB, {} %'.format( + round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1))) + print('mem_min_sigkill: {} MiB, {} %'.format( + round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1))) + + print('swap_min_sigterm: {}'.format(swap_min_sigterm)) + print('swap_min_sigkill: {}'.format(swap_min_sigkill)) + + print('zram_max_sigterm: {} MiB, {} %'.format( + round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1))) + print('zram_max_sigkill: {} MiB, {} %'.format( + round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1))) + + print('\n2. The frequency of checking the level of available m' + 'emory (and CPU usage)\n') + print('rate_mem: {}'.format(rate_mem)) + print('rate_swap: {}'.format(rate_swap)) + print('rate_zram: {}'.format(rate_zram)) + + print('\n3. The prevention of killing innocent victims\n') + print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm)) + print('min_delay_after_sigkill: {}'.format(min_delay_after_sigkill)) + print('min_badness: {}'.format(min_badness)) + + # False (OK) - OK не нужен когда фолс + print('decrease_oom_score_adj: {}'.format( + decrease_oom_score_adj + )) + if decrease_oom_score_adj: + print('oom_score_adj_max: {}'.format(oom_score_adj_max)) + + print('\n4. Impact on the badness of processes via matching their' + ' names, cmdlines ir UIDs with regular expressions\n') + + print('(todo)') + + print('\n5. The execution of a specific command instead of sen' + 'ding the\nSIGTERM signal\n') + + print('\n6. GUI notifications:\n- OOM prevention results and\n- low m' + 'emory warnings\n') + print('gui_notifications: {}'.format(gui_notifications)) + + print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings)) + if gui_low_memory_warnings: + print('min_time_between_warnings: {}'.format( + min_time_between_warnings)) + + print('mem_min_warnings: {} MiB, {} %'.format( + round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1))) + + print('swap_min_warnings: {}'.format(swap_min_warnings)) + + print('zram_max_warnings: {} MiB, {} %'.format( + round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1))) + + print('\n7. Output verbosity\n') + print('print_config: {}'.format(print_config)) + print('print_mem_check_results: {}'.format(print_mem_check_results)) + print('print_sleep_periods: {}\n'.format(print_sleep_periods)) + + +########################################################################## + + +# for calculating the column width when printing mem and zram +mem_len = len(str(round(mem_total / 1024.0))) + +if gui_notifications: + notify_sig_dict = {SIGKILL: 'Killing', + SIGTERM: 'Terminating'} + + +# convert rates from MiB/s to KiB/s +rate_mem = rate_mem * 1024 +rate_swap = rate_swap * 1024 +rate_zram = rate_zram * 1024 + + +warn_time_now = 0 +warn_time_delta = 1000 +warn_timer = 0 + + +########################################################################## + + + + +if not root: + log('WARNING: effective UID != 0; euid={}; processes with other e' + 'uids will be invisible for nohang'.format(self_uid)) + + + +# Try to lock all memory + +mlockall() + +########################################################################## + + + +print_self_rss() + + +# if print_proc_table: +# find_victim(print_proc_table) + +log('Monitoring has started!') + +stdout.flush() + +########################################################################## + +psi_avg_string = '' # will be overwritten if PSI monitoring enabled + + +if psi_support and not ignore_psi: + psi_t0 = time() + + +if print_mem_check_results: + + # to find delta mem + wt2 = 0 + new_mem = 0 + + # init mem report interval + report0 = 0 + + +# handle signals +sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP, SIGABRT, SIGSEGV, SIGBUS] +for signum in sig_list: + signal(signum, signal_handler) + + +while True: + + if psi_support and not ignore_psi: + + psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics) + + if print_mem_check_results: + psi_avg_string = 'PSI avg value: {} | '.format( + str(psi_avg_value).rjust(6)) + + if psi_avg_value >= sigkill_psi_threshold: + sigkill_psi_exceeded = True + else: + sigkill_psi_exceeded = False + + if psi_avg_value >= sigterm_psi_threshold: + sigterm_psi_exceeded = True + else: + sigterm_psi_exceeded = False + + if time() - psi_t0 >= psi_post_action_delay: + psi_post_action_delay_exceeded = True + else: + psi_post_action_delay_exceeded = False + + if psi_debug: + log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps' + 'i_post_action_delay_exceeded: {}'.format( + sigterm_psi_exceeded, + sigkill_psi_exceeded, + psi_post_action_delay_exceeded)) + + if sigkill_psi_exceeded and psi_post_action_delay_exceeded: + time0 = time() + mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \ + 'old ({})'.format( + psi_avg_value, sigkill_psi_threshold) + + implement_corrective_action(SIGKILL) + + psi_t0 = time() + continue + + if sigterm_psi_exceeded and psi_post_action_delay_exceeded: + time0 = time() + mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \ + 'shold ({})'.format(psi_avg_value, sigterm_psi_threshold) + + implement_corrective_action(SIGTERM) + + psi_t0 = time() + continue + + mem_available, swap_total, swap_free = check_mem_and_swap() + + # print(mem_available, swap_total, swap_free) + + # если метры - получаем киб выше и сразу. см. + + # if swap_min_sigkill is set in percent + if swap_kill_is_percent: + swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0 + + if swap_term_is_percent: + swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0 + + if swap_warn_is_percent: + swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0 + + # в общем случае для работы нужны килобайты. Если в процентах задано - + # находим КБ тут, после получения своптотал. + + mem_used_zram = check_zram() + + if print_mem_check_results: + + wt1 = time() + + delta = (mem_available + swap_free) - new_mem + + t_cycle = wt1 - wt2 + + report_delta = wt1 - report0 + + if report_delta >= min_mem_report_interval: + + mem_report = True + new_mem = mem_available + swap_free + + report0 = wt1 + + else: + mem_report = False + + wt2 = time() + + if mem_report: + + speed = delta / 1024.0 / report_delta + speed_info = ' | dMem: {} M/s'.format( + str(round(speed)).rjust(5) + ) + + # Calculate 'swap-column' width + swap_len = len(str(round(swap_total / 1024.0))) + + # Output available mem sizes + if swap_total == 0 and mem_used_zram == 0: + log('{}MemAvail: {} M, {} %{}'.format( + psi_avg_string, + human(mem_available, mem_len), + just_percent_mem(mem_available / mem_total), + speed_info + ) + ) + + elif swap_total > 0 and mem_used_zram == 0: + log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format( + psi_avg_string, + human(mem_available, mem_len), + just_percent_mem(mem_available / mem_total), + human(swap_free, swap_len), + just_percent_swap(swap_free / (swap_total + 0.1)), + speed_info + ) + ) + + else: + log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem' + 'UsedZram: {} M, {} %{}'.format( + psi_avg_string, + human(mem_available, mem_len), + just_percent_mem(mem_available / mem_total), + human(swap_free, swap_len), + just_percent_swap(swap_free / (swap_total + 0.1)), + human(mem_used_zram, mem_len), + just_percent_mem(mem_used_zram / mem_total), + speed_info + ) + ) + + # если swap_min_sigkill задан в абсолютной величине и Swap_total = 0 + if swap_total > swap_min_sigkill_kb: # If swap_min_sigkill is absolute + swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1)) + else: + swap_sigkill_pc = '-' + + if swap_total > swap_min_sigterm_kb: + swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1)) + else: + + # печатать так: SwapTotal = 0, ignore swapspace + swap_sigterm_pc = '-' + + # это для печати меминфо. Все переработать нахрен. + + # далее пошла проверка превышения порогов + + # MEM SWAP KILL + if (mem_available <= mem_min_sigkill_kb and + swap_free <= swap_min_sigkill_kb): + time0 = time() + + mem_info = 'Hard threshold exceeded\nMemory status that requ' \ + 'ires corrective actions:' \ + '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ + 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ + 'p_min_sigkill [{} MiB, {} %]'.format( + kib_to_mib(mem_available), + percent(mem_available / mem_total), + kib_to_mib(mem_min_sigkill_kb), + percent(mem_min_sigkill_kb / mem_total), + kib_to_mib(swap_free), + percent(swap_free / (swap_total + 0.1)), + kib_to_mib(swap_min_sigkill_kb), + swap_sigkill_pc) + + implement_corrective_action(SIGKILL) + + psi_t0 = time() + continue + + # ZRAM KILL + if mem_used_zram >= zram_max_sigkill_kb: + time0 = time() + + mem_info = 'Hard threshold exceeded\nMemory status that requir' \ + 'es corrective actions:' \ + '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \ + 'kill [{} MiB, {} %]'.format( + kib_to_mib(mem_used_zram), + percent(mem_used_zram / mem_total), + kib_to_mib(zram_max_sigkill_kb), + percent(zram_max_sigkill_kb / mem_total)) + + implement_corrective_action(SIGKILL) + + psi_t0 = time() + continue + + # MEM SWAP TERM + if mem_available <= mem_min_sigterm_kb and \ + swap_free <= swap_min_sigterm_kb: + + time0 = time() + + mem_info = 'Soft threshold exceeded\nMemory status that requi' \ + 'res corrective actions:' \ + '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ + 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ + 'p_min_sigterm [{} MiB, {} %]'.format( + kib_to_mib(mem_available), + percent(mem_available / mem_total), + kib_to_mib(mem_min_sigterm_kb), + # percent(mem_min_sigterm_kb / mem_total), + # ОКРУГЛЯТЬ НА МЕСТЕ ВЫШЕ (или не выше, хз) + round(mem_min_sigterm_percent, 1), + kib_to_mib(swap_free), + percent(swap_free / (swap_total + 0.1)), + kib_to_mib(swap_min_sigterm_kb), + swap_sigterm_pc) + + implement_corrective_action(SIGTERM) + + psi_t0 = time() + continue + + # ZRAM TERM + if mem_used_zram >= zram_max_sigterm_kb: + time0 = time() + + mem_info = 'Soft threshold exceeded\nMemory status that requ' \ + 'ires corrective actions:' \ + '\n MemUsedZram [{} MiB, {} %] >= ' \ + 'zram_max_sigterm [{} M, {} %]'.format( + kib_to_mib(mem_used_zram), + percent(mem_used_zram / mem_total), + kib_to_mib(zram_max_sigterm_kb), + percent(zram_max_sigterm_kb / mem_total)) + + implement_corrective_action(SIGTERM) + + psi_t0 = time() + continue + + # LOW MEMORY WARNINGS + if gui_low_memory_warnings: + + if mem_available <= mem_min_warnings_kb and \ + swap_free <= swap_min_warnings_kb + 0.1 or \ + mem_used_zram >= zram_max_warnings_kb: + warn_time_delta = time() - warn_time_now + warn_time_now = time() + warn_timer += warn_time_delta + if warn_timer > min_time_between_warnings: + send_notify_warn() + warn_timer = 0 + + # SLEEP BETWEEN MEM CHECKS + sleep_after_check_mem() diff --git a/trash/oom-trigger b/trash/oom-trigger index 376df5b..f0dcdf5 100755 --- a/trash/oom-trigger +++ b/trash/oom-trigger @@ -6,13 +6,13 @@ from memco import * from signal import signal, SIGTERM from time import sleep - +from sys import exit def signal_handler(signum, frame): print('Got signal {}'.format(signum)) - #sleep(1) - exit() + # sleep(1) + # exit() signal(SIGTERM, signal_handler) diff --git a/trash/thanatolog b/trash/thanatolog index 5ab9cfe..76c2599 100755 --- a/trash/thanatolog +++ b/trash/thanatolog @@ -116,13 +116,10 @@ print('Name:', name) print('RSS at startup: {} (100.0 %)'.format(int(rss0))) print('MemAvail:', ma) -send_signal = SIGKILL - - - -os.kill(int(pid), SIGCONT) +send_signal = SIGTERM +# os.kill(int(pid), SIGCONT) os.kill(int(pid), send_signal) diff --git a/trash/x01 b/trash/x01 new file mode 100755 index 0000000..312bc14 --- /dev/null +++ b/trash/x01 @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 + + +from os import getpid + +# find mem_total +# find positions of SwapFree and SwapTotal in /proc/meminfo + +with open('/proc/meminfo') as f: + mem_list = f.readlines() + +mem_list_names = [] +for s in mem_list: + mem_list_names.append(s.split(':')[0]) + +if mem_list_names[2] != 'MemAvailable': + errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied') + # exit(1) + +swap_total_index = mem_list_names.index('SwapTotal') +swap_free_index = swap_total_index + 1 + +mem_total = int(mem_list[0].split(':')[1][:-4]) + +# Get names from /proc/*/status to be able to get VmRSS and VmSwap values + +with open('/proc/self/status') as file: + status_list = file.readlines() + +status_names = [] +for s in status_list: + status_names.append(s.split(':')[0]) + +ppid_index = status_names.index('PPid') +vm_size_index = status_names.index('VmSize') +vm_rss_index = status_names.index('VmRSS') +vm_swap_index = status_names.index('VmSwap') +uid_index = status_names.index('Uid') +state_index = status_names.index('State') + + +try: + anon_index = status_names.index('RssAnon') + file_index = status_names.index('RssFile') + shmem_index = status_names.index('RssShmem') + detailed_rss = True + # print(detailed_rss, 'detailed_rss') +except ValueError: + detailed_rss = False + # print('It is not Linux 4.5+') + + + +self_pid = str(getpid()) + + +def self_rss(): + r = pid_to_status(self_pid)[5] + print(r) + + + +def pid_to_status(pid): + """ + """ + + try: + + with open('/proc/' + pid + '/status') as f: + + for n, line in enumerate(f): + + if n is 0: + name = line.split('\t')[1][:-1] + + if n is state_index: + state = line.split('\t')[1][0] + continue + + if n is ppid_index: + ppid = line.split('\t')[1][:-1] + continue + + if n is uid_index: + uid = line.split('\t')[2] + continue + + if n is vm_size_index: + vm_size = int(line.split('\t')[1][:-4]) + continue + + if n is vm_rss_index: + vm_rss = int(line.split('\t')[1][:-4]) + continue + + if n is vm_swap_index: + vm_swap = int(line.split('\t')[1][:-4]) + break + + return name, state, ppid, uid, vm_size, vm_rss, vm_swap + + except UnicodeDecodeError: + return pid_to_status_unicode(pid) + + except FileNotFoundError: + return None + + except ProcessLookupError: + return None + + except ValueError: + return None + + +self_rss() + + +import logging +import subprocess +import argparse + + +self_rss() +