#!/usr/bin/env python3
"""A daemon that prevents OOM in Linux systems."""
from time import sleep, time
# taken before the remaining imports so the startup duration can be reported
start_time = time()

import os
from operator import itemgetter

# this is most slow import
from argparse import ArgumentParser
from sys import stdout
from signal import SIGKILL, SIGTERM

sig_dict = {SIGKILL: 'SIGKILL',
            SIGTERM: 'SIGTERM'}

'''
nm = 30
nc = nm + 1
'''

self_uid = os.geteuid()
self_pid = str(os.getpid())

# timeout (seconds) passed to Popen.wait() for notification helpers
wait_time = 2

# maximum age (seconds) of the cached notification environments
cache_time = 30

cache_path = '/dev/shm/nohang_env_cache'


##########################################################################

# function definition section


def format_time(t):
    """
    Format a duration given in seconds.

    t: number of seconds (truncated to int)
    returns 'S sec', 'M min S sec' or 'H h M min S sec'
    """
    t = int(t)
    if t < 60:
        return '{} sec'.format(t)
    m, s = divmod(t, 60)
    if t < 3600:
        return '{} min {} sec'.format(m, s)
    h, m = divmod(m, 60)
    return '{} h {} min {} sec'.format(h, m, s)


def re_pid_environ(pid):
    """
    Read the environment of one process.

    pid: str pid of the process
    returns tuple with USER, DISPLAY, DBUS like follow:
    ('user', 'DISPLAY=:0',
    'DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus')
    returns None if these vars are not in /proc/[pid]/environ, if the
    dbus address contains ',guid=', if HOME is under /var, or if the
    environ file cannot be read
    """
    display_env = 'DISPLAY='
    dbus_env = 'DBUS_SESSION_BUS_ADDRESS='
    user_env = 'USER='

    try:
        env = str(rline1('/proc/' + pid + '/environ'))
    except (FileNotFoundError, ProcessLookupError, UnicodeDecodeError):
        return None

    if not (display_env in env and dbus_env in env and user_env in env):
        return None

    # The substring test above can also match inside other variables
    # (e.g. 'XDISPLAY='), so every value is checked again after the loop.
    user = display = dbus = None

    # iterating over a list of process environment variables
    for var in env.split('\x00'):
        if var.startswith(user_env):
            user = var
        elif var.startswith(display_env):
            display = var[:10]
        elif var.startswith(dbus_env):
            if ',guid=' in var:
                return None
            dbus = var
        elif var.startswith('HOME=/var'):
            # exclude Display Manager's user
            return None

    if user is None or display is None or dbus is None:
        return None
    return user.partition('USER=')[2], display, dbus


def root_notify_env():
    """
    Collect notification environments of all processes.

    returns set of (user, display, dbus) tuples, see re_pid_environ()
    """
    envs = set()
    # iterates over processes, find processes with suitable env
    for pid in os.listdir('/proc'):
        if not pid[0].isdecimal():
            continue
        one_env = re_pid_environ(pid)
        if one_env is not None:
            envs.add(one_env)
    return envs


def string_to_float_convert_test(string):
    """Try to interpret string values as floats; returns None on failure."""
    try:
        return float(string)
    except ValueError:
        return None


def string_to_int_convert_test(string):
    """Try to interpret string values as integers; returns None on failure."""
    try:
        return int(string)
    except ValueError:
        return None


# extracting the parameter from the config dictionary, str return
def conf_parse_string(param):
    """
    Get string parameters from the config dict.

    param: config_dict key
    returns config_dict[param].strip(); exits if the key is missing
    """
    if param not in config_dict:
        print('All the necessary parameters must be in the config')
        print('There is no "{}" parameter in the config'.format(param))
        exit()
    return config_dict[param].strip()


# extracting the parameter from the config dictionary, bool return
def conf_parse_bool(param):
    """
    Get bool parameters from the config_dict.

    param: config_dict key
    returns bool; exits if the key is missing or the value is neither
    'True' nor 'False'
    """
    if param not in config_dict:
        print('All the necessary parameters must be in the config')
        print('There is no "{}" parameter in the config'.format(param))
        exit()

    param_str = config_dict[param]
    if param_str == 'True':
        return True
    if param_str == 'False':
        return False

    # report the parameter name, not its (invalid) value
    print('Invalid value of the "{}" parameter.'.format(param))
    print('Valid values are True and False.')
    print('Exit')
    exit()


def rline1(path):
    """
    Read 1st line from path.

    returns the line without its trailing newline, None for an empty file
    """
    with open(path) as f:
        for line in f:
            # only strip a real newline: files such as /proc/[pid]/environ
            # do not end with one
            if line.endswith('\n'):
                return line[:-1]
            return line


def write(path, string):
    """Write string to path."""
    with open(path, 'w') as f:
        f.write(string)


def kib_to_mib(num):
    """Convert KiB values to MiB values."""
    return round(num / 1024.0)


def percent(num):
    """Interpret num (a fraction) as percentage with one decimal place."""
    return round(num * 100, 1)


def just_percent_mem(num):
    """Convert num to percent and right-justify it to 4 characters."""
    return str(round(num * 100, 1)).rjust(4, ' ')


def just_percent_swap(num):
    """Convert num to percent and right-justify it to 5 characters."""
    return str(round(num * 100, 1)).rjust(5, ' ')


def human(num, lenth):
    """Convert KiB values to MiB values with right alignment"""
    return str(round(num / 1024)).rjust(lenth, ' ')


# return str with amount of bytes
def zram_stat(zram_id):
    """
    Get zram state.

    zram_id: str zram block-device id
    returns str disksize, str mem_used_total (both as read from sysfs);
    ('0', '0') if the device does not exist or its disksize is 0
    """
    try:
        disksize = rline1('/sys/block/' + zram_id + '/disksize')
    except FileNotFoundError:
        return '0', '0'
    if disksize == '0':
        return '0', '0'
    try:
        # split() without arguments drops the empty fields produced by
        # the padding spaces in mm_stat
        mm_stat_list = rline1('/sys/block/' + zram_id + '/mm_stat').split()
        mem_used_total = mm_stat_list[2]
    except FileNotFoundError:
        mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total')
    return disksize, mem_used_total  # BYTES, str


# return process name
def pid_to_name(pid):
    """
    Get process name by pid.

    pid: str pid of required process
    returns string process_name, '' if the process does not exist
    """
    try:
        with open('/proc/' + pid + '/status') as f:
            for line in f:
                return line[:-1].split('\t')[1]
    except (FileNotFoundError, ProcessLookupError):
        return ''


def pid_to_cmdline(pid):
    """
    Get process cmdline by pid.

    pid: str pid of required process
    returns string cmdline ('' when /proc/[pid]/cmdline is empty)
    """
    with open('/proc/' + pid + '/cmdline') as file:
        try:
            return file.readlines()[0].replace('\x00', ' ').strip()
        except IndexError:
            return ''


def pid_to_uid(pid):
    """
    Get the first field of the Uid line of /proc/[pid]/status.

    pid: str pid of required process
    returns str uid, None if the file has no line number uid_index
    """
    with open('/proc/' + pid + '/status') as f:
        for n, line in enumerate(f):
            if n == uid_index:
                return line.split('\t')[1]


def _notify_send(title, body, timeout_message, user_env=None):
    """
    Run notify-send and wait for it up to wait_time seconds.

    user_env: None to notify the current user, or a (username, display,
    dbus) item from root_notify_env() to notify that user via sudo
    timeout_message: str printed when the child does not exit in time
    """
    command = ['notify-send', '--icon=dialog-warning',
               '{}'.format(title), '{}'.format(body)]
    if user_env is not None:
        username, display_env, dbus_env = \
            user_env[0], user_env[1], user_env[2]
        command = ['sudo', '-u', username, 'env',
                   display_env, dbus_env] + command
    try:
        Popen(command).wait(wait_time)
    except TimeoutExpired:
        print(timeout_message)
    except FileNotFoundError:
        # a missing helper binary must not be mistaken by callers for a
        # vanished victim process
        print('{} not found, notification was not sent'.format(command[0]))


def _notify_users(title, body, timeout_message):
    """Notify all discovered desktop users (as root) or the current user."""
    if root:
        # send notification to all active users with notify-send
        for user_env in root_notify_env():
            _notify_send(title, body, timeout_message, user_env)
    else:
        # send notification to user that runs this nohang
        _notify_send(title, body, timeout_message)


def send_notify_warn():
    """
    Look for process with maximum 'badness' and warn user with notification.
    """
    # find process with max badness
    fat_tuple = fattest()
    pid = fat_tuple[0]
    name = pid_to_name(pid)

    mem_percent = round(mem_available / mem_total * 100)
    if mem_used_zram > 0:
        low_mem_percent = '{}% {}% {}%'.format(
            mem_percent,
            round(swap_free / (swap_total + 0.1) * 100),
            round(mem_used_zram / mem_total * 100))
    elif swap_free > 0:
        low_mem_percent = '{}% {}%'.format(
            mem_percent,
            round(swap_free / (swap_total + 0.1) * 100))
    else:
        low_mem_percent = '{}%'.format(mem_percent)

    title = 'Low memory: {}'.format(low_mem_percent)
    body = 'Fattest process: {}, {}'.format(pid, name)

    if root:  # If nohang was started by root
        # send notification to all active users with special script
        # now it is possible to notify directly from the cache if it is
        # not stale
        try:
            Popen([
                '/usr/bin/nohang_notify_low_mem',
                '--mem', low_mem_percent,
                '--pid', pid,
                '--name', name
            ])
        except FileNotFoundError:
            print('/usr/bin/nohang_notify_low_mem not found, '
                  'notification was not sent')
    else:  # Or by regular user
        # send notification to user that runs this nohang
        _notify_send(title, body, 'TimeoutExpired: ' + 'notify low mem')


def send_notify(signal, name, pid):
    """
    Notificate about OOM Preventing.

    signal: key for notify_sig_dict
    name: str process name
    pid: str process pid
    """
    title = 'Preventing OOM'
    # the & symbol can break notifications in some desktop themes,
    # so it is replaced with *
    body = '{} process {}, {}'.format(
        notify_sig_dict[signal], pid, name.replace('&', '*'))
    _notify_users(title, body, 'TimeoutExpired: ' + 'notify send signal')


def send_notify_etc(pid, name, command):
    """
    Notificate about OOM Preventing.

    command: str command that will be executed
    name: str process name
    pid: str process pid
    """
    title = 'Preventing OOM'
    body = 'Victim is process {}, {}\nExecute the command:\n{}'.format(
        pid, name.replace('&', '*'), command.replace('&', '*'))
    _notify_users(title, body, 'TimeoutExpired: notify run command')


def sleep_after_send_signal(signal):
    """
    Sleeping after signal was sent.

    signal: sent signal
    """
    if signal == SIGKILL:
        delay = min_delay_after_sigkill
    else:
        delay = min_delay_after_sigterm
    if print_sleep_periods:
        print(' sleep', delay)
    sleep(delay)


def fattest():
    """
    Find the process with highest badness and its badness adjustment
    Return pid and badness
    """
    pid_badness_list = []

    for pid in os.listdir('/proc'):
        # only directories whose names consist only of numbers, except
        # /proc/1/ and nohang itself; compare by value, because strings
        # returned by os.listdir() are never identical to these objects
        if not pid[0].isdecimal() or pid == '1' or pid == self_pid:
            continue

        # find and modify badness (if it needs)
        try:
            badness = int(rline1('/proc/' + pid + '/oom_score'))

            if decrease_oom_score_adj:
                oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
                if badness > oom_score_adj_max and oom_score_adj > 0:
                    badness = badness - oom_score_adj + oom_score_adj_max

            if regex_matching:
                name = pid_to_name(pid)
                for re_tup in processname_re_list:
                    if search(re_tup[1], name) is not None:
                        if pid_to_cmdline(pid) == '':
                            # skip kthreads
                            continue
                        badness += int(re_tup[0])

            if re_match_cmdline:
                cmdline = pid_to_cmdline(pid)
                if cmdline == '':
                    # skip kthreads
                    continue
                for re_tup in cmdline_re_list:
                    if search(re_tup[1], cmdline) is not None:
                        badness += int(re_tup[0])

            if re_match_uid:
                uid = pid_to_uid(pid)
                for re_tup in uid_re_list:
                    if search(re_tup[1], uid) is not None:
                        if pid_to_cmdline(pid) == '':
                            # skip kthreads
                            continue
                        badness += int(re_tup[0])

        except (FileNotFoundError, ProcessLookupError):
            # the process exited while it was being inspected
            continue

        pid_badness_list.append((pid, badness))

    # Take the (pid, badness) tuple with the maximum 'badness' value
    pid, victim_badness = sorted(
        pid_badness_list, key=itemgetter(1), reverse=True)[0]

    return pid, victim_badness


def _status_kib_to_mib(line):
    """Convert the value of a 'Name:<TAB>N kB' status line to MiB."""
    # [:-4] strips the trailing ' kB\n'
    return kib_to_mib(int(line.split('\t')[1][:-4]))


def _count_corrective_action(key):
    """Increment the stat_dict counter of the corrective action `key`."""
    stat_dict[key] = stat_dict.get(key, 0) + 1


def find_victim_and_send_signal(signal):
    """
    Find victim with highest badness and send SIGTERM/SIGKILL

    signal: SIGTERM or SIGKILL; with SIGTERM a command configured for
    the victim name in etc_dict is executed instead of sending the signal
    """
    pid, victim_badness = fattest()
    name = pid_to_name(pid)

    if victim_badness < min_badness:
        response_time = time() - time0
        victim_badness_is_too_small = ' victim badness {} < min_b' \
            'adness {}; nothing to do; response time: {} ms'.format(
                victim_badness,
                min_badness,
                round(response_time * 1000))
        print(victim_badness_is_too_small)
        sleep_after_send_signal(signal)
        return

    # Fallback values: the victim may exit while its /proc entry is read,
    # the report below must still be printable in that case.
    uid = oom_score = oom_score_adj = 'n/a'
    vm_size = vm_rss = vm_swap = 0
    anon_rss = file_rss = shmem_rss = 0
    cmdline = ''

    # Get VmRSS and VmSwap and cmdline of victim process
    try:
        with open('/proc/' + pid + '/status') as f:
            for n, line in enumerate(f):
                if n == uid_index:
                    uid = line.split('\t')[1]
                elif n == vm_size_index:
                    vm_size = _status_kib_to_mib(line)
                elif n == vm_rss_index:
                    vm_rss = _status_kib_to_mib(line)
                elif detailed_rss and n == anon_index:
                    anon_rss = _status_kib_to_mib(line)
                elif detailed_rss and n == file_index:
                    file_rss = _status_kib_to_mib(line)
                elif detailed_rss and n == shmem_index:
                    shmem_rss = _status_kib_to_mib(line)
                elif n == vm_swap_index:
                    vm_swap = _status_kib_to_mib(line)
                    break

        with open('/proc/' + pid + '/cmdline') as file:
            try:
                cmdline = file.readlines()[0].replace('\x00', ' ')
            except IndexError:
                cmdline = ''
    except (FileNotFoundError, ProcessLookupError, IndexError, ValueError):
        pass

    try:
        oom_score = rline1('/proc/' + pid + '/oom_score')
        oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
    except (FileNotFoundError, ProcessLookupError):
        pass

    len_vm = len(str(vm_size))
    vm_rss_just = str(vm_rss).rjust(len_vm, ' ')
    vm_swap_just = str(vm_swap).rjust(len_vm, ' ')

    if detailed_rss:
        rss_info = '\n VmRSS: \033[33m{}\033[0m MiB (' \
            'Anon: \033[33m{}\033[0m MiB, ' \
            'File: \033[33m{}\033[0m MiB, ' \
            'Shmem: \033[33m{}\033[0m MiB)'.format(
                vm_rss_just, anon_rss, file_rss, shmem_rss)
    else:
        rss_info = '\n VmRSS: \033[33m{}\033[0m MiB'.format(vm_rss_just)

    # every part is formatted separately, so braces inside name or
    # cmdline are never interpreted as placeholders
    victim_info = '\033[4mFound the victim with highest badness:\033[0m' \
        '\n Name: \033[33m{}\033[0m' \
        '\n PID: \033[33m{}\033[0m' \
        '\n UID: \033[33m{}\033[0m' \
        '\n Badness: \033[33m{}\033[0m, ' \
        'oom_score: \033[33m{}\033[0m, ' \
        'oom_score_adj: \033[33m{}\033[0m' \
        '\n VmSize: \033[33m{}\033[0m MiB'.format(
            name, pid, uid, victim_badness,
            oom_score, oom_score_adj, vm_size)
    victim_info += rss_info
    victim_info += '\n VmSwap: \033[33m{}\033[0m MiB' \
        '\n CmdLine: \033[33m{}\033[0m'.format(vm_swap_just, cmdline)

    if execute_the_command and signal == SIGTERM and name in etc_dict:
        command = etc_dict[name]
        exit_status = os.system(command)
        if exit_status == 0:
            exit_status = '\033[32m0\033[0m'
        else:
            exit_status = '\033[31m{}\033[0m'.format(exit_status)

        response_time = time() - time0

        etc_info = '{}' \
            '\n\033[4mImplement corrective action:\033[0m\n Execute the command: \033[4m{}\033[0m' \
            '\n Exit status: {}; response time: {} ms'.format(
                victim_info, command, exit_status,
                round(response_time * 1000))

        # update stat_dict
        _count_corrective_action(
            "Run the command '\033[35m{}\033[0m'".format(command))

        print(mem_info)
        print(etc_info)

        if gui_notifications:
            send_notify_etc(pid, name, command)

    else:
        try:
            os.kill(int(pid), signal)
        except (FileNotFoundError, ProcessLookupError):
            response_time = time() - time0
            send_result = 'no such process; response time: {} ms'.format(
                round(response_time * 1000))
        else:
            response_time = time() - time0
            send_result = '\033[32mOK\033[0m; response time: {} ms'.format(
                round(response_time * 1000))

            # update stat_dict
            _count_corrective_action(
                'Send \033[35m{}\033[0m to \033[35m{}\033[0m'.format(
                    sig_dict[signal], name))

            if gui_notifications:
                send_notify(signal, name, pid)

        preventing_oom_message = '{}' \
            '\n\033[4mImplement corrective action:\033[0m\n ' \
            'Sending \033[4m{}\033[0m to the victim; {}'.format(
                victim_info, sig_dict[signal], send_result)
        print(mem_info)
        print(preventing_oom_message)

    print('\n\033[4mDuration of work: {}; number of corrective actions:\033[0m'.format(
        format_time(time() - start_time)))
    for key in stat_dict:
        print(' - {}: {}'.format(key, stat_dict[key]))

    sleep_after_send_signal(signal)


def sleep_after_check_mem():
    """Specify sleep times depends on rates and available memory."""
    t_mem = mem_available / rate_mem
    t_swap = swap_free / rate_swap
    t_zram = (mem_total - mem_used_zram) / rate_zram

    # sleep for the shorter of the two estimates
    t = min(t_mem + t_swap, t_mem + t_zram)

    try:
        if print_sleep_periods:
            print('sleep', round(t, 2),
                  ' (t_mem={}, t_swap={}, t_zram={})'.format(
                      round(t_mem, 2), round(t_swap, 2), round(t_zram, 2)))
        stdout.flush()
        sleep(t)
    except KeyboardInterrupt:
        exit()


def calculate_percent(arg_key):
    """
    Parse a memory level ('N %' or 'N M') from the config dict.

    arg_key: str key for config_dict
    returns tuple (level in KiB, level in MiB, level in percent of
    mem_total); exits on a missing key, invalid number, invalid units
    or out-of-range value
    """
    if arg_key not in config_dict:
        print('{} not in config\nExit'.format(arg_key))
        exit()

    mem_min = config_dict[arg_key]

    if mem_min.endswith('%'):
        # truncate percents, so we have a number, then 'float test'
        mem_min_percent = string_to_float_convert_test(
            mem_min[:-1].strip())
        if mem_min_percent is None:
            print('Invalid {} value, not float\nExit'.format(arg_key))
            exit()
        # Final validations...
        if mem_min_percent < 0 or mem_min_percent > 100:
            print(
                '{}, as percents value, out of range [0; 100]\nExit'.format(arg_key))
            exit()

        # the percentage is clean and valid float. Can translate into Kb
        mem_min_kb = mem_min_percent / 100 * mem_total
        mem_min_mb = round(mem_min_kb / 1024)

    elif mem_min.endswith('M'):
        mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip())
        if mem_min_mb is None:
            print('Invalid {} value, not float\nExit'.format(arg_key))
            exit()
        mem_min_kb = mem_min_mb * 1024
        if mem_min_kb > mem_total:
            print(
                '{} value can not be greater then MemTotal ({} MiB)\nExit'.format(
                    arg_key, round(
                        mem_total / 1024)))
            exit()
        mem_min_percent = mem_min_kb / mem_total * 100

    else:
        # falling through here would hit unbound locals in the return
        print('Invalid {} units in config.\n Exit'.format(arg_key))
        exit()

    return mem_min_kb, mem_min_mb, mem_min_percent


##########################################################################

# find mem_total
# find positions of SwapFree and SwapTotal in /proc/meminfo

with open('/proc/meminfo') as file:
    mem_list = file.readlines()

mem_list_names = [s.split(':')[0] for s in mem_list]

if mem_list_names[2] != 'MemAvailable':
    print('Your Linux kernel is too old, Linux 3.14+ requied\nExit')
    exit()

swap_total_index = mem_list_names.index('SwapTotal')
swap_free_index = swap_total_index + 1

mem_total = int(mem_list[0].split(':')[1].strip(' kB\n'))

# Get names from /proc/*/status to be able to get VmRSS and VmSwap values

with open('/proc/self/status') as file:
    status_list = file.readlines()

status_names = [s.split(':')[0] for s in status_list]

vm_size_index = status_names.index('VmSize')
vm_rss_index = status_names.index('VmRSS')
vm_swap_index = status_names.index('VmSwap')
uid_index = status_names.index('Uid')

try:
    anon_index = status_names.index('RssAnon')
    file_index = status_names.index('RssFile')
    shmem_index = status_names.index('RssShmem')
    detailed_rss = True
    #print(detailed_rss, 'detailed_rss')
except ValueError:
    detailed_rss = False
    #print('It is not Linux 4.5+')
##########################################################################

# Configurations

# current working directory, used to look for ./nohang.conf
cd = os.getcwd()

#print('CD:', cd)

# where to look for a config if not specified via the -c/--config option
default_configs = (cd + '/nohang.conf', '/etc/nohang/nohang.conf')

# universal message if config is invalid
conf_err_mess = '\nSet up the path to the valid conf' \
    'ig file with -c/--config option!\nExit'

# Cmd argparse
parser = ArgumentParser()
parser.add_argument(
    '-c',
    '--config',
    help="""path to the config file, default values: ./nohang.conf, /etc/nohang/nohang.conf""",
    default=None,
    type=str
)

args = parser.parse_args()

arg_config = args.config

if arg_config is None:
    # take the first default config that exists
    config = None
    for i in default_configs:
        if os.path.exists(i):
            config = i
            break
    if config is None:
        print('Default configuration was not found\n', conf_err_mess)
        exit()
else:
    if os.path.exists(arg_config):
        config = arg_config
    else:
        print("File {} doesn't exists{}".format(
            arg_config, conf_err_mess))
        exit()

print('The path to the config:', config)

##########################################################################

# parsing the config with obtaining the parameters dictionary

# conf_parameters_dict
# conf_restart_dict

# dictionary with config options
config_dict = dict()

# lists of (badness adjustment as str, regex) tuples
processname_re_list = []
cmdline_re_list = []
uid_re_list = []

# dictionary with names and commands for the parameter
# execute_the_command
# a list is needed here too, not a dictionary
etc_dict = dict()

# will store corrective actions stat
stat_dict = dict()


def _parse_re_directive(line, directive):
    """
    Parse and validate a '@DIRECTIVE badness /// regex' config line.

    line: str config line that starts with directive
    directive: str, e.g. '@PROCESSNAME_RE'
    returns tuple (badness adjustment as str, regex as str);
    exits if the separator is missing, the badness adjustment is not an
    integer or the regex does not compile
    """
    import re

    parts = line.partition(directive)[2].strip(' \n').partition('///')
    badness_adj, regex = parts[0].strip(' '), parts[2].strip(' ')

    valid = parts[1] != '' and \
        string_to_int_convert_test(badness_adj) is not None
    if valid:
        try:
            re.compile(regex)
        except re.error:
            valid = False

    if not valid:
        print('Invalid {} line in the config, expected: {} '
              '<integer> /// <regular expression>\nExit'.format(
                  directive, directive))
        exit()

    return badness_adj, regex


try:
    with open(config) as f:

        for line in f:

            etc = line.startswith('$ETC')
            # '@...' lines are matching directives, not 'key = value'
            # options, so they must not end up in config_dict
            re_directive = line.startswith('@')
            # comments, empty lines and indented lines are ignored
            ignored = line.startswith(('#', '\n', '\t', ' '))

            if not ignored and not etc and not re_directive:
                a = line.partition('=')
                config_dict[a[0].strip()] = a[2].strip()

            if etc:
                a = line[4:].split('///')
                etc_name = a[0].strip()
                # a missing '///' raises IndexError, reported below
                etc_command = a[1].strip()
                if len(etc_name) > 15:
                    print('Invalid config, the length of the process '
                          'name must not exceed 15 characters\nExit')
                    exit()
                etc_dict[etc_name] = etc_command

            if line.startswith('@PROCESSNAME_RE'):
                processname_re_list.append(
                    _parse_re_directive(line, '@PROCESSNAME_RE'))

            if line.startswith('@CMDLINE_RE'):
                cmdline_re_list.append(
                    _parse_re_directive(line, '@CMDLINE_RE'))

            if line.startswith('@UID_RE'):
                uid_re_list.append(
                    _parse_re_directive(line, '@UID_RE'))

except PermissionError:
    print('PermissionError', conf_err_mess)
    exit()
except UnicodeDecodeError:
    print('UnicodeDecodeError', conf_err_mess)
    exit()
except IsADirectoryError:
    print('IsADirectoryError', conf_err_mess)
    exit()
except IndexError:
    print('IndexError', conf_err_mess)
    exit()

# print(processname_re_list)
# print(cmdline_re_list)
# print(uid_re_list)

##########################################################################

# extracting parameters from the dictionary
# check for all necessary parameters
# validation of all parameters

print_config = conf_parse_bool('print_config')
print_mem_check_results = conf_parse_bool('print_mem_check_results')
print_sleep_periods = conf_parse_bool('print_sleep_periods')
realtime_ionice = conf_parse_bool('realtime_ionice')
mlockall = conf_parse_bool('mlockall')
gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
gui_notifications = conf_parse_bool('gui_notifications')
decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
execute_the_command = conf_parse_bool('execute_the_command')

regex_matching = conf_parse_bool('regex_matching')
re_match_cmdline = conf_parse_bool('re_match_cmdline')
re_match_uid = conf_parse_bool('re_match_uid')

# re is imported only when some kind of regex matching is enabled
if regex_matching or re_match_cmdline or re_match_uid:
    from re import search
    # NOTE(review): sre_constants is deprecated since Python 3.11 and is
    # not used in the visible part of the file - confirm it is still needed
    import sre_constants

# each call returns the level as (KiB, MiB, percent of MemTotal)
mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent = calculate_percent(
    'mem_min_sigterm')
mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent = calculate_percent(
    'mem_min_sigkill')

zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent = calculate_percent(
    'zram_max_sigterm')
zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent = calculate_percent(
    'zram_max_sigkill')

mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent = calculate_percent(
    'mem_min_warnings')
zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent = calculate_percent(
    'zram_max_warnings')

if 'realtime_ionice_classdata' in config_dict:
    realtime_ionice_classdata = string_to_int_convert_test(
        config_dict['realtime_ionice_classdata'])
    if (realtime_ionice_classdata is None or
            realtime_ionice_classdata < 0 or
            realtime_ionice_classdata > 7):
        print('Invalid value of the "realtime_ionice_classdata" parameter.')
        print('Valid values are integers from the range [0; 7].')
        print('Exit')
        exit()
else:
    print('All the necessary parameters must be in the config')
    print('There is no "realtime_ionice_classdata" parameter in the config')
    exit()

if 'niceness' in config_dict:
    niceness = string_to_int_convert_test(config_dict['niceness'])
    if niceness is None:
        print('Invalid niceness value, not integer\nExit')
        exit()
    if niceness < -20 or niceness > 19:
        print('niceness out of range [-20; 19]\nExit')
        exit()
else:
    print('niceness not in config\nExit')
    exit()

if 'oom_score_adj' in config_dict:
    oom_score_adj = string_to_int_convert_test(
        config_dict['oom_score_adj'])
    if oom_score_adj is None:
        print('Invalid oom_score_adj value, not integer\nExit')
        exit()
    if oom_score_adj < -1000 or oom_score_adj > 1000:
        print('oom_score_adj out of range [-1000; 1000]\nExit')
        exit()
else:
    print('oom_score_adj not in config\nExit')
    exit()

if 'rate_mem' in config_dict:
    rate_mem = string_to_float_convert_test(config_dict['rate_mem'])
    if rate_mem is None:
        print('Invalid rate_mem value, not float\nExit')
        exit()
    if rate_mem <= 0:
        print('rate_mem MUST be > 0\nExit')
        exit()
else:
    print('rate_mem not in config\nExit')
    exit()

if 'rate_swap' in config_dict:
    rate_swap = string_to_float_convert_test(config_dict['rate_swap'])
    if rate_swap is None:
        print('Invalid rate_swap value, not float\nExit')
        exit()
    if rate_swap <= 0:
        print('rate_swap MUST be > 0\nExit')
        exit()
else:
    print('rate_swap not in config\nExit')
    exit()

if 'rate_zram' in config_dict:
    rate_zram = string_to_float_convert_test(config_dict['rate_zram'])
    if rate_zram is None:
        print('Invalid rate_zram value, not float\nExit')
        exit()
    if rate_zram <= 0:
        print('rate_zram MUST be > 0\nExit')
        exit()
else:
    print('rate_zram not in config\nExit')
    exit()

# NEEDS IN-PLACE VALIDATION!
if 'swap_min_sigterm' in config_dict:
    swap_min_sigterm = config_dict['swap_min_sigterm']
else:
    print('swap_min_sigterm not in config\nExit')
    exit()

# NEEDS IN-PLACE VALIDATION!
if 'swap_min_sigkill' in config_dict: swap_min_sigkill = config_dict['swap_min_sigkill'] else: print('swap_min_sigkill not in config\nExit') exit() if 'min_delay_after_sigterm' in config_dict: min_delay_after_sigterm = string_to_float_convert_test( config_dict['min_delay_after_sigterm']) if min_delay_after_sigterm is None: print('Invalid min_delay_after_sigterm value, not float\nExit') exit() if min_delay_after_sigterm < 0: print('min_delay_after_sigterm must be positiv\nExit') exit() else: print('min_delay_after_sigterm not in config\nExit') exit() if 'min_delay_after_sigkill' in config_dict: min_delay_after_sigkill = string_to_float_convert_test( config_dict['min_delay_after_sigkill']) if min_delay_after_sigkill is None: print('Invalid min_delay_after_sigkill value, not float\nExit') exit() if min_delay_after_sigkill < 0: print('min_delay_after_sigkill must be positiv\nExit') exit() else: print('min_delay_after_sigkill not in config\nExit') exit() if 'min_badness' in config_dict: min_badness = string_to_int_convert_test( config_dict['min_badness']) if min_badness is None: print('Invalid min_badness value, not integer\nExit') exit() if min_badness < 0 or min_badness > 1000: print('Invalud min_badness value\nExit') exit() else: print('min_badness not in config\nExit') exit() if 'oom_score_adj_max' in config_dict: oom_score_adj_max = string_to_int_convert_test( config_dict['oom_score_adj_max']) if oom_score_adj_max is None: print('Invalid oom_score_adj_max value, not integer\nExit') exit() if oom_score_adj_max < 0 or oom_score_adj_max > 1000: print('Invalid oom_score_adj_max value\nExit') exit() else: print('oom_score_adj_max not in config\nExit') exit() if 'min_time_between_warnings' in config_dict: min_time_between_warnings = string_to_float_convert_test( config_dict['min_time_between_warnings']) if min_time_between_warnings is None: print('Invalid min_time_between_warnings value, not float\nExit') exit() if min_time_between_warnings < 1 or 
min_time_between_warnings > 300: print('min_time_between_warnings value out of range [1; 300]\nExit') exit() else: print('min_time_between_warnings not in config\nExit') exit() # НА МЕСТЕ!!! if 'swap_min_warnings' in config_dict: swap_min_warnings = config_dict['swap_min_warnings'] else: print('swap_min_warnings not in config\nExit') exit() ########################################################################## # Get Kibibytes levels # Returns Kibibytes value if absolute value was set in config, # or tuple with percentage def sig_level_to_kb_swap(string): """Returns Kibibytes value if abs val was set in config, or tuple with %""" if string.endswith('%'): return float(string[:-1].strip()), True elif string.endswith('M'): return float(string[:-1].strip()) * 1024 else: print('Invalid config file. There are invalid units somewhere\nExit') exit() # So, get them swap_min_sigterm_swap = sig_level_to_kb_swap(swap_min_sigterm) swap_min_sigkill_swap = sig_level_to_kb_swap(swap_min_sigkill) swap_min_warnings_swap = sig_level_to_kb_swap(swap_min_warnings) if isinstance(swap_min_sigterm_swap, tuple): swap_term_is_percent = True swap_min_sigterm_percent = swap_min_sigterm_swap[0] else: swap_term_is_percent = False swap_min_sigterm_kb = swap_min_sigterm_swap if isinstance(swap_min_sigkill_swap, tuple): swap_kill_is_percent = True swap_min_sigkill_percent = swap_min_sigkill_swap[0] else: swap_kill_is_percent = False swap_min_sigkill_kb = swap_min_sigkill_swap if isinstance(swap_min_warnings_swap, tuple): swap_warn_is_percent = True swap_min_warnings_percent = swap_min_warnings_swap[0] else: swap_warn_is_percent = False swap_min_warnings_kb = swap_min_warnings_swap ########################################################################## # self-defense # возожно стоит убрать поддержку mlockall и ionice # Increase priority try: os.nice(niceness) niceness_result = 'OK' except PermissionError: niceness_result = 'Fail' pass # Deny self-killing try: with 
open('/proc/self/oom_score_adj', 'w') as file: file.write('{}\n'.format(oom_score_adj)) oom_score_adj_result = 'OK' except PermissionError: oom_score_adj_result = 'Fail' except OSError: oom_score_adj_result = 'Fail' # Deny process swapping if mlockall: from ctypes import CDLL result = CDLL('libc.so.6', use_errno=True).mlockall(3) if result is 0: mla_res = 'OK' else: mla_res = 'Fail' else: mla_res = '' if self_uid == 0: root = True decrease_res = 'OK' else: root = False decrease_res = 'Impossible' if root and realtime_ionice: os.system('ionice -c 1 -n {} -p {}'.format( realtime_ionice_classdata, self_pid)) ########################################################################## if print_config: print( '\n1. Memory levels to respond to as an OOM threat\n[displaying these options need fix]\n') print('mem_min_sigterm: {} MiB, {} %'.format( round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1))) print('mem_min_sigkill: {} MiB, {} %'.format( round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1))) print('swap_min_sigterm: {}'.format(swap_min_sigterm)) print('swap_min_sigkill: {}'.format(swap_min_sigkill)) print('zram_max_sigterm: {} MiB, {} %'.format( round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1))) print('zram_max_sigkill: {} MiB, {} %'.format( round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1))) print('\n2. The frequency of checking the level of available memory (and CPU usage)\n') print('rate_mem: {}'.format(rate_mem)) print('rate_swap: {}'.format(rate_swap)) print('rate_zram: {}'.format(rate_zram)) print('\n3. 
The prevention of killing innocent victims\n') print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm)) print('min_delay_after_sigkill: {}'.format(min_delay_after_sigkill)) print('min_badness: {}'.format(min_badness)) # False (OK) - OK не нужен когда фолс print('decrease_oom_score_adj: {}'.format( decrease_oom_score_adj )) if decrease_oom_score_adj: print('oom_score_adj_max: {}'.format(oom_score_adj_max)) print('\n4. Impact on the badness of processes via matching their' \ ' names, cmdlines ir UIDs with regular expressions\n') print('regex_matching: {}'.format(regex_matching)) if regex_matching: print('prefer_regex: {}'.format(prefer_regex)) print('prefer_factor: {}'.format(prefer_factor)) print('avoid_regex: {}'.format(avoid_regex)) print('avoid_factor: {}'.format(avoid_factor)) print() print('re_match_cmdline: {}'.format(re_match_cmdline)) if re_match_cmdline: print('prefer_re_cmdline: {}'.format(prefer_re_cmdline)) print('prefer_cmd_factor: {}'.format(prefer_cmd_factor)) print('avoid_re_cmdline: {}'.format(avoid_re_cmdline)) print('avoid_cmd_factor: {}'.format(avoid_cmd_factor)) print() print('re_match_uid: {}'.format(re_match_uid)) if re_match_uid: print('prefer_re_uid: {}'.format(prefer_re_uid)) print('prefer_uid_factor: {}'.format(prefer_uid_factor)) print('avoid_re_uid: {}'.format(avoid_re_uid)) print('avoid_uid_factor: {}'.format(avoid_uid_factor)) print('\n5. The execution of a specific command instead of sending the\nSIGTERM signal\n') print('execute_the_command: {}'.format(execute_the_command)) if execute_the_command: print('\nPROCESS NAME COMMAND TO EXECUTE') for key in etc_dict: print('{} {}'.format(key.ljust(15), etc_dict[key])) print('\n6. 
GUI notifications:\n- OOM prevention results and\n- low memory warnings\n') print('gui_notifications: {}'.format(gui_notifications)) print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings)) if gui_low_memory_warnings: print('min_time_between_warnings: {}'.format(min_time_between_warnings)) print('mem_min_warnings: {} MiB, {} %'.format( round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1))) print('swap_min_warnings: {}'.format(swap_min_warnings)) print('zram_max_warnings: {} MiB, {} %'.format( round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1))) print( '\n7. Preventing the slowing down of the program' '\n[displaying these options need fix]\n') print('mlockall: {} ({})'.format(mlockall, mla_res)) print('niceness: {} ({})'.format( niceness, niceness_result )) print('oom_score_adj: {} ({})'.format( oom_score_adj, oom_score_adj_result )) print('realtime_ionice: {} ({})'.format(realtime_ionice, '')) if realtime_ionice: print('realtime_ionice_classdata: {}'.format(realtime_ionice_classdata)) print('\n8. 
Output verbosity\n') print('print_config: {}'.format(print_config)) print('print_mem_check_results: {}'.format(print_mem_check_results)) print('print_sleep_periods: {}\n'.format(print_sleep_periods)) ########################################################################## # for calculating the column width when printing mem and zram mem_len = len(str(round(mem_total / 1024.0))) if gui_notifications or gui_low_memory_warnings: from subprocess import Popen, TimeoutExpired notify_sig_dict = {SIGKILL: 'Killing', SIGTERM: 'Terminating'} rate_mem = rate_mem * 1048576 rate_swap = rate_swap * 1048576 rate_zram = rate_zram * 1048576 warn_time_now = 0 warn_time_delta = 1000 warn_timer = 0 x = time() - start_time print('The duration of startup:', round(x * 1000, 1), 'ms') print('Monitoring started!') def save_env_cache(): z = '{}\n'.format(int(time())) a = root_notify_env() #print(a) for i in a: z = z + '{}\x00{}\x00{}\n'.format(i[0], i[1], i[2]) write(cache_path, z) os.chmod(cache_path, 0000) return a def read_env_cache(): x, y = [], [] try: with open(cache_path) as f: for n, line in enumerate(f): if n is 0: t = line[:-1] y.append(t) continue if n > 0: x.append(line[:-1].split('\x00')) except FileNotFoundError: return None y.append(x) return y def root_env_cache(): cache = read_env_cache() if cache is None: print('cache not found, get new env and cache it') return save_env_cache() delta_t = time() - int(cache[0]) if delta_t > cache_time: print('cache time: {}, delta: {}, ' \ 'get new env and cache it'.format( cache_time, round(delta_t))) save_env_cache() return root_notify_env() else: print('cache time: {}, delta: {}, ' \ 'get cached env'.format( cache_time, round(delta_t))) return cache[1] t1 = time() # root_env_cache() t2 = time() # print(t2 - t1) stdout.flush() #exit() ########################################################################## while True: # find mem_available, swap_total, swap_free with open('/proc/meminfo') as f: for n, line in enumerate(f): if n is 2: 
mem_available = int(line.split(':')[1].strip(' kB\n')) continue if n is swap_total_index: swap_total = int(line.split(':')[1].strip(' kB\n')) continue if n is swap_free_index: swap_free = int(line.split(':')[1].strip(' kB\n')) break # if swap_min_sigkill is set in percent if swap_kill_is_percent: swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0 if swap_term_is_percent: swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0 if swap_warn_is_percent: swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0 # find MemUsedZram disksize_sum = 0 mem_used_total_sum = 0 for dev in os.listdir('/sys/block'): if dev.startswith('zram'): stat = zram_stat(dev) disksize_sum += int(stat[0]) mem_used_total_sum += int(stat[1]) # Означает, что при задани zram disksize = 10000M доступная память # уменьшится на 42 MiB. # Найден экспериментально, требует уточнения с разными ядрами и архитектурами. # На небольших дисксайзах (до гигабайта) может быть больше, до 0.0045. # Создатель модуля zram утверждает, что ZRAM_DISKSIZE_FACTOR доожен быть 0.001 # ("zram uses about 0.1% of the size of the disk" # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt), # но это утверждение противоречит опытным данным. 
# ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize # found experimentally ZRAM_DISKSIZE_FACTOR = 0.0042 mem_used_zram = ( mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR ) / 1024.0 if print_mem_check_results: # Calculate 'swap-column' width swap_len = len(str(round(swap_total / 1024.0))) # Output avialable mem sizes if swap_total == 0 and mem_used_zram == 0: print('MemAvail: {} M, {} %'.format( human(mem_available, mem_len), just_percent_mem(mem_available / mem_total))) elif swap_total > 0 and mem_used_zram == 0: print('MemAvail: {} M, {} % | SwapFree: {} M, {} %'.format( human(mem_available, mem_len), just_percent_mem(mem_available / mem_total), human(swap_free, swap_len), just_percent_swap(swap_free / (swap_total + 0.1)))) else: print('MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem' 'UsedZram: {} M, {} %'.format( human(mem_available, mem_len), just_percent_mem(mem_available / mem_total), human(swap_free, swap_len), just_percent_swap(swap_free / (swap_total + 0.1)), human(mem_used_zram, mem_len), just_percent_mem(mem_used_zram / mem_total))) ''' if nc > nm: nc = 0 print('MemAvailable, MiB:', human(mem_available, mem_len)) else: nc += 1 ''' # если swap_min_sigkill задан в абсолютной величине и Swap_total = 0 if swap_total > swap_min_sigkill_kb: # If swap_min_sigkill is absolute swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1)) else: swap_sigkill_pc = '-' if swap_total > swap_min_sigterm_kb: swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1)) else: # СТОИТ ПЕЧАТАТЬ СВОП ТОЛЬКО ПРИ SwapTotal > 0 # нет, печатать так: SwapTotal = 0, ignore swapspace swap_sigterm_pc = '-' # Limits overdrafting checks # If overdrafted - try to prevent OOM # else - just sleep # MEM SWAP KILL if mem_available <= mem_min_sigkill_kb and \ swap_free <= swap_min_sigkill_kb: time0 = time() mem_info = '\n\033[4mMemory status that requires corrective actions:' \ '\033[0m\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ 'kill [{} MiB, {} %]\n 
SwapFree [{} MiB, {} %] <= swa' \ 'p_min_sigkill [{} MiB, {} %]'.format( kib_to_mib(mem_available), percent(mem_available / mem_total), kib_to_mib(mem_min_sigkill_kb), percent(mem_min_sigkill_kb / mem_total), kib_to_mib(swap_free), percent(swap_free / (swap_total + 0.1)), kib_to_mib(swap_min_sigkill_kb), swap_sigkill_pc) find_victim_and_send_signal(SIGKILL) # ZRAM KILL elif mem_used_zram >= zram_max_sigkill_kb: time0 = time() mem_info = '\n\033[4mMemory status that requires corrective actions:' \ '\033[0m\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \ 'kill [{} MiB, {} %]'.format( kib_to_mib(mem_used_zram), percent(mem_used_zram / mem_total), kib_to_mib(zram_max_sigkill_kb), percent(zram_max_sigkill_kb / mem_total)) find_victim_and_send_signal(SIGKILL) # MEM SWAP TERM elif mem_available <= mem_min_sigterm_kb and \ swap_free <= swap_min_sigterm_kb: time0 = time() mem_info = '\n\033[4mMemory status that requires corrective actions:' \ '\033[0m\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ 'p_min_sigterm [{} MiB, {} %]'.format( kib_to_mib(mem_available), percent(mem_available / mem_total), kib_to_mib(mem_min_sigterm_kb), #percent(mem_min_sigterm_kb / mem_total), # ОКРУГЛЯТЬ НА МЕСТЕ ВЫШЕ round(mem_min_sigterm_percent, 1), kib_to_mib(swap_free), percent(swap_free / (swap_total + 0.1)), kib_to_mib(swap_min_sigterm_kb), swap_sigterm_pc) find_victim_and_send_signal(SIGTERM) # ZRAM TERM elif mem_used_zram >= zram_max_sigterm_kb: time0 = time() mem_info = '\n\033[4mMemory status that requires corrective actions:' \ '\033[0m\n MemUsedZram [{} MiB, {} %] >= ' \ 'zram_max_sigterm [{} M, {} %]'.format( kib_to_mib(mem_used_zram), percent(mem_used_zram / mem_total), kib_to_mib(zram_max_sigterm_kb), percent(zram_max_sigterm_kb / mem_total)) find_victim_and_send_signal(SIGTERM) # LOW MEMORY WARNINGS elif gui_low_memory_warnings: if mem_available <= mem_min_warnings_kb and \ swap_free <= swap_min_warnings_kb + 0.1 or \ 
mem_used_zram >= zram_max_warnings_kb: warn_time_delta = time() - warn_time_now warn_time_now = time() warn_timer += warn_time_delta if warn_timer > min_time_between_warnings: send_notify_warn() warn_timer = 0 sleep_after_check_mem() # SLEEP BETWEEN MEM CHECKS else: sleep_after_check_mem()