diff --git a/README.md b/README.md index 86a289f..b230dfe 100644 --- a/README.md +++ b/README.md @@ -149,17 +149,16 @@ optional arguments: The program can be configured by editing the [config file](https://github.com/hakavlad/nohang/blob/master/nohang.conf). The configuration includes the following sections: -1. Memory levels to respond to as an OOM threat -2. Response on PSI memory metrics -3. The frequency of checking the level of available memory (and CPU usage) -4. The prevention of killing innocent victims -5. Impact on the badness of processes via matching their names, cgroups, realpaths, cmdlines and UIDs with certain regular expressions -6. The execution of a specific command or sending any signal instead of sending the SIGTERM signal -7. GUI notifications: - - notifications of corrective actions taken - - low memory warnings (or executing certain command instead) -8. Verbosity -9. Misc +1. Common zram settings +2. Common PSI settings +3. Poll rate +4. Warnings and notifications +5. Soft threshold +6. Hard threshold +7. Customize victim selection +8. Customize soft corrective actions +9. Misc settings +10. Verbosity, debug, logging Just read the description of the parameters and edit the values. Please restart nohang to apply the changes. Default path to the config after installing is `/etc/nohang/nohang.conf`. diff --git a/nohang b/nohang index abdbb63..16d3a30 100755 --- a/nohang +++ b/nohang @@ -9,44 +9,67 @@ from sys import stdout, stderr, argv, exit from re import search from sre_constants import error as invalid_re from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP -from threading import Thread -########################################################################## +############################################################################### # define functions def exe(cmd): - """ execute cmd + """ execute cmd in subprocess.Popen() """ - log('Execute the command: {}'.format(cmd)) - t0 = monotonic() - write_self_oom_score_adj(self_oom_score_adj_max) - err = os.system(cmd) - write_self_oom_score_adj(self_oom_score_adj_min) - dt = monotonic() - t0 - log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3))) - return err + + cmd_num_dict['cmd_num'] += 1 + cmd_num = cmd_num_dict['cmd_num'] + log('Execute the command({}) in {}: {}'.format( + cmd_num, + threading.current_thread().getName(), + cmd)) + t3 = monotonic() + with Popen(cmd, shell=True) as proc: + try: + proc.wait(timeout=exe_timeout) + exit_status = proc.poll() + t4 = monotonic() + log('Command({}) execution completed in {} sec; exit status' \ + ': {}'.format(cmd_num, round(t4 - t3, 3), exit_status)) + except TimeoutExpired: + proc.kill() + t4 = monotonic() + log('TimeoutExpired for the command({}) in {} sec'.format( + cmd_num, round(t4 - t3, 3))) -def go(func, *a): - """ run func in new thread +def start_thread(func, *a, **k): + """ run function in a new thread """ - t1 = monotonic() - th = Thread(target=func, args=a) + + th = threading.Thread(target=func, args=a, kwargs=k) th_name = th.getName() + if debug_threading: - log('Starting {}'.format(th_name)) + + log('Starting {} from {}'.format( + th_name, threading.current_thread().getName() + )) + try: + + t1 = monotonic() th.start() t2 = monotonic() + if debug_threading: - log('{} has started in {} ms'.format( - th_name, round((t2 - t1) * 1000, 1))) + log('{} has started in {} ms, {} threads currently alive'.format( + th_name, round((t2 - t1) * 1000, 1), threading.active_count() + )) + except RuntimeError: - if debug_threading: - log('RuntimeError: cannot start {}'.format(th_name)) + + log('RuntimeError: cannot start {}'.format(th_name)) + + return 1 def re_pid_environ(pid): @@ -57,7 +80,6 @@ def re_pid_environ(pid): 'DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus') returns None if these vars is not in /proc/[pid]/environ """ - try: with open('/proc/' + pid + '/environ') as f: env = f.read() @@ -128,8 +150,9 @@ def root_notify_env(): def pop(cmd, username): + """ run cmd in subprocess.Popen() """ - """ + if swap_total == 0: wait_time = 2 else: @@ -140,6 +163,7 @@ def pop(cmd, username): with Popen(cmd) as proc: try: proc.wait(timeout=wait_time) + err = proc.poll() except TimeoutExpired: proc.kill() if debug_gui_notifications: @@ -147,8 +171,12 @@ def pop(cmd, username): t4 = monotonic() + err = 0 + if debug_gui_notifications: - log('Popen time: {} sec; cmd: {}'.format(round(t4 - t3, 3), cmd)) + pass + #log('Popen time: {} sec; exit status: {}; cmd: {}'.format(round(t4 - t3, 3), err, cmd)) + log('Popen time: {} sec; exit status: {}; cmd: {}'.format(round(t4 - t3, 3), err, cmd)) def send_notification(title, body): @@ -214,7 +242,7 @@ def send_notification(title, body): body ] - go(pop, cmd, username) + start_thread(pop, cmd, username) else: if debug_gui_notifications: @@ -227,7 +255,7 @@ def send_notify_warn(): log('Warning threshold exceeded') if check_warning_exe: - go(exe, warning_exe) + start_thread(exe, warning_exe) else: @@ -238,7 +266,7 @@ def send_notify_warn(): round(swap_free / (swap_total + 0.1) * 100) ) - go(send_notification, title, body) + start_thread(send_notification, title, body) def send_notify(threshold, name, pid): @@ -261,7 +289,7 @@ def send_notify(threshold, name, pid): ) ) - go(send_notification, title, body) + start_thread(send_notification, title, body) def send_notify_etc(pid, name, command): @@ -277,43 +305,27 @@ def send_notify_etc(pid, name, command): 'mmand:\n{}'.format( pid, name.replace('&', '*'), command.replace('&', '*')) - go(send_notification, title, body) + start_thread(send_notification, title, body) def check_config(): """ """ - log('#' * 79) - log('0. Common zram settings') + log('\n1. Common zram settings') - log(' zram_checking_enabled: {}'.format(zram_checking_enabled)) + log(' zram_checking_enabled: {}'.format(zram_checking_enabled)) - log('1. Thresholds below which a signal should be sent to the victim') - - log(' soft_threshold_min_mem: {} MiB, {} %'.format( - round(soft_threshold_min_mem_mb), round(soft_threshold_min_mem_percent, 1))) - log(' hard_threshold_min_mem: {} MiB, {} %'.format( - round(hard_threshold_min_mem_mb), round(hard_threshold_min_mem_percent, 1))) - log(' soft_threshold_min_swap: {}'.format(soft_threshold_min_swap)) - log(' hard_threshold_min_swap: {}'.format(hard_threshold_min_swap)) - log(' soft_threshold_max_zram: {} MiB, {} %'.format( - round(soft_threshold_max_zram_mb), round(soft_threshold_max_zram_percent, 1))) - log(' hard_threshold_max_zram: {} MiB, {} %'.format( - round(hard_threshold_max_zram_mb), round(hard_threshold_max_zram_percent, 1))) - - log('2. Response on PSI memory metrics') + log('\n2. Common PSI settings') log(' psi_checking_enabled: {}'.format(psi_checking_enabled)) log(' psi_path: {}'.format(psi_path)) log(' psi_metrics: {}'.format(psi_metrics)) - log(' soft_threshold_max_psi: {}'.format(soft_threshold_max_psi)) - log(' hard_threshold_max_psi: {}'.format(hard_threshold_max_psi)) log(' psi_excess_duration: {} sec'.format(psi_excess_duration)) log(' psi_post_action_delay: {} sec'.format(psi_post_action_delay)) - log('3. The frequency of checking the amount of available memory') + log('\n3. Poll rate') log(' fill_rate_mem: {}'.format(fill_rate_mem)) log(' fill_rate_swap: {}'.format(fill_rate_swap)) @@ -322,18 +334,56 @@ def check_config(): log(' min_sleep: {} sec'.format(min_sleep)) log(' over_sleep: {} sec'.format(over_sleep)) - log('4. The prevention of killing innocent victims') + log('\n4. Warnings and notifications') - log(' min_badness: {}'.format(min_badness)) - log(' post_soft_action_delay: {} sec'.format(post_soft_action_delay)) - log(' post_zombie_delay: {} sec'.format(post_zombie_delay)) - log(' victim_cache_time: {} sec'.format(victim_cache_time)) - log(' ignore_positive_oom_score_adj: {}'.format( - ignore_positive_oom_score_adj)) + log(' post_action_gui_notifications: {}'.format( + post_action_gui_notifications)) - log('5. Impact on the badness of processes') + log(' low_memory_warnings_enabled: {}'.format( + low_memory_warnings_enabled)) + log(' warning_exe: {}'.format(warning_exe)) + log(' warning_threshold_min_mem: {} MiB, {} %'.format(round( + warning_threshold_min_mem_mb), round( + warning_threshold_min_mem_percent, 1))) + log(' warning_threshold_min_swap: {}'.format + (warning_threshold_min_swap)) + log(' warning_threshold_max_zram: {} MiB, {} %'.format(round( + warning_threshold_max_zram_mb), round( + warning_threshold_max_zram_percent, 1))) + log(' warning_threshold_max_psi: {}'.format( + warning_threshold_max_psi)) + log(' min_post_warning_delay: {} sec'.format( + min_post_warning_delay)) - log('5.1. Matching process names with RE patterns') + log(' env_cache_time: {}'.format(env_cache_time)) + + log('\n5. Soft threshold') + + log(' soft_threshold_min_mem: {} MiB, {} %'.format(round(soft_threshold_min_mem_mb), round(soft_threshold_min_mem_percent, 1))) + log(' soft_threshold_min_swap: {}'.format(soft_threshold_min_swap)) + log(' soft_threshold_max_zram: {} MiB, {} %'.format(round(soft_threshold_max_zram_mb), round(soft_threshold_max_zram_percent, 1))) + log(' soft_threshold_max_psi: {}'.format(soft_threshold_max_psi)) + + log('\n6. Hard threshold') + + log(' hard_threshold_min_mem: {} MiB, {} %'.format(round(hard_threshold_min_mem_mb), round(hard_threshold_min_mem_percent, 1))) + log(' hard_threshold_min_swap: {}'.format(hard_threshold_min_swap)) + log(' hard_threshold_max_zram: {} MiB, {} %'.format(round(hard_threshold_max_zram_mb), round(hard_threshold_max_zram_percent, 1))) + log(' hard_threshold_max_psi: {}'.format(hard_threshold_max_psi)) + + log('\n7. Customize victim selection: adjusting badness of processes') + + log('\n7.1. Ignore positive oom_score_adj') + + log(' ignore_positive_oom_score_adj: {}'.format(ignore_positive_oom_score_adj)) + + log('\n7.2. Forbid negative badness') + + log(' forbid_negative_badness: {}'.format(forbid_negative_badness)) + + log('\n7.3. ') + + log('7.3.1. Matching process names with RE patterns') if len(badness_adj_re_name_list) > 0: log(' regexp: badness_adj:') for i in badness_adj_re_name_list: @@ -341,7 +391,7 @@ def check_config(): else: log(' (not set)') - log('5.2. Matching CGroup_v1-line with RE patterns') + log('7.3.2. Matching CGroup_v1-line with RE patterns') if len(badness_adj_re_cgroup_v1_list) > 0: log(' regexp: badness_adj:') for i in badness_adj_re_cgroup_v1_list: @@ -349,7 +399,7 @@ def check_config(): else: log(' (not set)') - log('5.3. Matching CGroup_v2-line with RE patterns') + log('7.3.3. Matching CGroup_v2-line with RE patterns') if len(badness_adj_re_cgroup_v2_list) > 0: log(' regexp: badness_adj:') for i in badness_adj_re_cgroup_v1_list: @@ -357,7 +407,7 @@ def check_config(): else: log(' (not set)') - log('5.4. Matching eUIDs with RE patterns') + log('7.3.4. Matching eUIDs with RE patterns') if len(badness_adj_re_cgroup_v2_list) > 0: log(' regexp: badness_adj:') for i in badness_adj_re_uid_list: @@ -365,7 +415,7 @@ def check_config(): else: log(' (not set)') - log('5.5. Matching realpath with RE patterns') + log('7.3.5. Matching realpath with RE patterns') if len(badness_adj_re_cgroup_v2_list) > 0: log(' regexp: badness_adj:') for i in badness_adj_re_realpath_list: @@ -373,7 +423,7 @@ def check_config(): else: log(' (not set)') - log('5.6. Matching cmdlines with RE patterns') + log('7.3.6. Matching cmdlines with RE patterns') if len(badness_adj_re_cgroup_v2_list) > 0: log(' regexp: badness_adj:') for i in badness_adj_re_cmdline_list: @@ -381,7 +431,7 @@ def check_config(): else: log(' (not set)') - log('5.7. Matching environ with RE patterns') + log('7.3.7. Matching environ with RE patterns') if len(badness_adj_re_cgroup_v2_list) > 0: log(' regexp: badness_adj:') for i in badness_adj_re_environ_list: @@ -389,7 +439,7 @@ def check_config(): else: log(' (not set)') - log('6. Customize corrective actions') + log('\n8. Customize corrective actions') if len(soft_actions_list) > 0: log(' Match by: regexp: command: ') @@ -398,45 +448,43 @@ def check_config(): else: log(' (not set)') - log('7. GUI notifications') + log('\n9. Misc') - log(' post_action_gui_notifications: {}'.format( - post_action_gui_notifications)) - log(' low_memory_warnings_enabled: {}'.format( - low_memory_warnings_enabled)) - log(' warning_exe: {}'.format(warning_exe)) - log(' warning_threshold_min_mem: {} MiB, {} %'.format(round( - warning_threshold_min_mem_mb), round(warning_threshold_min_mem_percent, 1))) - log(' warning_threshold_min_swap: {}'.format(warning_threshold_min_swap)) - log(' warning_threshold_max_zram: {} MiB, {} %'.format(round( - warning_threshold_max_zram_mb), round(warning_threshold_max_zram_percent, 1))) - log(' warning_threshold_max_psi: {}'.format(warning_threshold_max_psi)) - log(' min_post_warning_delay: {} sec'.format(min_post_warning_delay)) + log(' max_soft_exit_time: {} sec'.format(max_soft_exit_time)) - log('8. Verbosity') + log(' post_kill_exe: {}'.format(post_kill_exe)) + + log(' min_badness: {}'.format(min_badness)) + + log(' post_soft_action_delay: {} sec'.format( + post_soft_action_delay)) + log(' post_zombie_delay: {} sec'.format(post_zombie_delay)) + log(' victim_cache_time: {} sec'.format(victim_cache_time)) + log(' exe_timeout: {} sec'.format(exe_timeout)) + + log('\n10. Verbosity') log(' print_config_at_startup: {}'.format(print_config_at_startup)) + log(' print_mem_check_results: {}'.format(print_mem_check_results)) - log(' min_mem_report_interval: {} sec'.format(min_mem_report_interval)) - log(' debug_sleep: {}'.format(debug_sleep)) - log(' print_statistics: {}'.format(print_statistics)) + log(' min_mem_report_interval: {} sec'.format( + min_mem_report_interval)) + log(' print_proc_table: {}'.format(print_proc_table)) log(' extra_table_info: {}'.format(extra_table_info)) + log(' print_victim_status: {}'.format(print_victim_status)) log(' print_victim_cmdline: {}'.format(print_victim_cmdline)) log(' max_victim_ancestry_depth: {}'.format(max_victim_ancestry_depth)) + + log(' print_statistics: {}'.format(print_statistics)) + log(' debug_gui_notifications: {}'.format(debug_gui_notifications)) - log(' separate_log: {}'.format(separate_log)) log(' debug_psi: {}'.format(debug_psi)) + log(' debug_sleep: {}'.format(debug_sleep)) + log(' debug_threading: {}'.format(debug_threading)) + log(' separate_log: {}'.format(separate_log)) - log('9. Misc') - - log(' max_soft_exit_time: {} sec'.format(max_soft_exit_time)) - log(' post_kill_exe: {}'.format(post_kill_exe)) - log(' forbid_negative_badness: {}'.format( - forbid_negative_badness)) - - # log(': {}'.format()) log('#' * 79) if check_config_flag: @@ -448,7 +496,6 @@ def get_swap_threshold_tuple(string): # re (Num %, True) or (Num KiB, False) """Returns KiB value if abs val was set in config, or tuple with %""" # return tuple with abs and bool: (abs %, True) or (abs MiB, False) - if string.endswith('%'): valid = string_to_float_convert_test(string[:-1]) if valid is None: @@ -949,6 +996,11 @@ def errprint(*text): """ """ print(*text, file=stderr, flush=True) + try: + if separate_log: + logging.info(*msg) + except NameError: + pass def mlockall(): @@ -1652,11 +1704,8 @@ def check_mem_swap_ex(): if (mem_available <= hard_threshold_min_mem_kb and swap_free <= hard_threshold_min_swap_kb): - mem_info = 'Memory status that requ' \ - 'ires corrective actions (hard threshold exceeded):' \ - '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ - 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ - 'p_min_sigkill [{} MiB, {} %]'.format( + mem_info = 'Memory status that requires corrective actions:\n MemAvailable [{} MiB, {} %] <= hard_threshold_min_mem [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= hard_threshold_min_swap [{} MiB, {} %]'.format( + kib_to_mib(mem_available), percent(mem_available / mem_total), kib_to_mib(hard_threshold_min_mem_kb), @@ -1669,14 +1718,13 @@ def check_mem_swap_ex(): return (SIGKILL, mem_info, mem_available, hard_threshold_min_swap_kb, soft_threshold_min_swap_kb, swap_free, swap_total) + + if (mem_available <= soft_threshold_min_mem_kb and swap_free <= soft_threshold_min_swap_kb): - mem_info = 'Memory status that requi' \ - 'res corrective actions (soft threshold exceeded):' \ - '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ - 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ - 'p_min_sigterm [{} MiB, {} %]'.format( + mem_info = 'Memory status that requires corrective actions:\n MemAvailable [{} MiB, {} %] <= soft_threshold_min_mem [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= soft_threshold_min_swap [{} MiB, {} %]'.format( + kib_to_mib(mem_available), percent(mem_available / mem_total), kib_to_mib(soft_threshold_min_mem_kb), @@ -1689,6 +1737,7 @@ def check_mem_swap_ex(): return (SIGTERM, mem_info, mem_available, hard_threshold_min_swap_kb, soft_threshold_min_swap_kb, swap_free, swap_total) + if low_memory_warnings_enabled: if (mem_available <= warning_threshold_min_mem_kb and swap_free <= @@ -1707,10 +1756,8 @@ def check_zram_ex(): if mem_used_zram >= hard_threshold_max_zram_kb: - mem_info = 'Memory status that requir' \ - 'es corrective actions (hard threshold exceeded):' \ - '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \ - 'kill [{} MiB, {} %]'.format( + mem_info = 'Memory status that requires corrective actions:\n MemUsedZram [{} MiB, {} %] >= hard_threshold_max_zram [{} MiB, {} %]'.format( + kib_to_mib(mem_used_zram), percent(mem_used_zram / mem_total), kib_to_mib(hard_threshold_max_zram_kb), @@ -1718,11 +1765,10 @@ def check_zram_ex(): return SIGKILL, mem_info, mem_used_zram + if mem_used_zram >= soft_threshold_max_zram_kb: - mem_info = 'Memory status that requires corrective actions (soft th' \ - 'reshold exceeded):\n MemUsedZram [{} MiB, {} %] >= zram_max_s' \ - 'igterm [{} M, {} %]'.format( + mem_info = 'Memory status that requires corrective actions:\n MemUsedZram [{} MiB, {} %] >= soft_threshold_max_zram [{} M, {} %]'.format( kib_to_mib(mem_used_zram), percent(mem_used_zram / mem_total), kib_to_mib(soft_threshold_max_zram_kb), @@ -1871,6 +1917,20 @@ def is_victim_alive(victim_id): return 0 + + + + + + + + + + + + + + def implement_corrective_action( threshold, mem_info_list, @@ -1882,6 +1942,8 @@ def implement_corrective_action( zram_threshold, zram_info, psi_info): + """ great and terrible function + """ log('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>') @@ -2067,7 +2129,7 @@ def implement_corrective_action( cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name( pid)).replace('$SERVICE', service) - go(exe, cmd) + start_thread(exe, cmd) """ if exit_status == 0: @@ -2212,7 +2274,7 @@ def implement_corrective_action( log('Execute post_kill_exe') - go(exe, cmd) + start_thread(exe, cmd) if post_action_gui_notifications: if soft_match: @@ -2246,6 +2308,23 @@ def implement_corrective_action( return psi_t0 + + + + + + + + + + + + + + + + + def sleep_after_check_mem(): """Specify sleep times depends on rates and avialable memory.""" @@ -2372,7 +2451,7 @@ def calculate_percent(arg_key): return mem_min_kb, mem_min_mb, mem_min_percent -########################################################################## +############################################################################### # {victim_id : {'time': timestamp, 'name': name} @@ -2547,7 +2626,7 @@ except ValueError: log('config: ' + config) -########################################################################## +############################################################################### # parsing the config with obtaining the parameters dictionary @@ -2750,12 +2829,11 @@ else: soft_actions = True -########################################################################## +############################################################################### + -# post_zombie_delay = 0.1 -# victim_cache_time = 50 # extracting parameters from the dictionary @@ -2777,8 +2855,6 @@ post_action_gui_notifications = conf_parse_bool( 'post_action_gui_notifications') -if low_memory_warnings_enabled or post_action_gui_notifications: - from subprocess import Popen, TimeoutExpired debug_threading = conf_parse_bool('debug_threading') @@ -2850,13 +2926,35 @@ if 'env_cache_time' in config_dict: errprint('Invalid env_cache_time value, not float\nExit') exit(1) if env_cache_time < 0: - errprint('fill_rate_mem MUST be >= 0\nExit') + errprint('env_cache_time MUST be >= 0\nExit') exit(1) else: - errprint('fill_rate_mem not in config\nExit') + errprint('env_cache_time not in config\nExit') exit(1) + + + + +if 'exe_timeout' in config_dict: + exe_timeout = string_to_float_convert_test( + config_dict['exe_timeout']) + if exe_timeout is None: + errprint('Invalid exe_timeout value, not float\nExit') + exit(1) + if exe_timeout <= 0: + errprint('exe_timeout MUST be > 0\nExit') + exit(1) +else: + errprint('exe_timeout not in config\nExit') + exit(1) + + + + + + if 'fill_rate_mem' in config_dict: fill_rate_mem = string_to_float_convert_test(config_dict['fill_rate_mem']) if fill_rate_mem is None: @@ -3230,9 +3328,26 @@ if print_proc_table_flag: func_print_proc_table() + + + ########################################################################## + + +if (low_memory_warnings_enabled or \ + post_action_gui_notifications or \ + check_warning_exe or \ + soft_actions or \ + post_kill_exe != ''): + + import threading + from subprocess import Popen, TimeoutExpired + + + + psi_support = os.path.exists(psi_path) @@ -3298,7 +3413,7 @@ fill_rate_zram = fill_rate_zram * 1024 warn_time_now = 0 -warn_time_delta = 1000 +warn_time_delta = 1000 # ? warn_timer = 0 @@ -3372,6 +3487,15 @@ envd = dict() envd['list_with_envs'] = envd['t'] = None + + +cmd_num_dict = dict() +cmd_num_dict['cmd_num'] = 0 + + + + + ########################################################################## diff --git a/nohang-desktop.conf b/nohang-desktop.conf index 7144fc1..c6c80d3 100644 --- a/nohang-desktop.conf +++ b/nohang-desktop.conf @@ -6,188 +6,240 @@ The configuration includes the following sections: - 0. Common zram settings - 1. Memory levels to respond to as an OOM threat - 2. Response on PSI memory metrics - 3. The frequency of checking the level of available memory - (and CPU usage) - 4. The prevention of killing innocent victims - 5. Impact on the badness of processes via matching their names, cgroups and - cmdlines with specified regular expressions - 6. Customize corrective actions: the execution of a specific command - instead of sending the SIGTERM signal - 7. GUI notifications: - - low memory warnings - - OOM prevention results - 8. Output verbosity - 9. Misc + 1. Common zram settings + 2. Common PSI settings + 3. Poll rate + 4. Warnings and notifications + 5. Soft threshold + 6. Hard threshold + 7. Customize victim selection: adjusting badness of processes + 8. Customize soft corrective actions + 9. Misc settings + 10. Verbosity, debug, logging Just read the description of the parameters and edit the values. Please restart the program after editing the config. - More docs will be written later. + TODO: improve descriptions ############################################################################### - 0. Common zram settings + 1. Common zram settings - See https://www.kernel.org/doc/Documentation/blockdev/zram.txt - You maybe need to set `zram_checking_enabled = True` if you has a big zram disksize. + Key: zram_checking_enabled + Description: + Type: boolean + Valid values: True and False + Default value: False zram_checking_enabled = False ############################################################################### - 1. Thresholds below which a signal should be sent to the victim + 2. Common PSI settings - Sets the available memory levels at or below which SIGTERM or SIGKILL - signals are sent. The signal will be sent if MemAvailable and - SwapFree (in /proc/meminfo) at the same time will drop below the - corresponding values. Can be specified in % (percent) and M (MiB). - Valid values are floating-point numbers from the range [0; 100] %. - - MemAvailable levels. - -soft_threshold_min_mem = 8 % -hard_threshold_min_mem = 4 % - - SwapFree levels. - -soft_threshold_min_swap = 10 % -hard_threshold_min_swap = 5 % - - Specifying the total share of zram in memory, if exceeded the - corresponding signals are sent. As the share of zram in memory - increases, it may fall responsiveness of the system. 90 % is a - usual hang level, not recommended to set very high. - - Can be specified in % and M. Valid values are floating-point - numbers from the range [0; 90] %. - -soft_threshold_max_zram = 60 % -hard_threshold_max_zram = 65 % - - -############################################################################### - - 2. Response on PSI memory metrics (it needs Linux 4.20 and up) - - About PSI: - https://facebookmicrosites.github.io/psi/ - - Disabled by default (psi_checking_enabled = False). + Description: + Type: boolean + Valid values: True and False psi_checking_enabled = False - Choose a path to PSI file. - By default it monitors system-wide file: /proc/pressure/memory - You also can set file to monitor one cgroup slice. - For example: - psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure - psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure - psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure - - Execute the command - find /sys/fs/cgroup -name memory.pressure - to find available memory.pressue files (except /proc/pressure/memory). - (actual for cgroup2) + Description: + Type: string + Valid values: psi_path = /proc/pressure/memory - Valid psi_metrics are: - some_avg10 - some_avg60 - some_avg300 - full_avg10 - full_avg60 - full_avg300 - - some_avg10 is most sensitive. + Description: + Type: string + Valid values: psi_metrics = some_avg10 -soft_threshold_max_psi = 60 + Description: + Type: float + Valid values: -hard_threshold_max_psi = 90 - - >= 0, float psi_excess_duration = 60 + Description: + Type: float + Valid values: + psi_post_action_delay = 60 - ############################################################################### - 3. The frequency of checking the amount of available memory - (and CPU usage) + 3. Poll rate - Coefficients that affect the intensity of monitoring. Reducing - the coefficients can reduce CPU usage and increase the periods - between memory checks. - - Why three coefficients instead of one? Because the swap fill rate - is usually lower than the RAM fill rate. - - It is possible to set a lower intensity of monitoring for swap - without compromising to prevent OOM and thus reduce the CPU load. - - Default values are well for desktop. On servers without rapid - fluctuations in memory levels the values can be reduced. - - Valid values are positive floating-point numbers. + Description: + Type: float + Valid values: fill_rate_mem = 4000 + + Description: + Type: float + Valid values: + fill_rate_swap = 1500 + + Description: + Type: float + Valid values: + fill_rate_zram = 6000 - See also https://github.com/rfjakob/earlyoom/issues/61 + Description: + Type: float + Valid values: max_sleep = 3 + + Description: + Type: float + Valid values: + min_sleep = 0.1 - Sleep time if soft threshold exceeded. + Description: + Type: float + Valid values: over_sleep = 0.05 ############################################################################### - 4. The prevention of killing innocent victims + 4. Warnings and notifications - Valid values are integers from the range [0; 1000]. + Description: + Type: boolean + Valid values: True and False -min_badness = 10 +post_action_gui_notifications = True - Valid values are non-negative floating-point numbers. - Min delay if a victim doesn't respond to SIGTERM in 10 ms. + Description: + Type: boolean + Valid values: True and False -post_soft_action_delay = 3 +low_memory_warnings_enabled = True -post_zombie_delay = 0.1 + Description: + Type: string + Valid values: -victim_cache_time = 10 +warning_exe = - Valid values are True and False. + Description: + Type: float (+ % or M) + Valid values: -ignore_positive_oom_score_adj = False +warning_threshold_min_mem = 20 % + + Description: + Type: float (+ % or M) + Valid values: + +warning_threshold_min_swap = 20 % + + Description: + Type: float (+ % or M) + Valid values: + +warning_threshold_max_zram = 50 % + + Description: + Type: float + Valid values: + +warning_threshold_max_psi = 100 + + Description: + Type: float + Valid values: + +min_post_warning_delay = 30 + + Description: + Type: float + Valid values: + +env_cache_time = 300 ############################################################################### - 5. Impact on the badness of processes via matching their names, - cmdlines or UIDs with regular expressions using re.search(). + 5. Soft threshold - See https://en.wikipedia.org/wiki/Regular_expression and - https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions + Description: + Type: float (+ % or M) + Valid values: - Enabling this options slows down the search for the victim - because the names, cmdlines or UIDs of all processes - (except init and kthreads) are compared with the - specified regex patterns (in fact slowing down is caused by - reading all /proc/*/cmdline and /proc/*/status files). +soft_threshold_min_mem = 8 % - Use script `oom-sort` from nohang package to view - names, cmdlines and UIDs of processes. + Description: + Type: float (+ % or M) + Valid values: - 5.1. Matching process names with RE patterns +soft_threshold_min_swap = 8 % + + Description: + Type: float (+ % or M) + Valid values: + +soft_threshold_max_zram = 60 % + + Description: + Type: float + Valid values: + +soft_threshold_max_psi = 60 + +############################################################################### + + 6. Hard threshold + +hard_threshold_min_mem = 4 % + + Description: + Type: float (+ % or M) + Valid values: + +hard_threshold_min_swap = 4 % + + Description: + Type: float (+ % or M) + Valid values: + +hard_threshold_max_zram = 65 % + + Description: + Type: float + Valid values: + +hard_threshold_max_psi = 90 + +############################################################################### + + 7. Customize victim selection: adjusting badness of processes + + 7.1. Ignore positive oom_score_adj + + Description: + Type: boolean + Valid values: True and False + +ignore_positive_oom_score_adj = False + + 7.2. Forbid negative badness + + Description: + Type: boolean + Valid values: True and False + +forbid_negative_badness = True + + + 7.3.1. Matching process names with RE patterns change their badness Syntax: @@ -204,28 +256,27 @@ ignore_positive_oom_score_adj = False Prefer firefox tabs @BADNESS_ADJ_RE_NAME 300 /// ^Web Content$ + 7.3.2. Matching CGroup_v1-line with RE patterns - 5.2. Matching CGroup_v1-line with RE patterns - -@BADNESS_ADJ_RE_CGROUP_V1 -100 /// ^/system\.slice/ +@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/ @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$ @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/ - 5.3. Matching CGroup_v2-line with RE patterns + 7.3.3. Matching CGroup_v2-line with RE patterns @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload - 5.4. Matching eUIDs with RE patterns + 7.3.4. Matching eUIDs with RE patterns @BADNESS_ADJ_RE_UID -100 /// ^0$ - 5.5. Matching realpath with RE patterns + 7.3.5. Matching realpath with RE patterns @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo - 5.6. Matching cmdlines with RE patterns + 7.3.6. Matching cmdlines with RE patterns A good option that allows fine adjustment. @@ -233,21 +284,22 @@ ignore_positive_oom_score_adj = False @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer Prefer firefox tabs (Web Content and WebExtensions) - @BADNESS_ADJ_RE_CMDLINE 300 /// -appomni + @BADNESS_ADJ_RE_CMDLINE 100 /// -appomni @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox - 5.7. Matching environ with RE patterns + 7.3.7. Matching environ with RE patterns @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user + Note that you can control badness also via systemd units via OOMScoreAdjust, see www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust= ############################################################################### - 6. Customize corrective actions. + 8. Customize soft corrective actions TODO: docs @@ -260,6 +312,8 @@ ignore_positive_oom_score_adj = False @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE @SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE + @SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID + $PID will be replaced by process PID. $NAME will be replaced by process name. $SERVICE will be replaced by .service if it exists (overwise it will be @@ -267,60 +321,80 @@ ignore_positive_oom_score_adj = False ############################################################################### - 7. GUI notifications & low memory warnings + 9. Misc settings -post_action_gui_notifications = True + Description: + Type: float + Valid values: - Enable GUI notifications about the low level of available memory. - Valid values are True and False. +max_soft_exit_time = 10 -low_memory_warnings_enabled = True + Description: + Type: string + Valid values: - Execute the command instead of sending GUI notifications if the value is - not empty line. For example: - warning_exe = cat /proc/meminfo & +post_kill_exe = -warning_exe = + Description: + Type: integer + Valid values: - Can be specified in % (percent) and M (MiB). - Valid values are floating-point numbers from the range [0; 100] %. +min_badness = 10 -warning_threshold_min_mem = 20 % + Description: + Type: float + Valid values: -warning_threshold_min_swap = 25 % +post_soft_action_delay = 3 -warning_threshold_max_zram = 50 % + Description: + Type: float + Valid values: -warning_threshold_max_psi = 100 +post_zombie_delay = 0.1 - Valid values are floating-point numbers from the range [1; 300]. + Description: + Type: float + Valid values: -min_post_warning_delay = 30 +victim_cache_time = 10 -env_cache_time = 300 + Description: + Type: float + Valid values: - - Ampersands (&) will be replaced with asterisks (*) in process - names and in commands. +exe_timeout = 20 ############################################################################### - 8. Verbosity + 10. Verbosity, debug, logging - Display the configuration when the program starts. - Valid values are True and False. + Description: + Type: boolean + Valid values: True and False print_config_at_startup = False - Print memory check results. - Valid values are True and False. + Description: + Type: boolean + Valid values: True and False print_mem_check_results = False + Description: + Type: float + Valid values: + min_mem_report_interval = 60 + Description: + Type: boolean + Valid values: True and False + print_proc_table = False + Description: + Type: string Valid values: None cgroup_v1 @@ -331,36 +405,59 @@ print_proc_table = False extra_table_info = None + Description: + Type: boolean + Valid values: True and False + print_victim_status = True -max_victim_ancestry_depth = 3 + Description: + Type: boolean + Valid values: True and False print_victim_cmdline = False + Description: + Type: integer + Valid values: + +max_victim_ancestry_depth = 3 + + Description: + Type: boolean + Valid values: True and False + print_statistics = True - Print sleep periods between memory checks. - Valid values are True and False. + Description: + Type: boolean + Valid values: True and False debug_psi = False + Description: + Type: boolean + Valid values: True and False + debug_gui_notifications = False + Description: + Type: boolean + Valid values: True and False + debug_sleep = False -separate_log = False + Description: + Type: boolean + Valid values: True and False debug_threading = False -############################################################################### + Description: + Type: boolean + Valid values: True and False - 9. Misc - -max_soft_exit_time = 10 - -post_kill_exe = - -forbid_negative_badness = True +separate_log = False ############################################################################### diff --git a/nohang.conf b/nohang.conf index b36e0e9..534fa75 100644 --- a/nohang.conf +++ b/nohang.conf @@ -6,188 +6,240 @@ The configuration includes the following sections: - 0. Common zram settings - 1. Memory levels to respond to as an OOM threat - 2. Response on PSI memory metrics - 3. The frequency of checking the level of available memory - (and CPU usage) - 4. The prevention of killing innocent victims - 5. Impact on the badness of processes via matching their names, cgroups and - cmdlines with specified regular expressions - 6. Customize corrective actions: the execution of a specific command - instead of sending the SIGTERM signal - 7. GUI notifications: - - low memory warnings - - OOM prevention results - 8. Output verbosity - 9. Misc + 1. Common zram settings + 2. Common PSI settings + 3. Poll rate + 4. Warnings and notifications + 5. Soft threshold + 6. Hard threshold + 7. Customize victim selection: adjusting badness of processes + 8. Customize soft corrective actions + 9. Misc settings + 10. Verbosity, debug, logging Just read the description of the parameters and edit the values. Please restart the program after editing the config. - More docs will be written later. + TODO: improve descriptions ############################################################################### - 0. Common zram settings + 1. Common zram settings - See https://www.kernel.org/doc/Documentation/blockdev/zram.txt - You maybe need to set `zram_checking_enabled = True` if you has a big zram disksize. + Key: zram_checking_enabled + Description: + Type: boolean + Valid values: True and False + Default value: False zram_checking_enabled = False ############################################################################### - 1. Thresholds below which a signal should be sent to the victim + 2. Common PSI settings - Sets the available memory levels at or below which SIGTERM or SIGKILL - signals are sent. The signal will be sent if MemAvailable and - SwapFree (in /proc/meminfo) at the same time will drop below the - corresponding values. Can be specified in % (percent) and M (MiB). - Valid values are floating-point numbers from the range [0; 100] %. - - MemAvailable levels. - -soft_threshold_min_mem = 8 % -hard_threshold_min_mem = 4 % - - SwapFree levels. - -soft_threshold_min_swap = 10 % -hard_threshold_min_swap = 5 % - - Specifying the total share of zram in memory, if exceeded the - corresponding signals are sent. As the share of zram in memory - increases, it may fall responsiveness of the system. 90 % is a - usual hang level, not recommended to set very high. - - Can be specified in % and M. Valid values are floating-point - numbers from the range [0; 90] %. - -soft_threshold_max_zram = 60 % -hard_threshold_max_zram = 65 % - - -############################################################################### - - 2. Response on PSI memory metrics (it needs Linux 4.20 and up) - - About PSI: - https://facebookmicrosites.github.io/psi/ - - Disabled by default (psi_checking_enabled = False). + Description: + Type: boolean + Valid values: True and False psi_checking_enabled = False - Choose a path to PSI file. - By default it monitors system-wide file: /proc/pressure/memory - You also can set file to monitor one cgroup slice. - For example: - psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure - psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure - psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure - - Execute the command - find /sys/fs/cgroup -name memory.pressure - to find available memory.pressue files (except /proc/pressure/memory). - (actual for cgroup2) + Description: + Type: string + Valid values: psi_path = /proc/pressure/memory - Valid psi_metrics are: - some_avg10 - some_avg60 - some_avg300 - full_avg10 - full_avg60 - full_avg300 - - some_avg10 is most sensitive. + Description: + Type: string + Valid values: psi_metrics = some_avg10 -soft_threshold_max_psi = 60 + Description: + Type: float + Valid values: -hard_threshold_max_psi = 90 - - >= 0, float psi_excess_duration = 60 + Description: + Type: float + Valid values: + psi_post_action_delay = 60 - ############################################################################### - 3. The frequency of checking the amount of available memory - (and CPU usage) + 3. Poll rate - Coefficients that affect the intensity of monitoring. Reducing - the coefficients can reduce CPU usage and increase the periods - between memory checks. - - Why three coefficients instead of one? Because the swap fill rate - is usually lower than the RAM fill rate. - - It is possible to set a lower intensity of monitoring for swap - without compromising to prevent OOM and thus reduce the CPU load. - - Default values are well for desktop. On servers without rapid - fluctuations in memory levels the values can be reduced. - - Valid values are positive floating-point numbers. + Description: + Type: float + Valid values: fill_rate_mem = 4000 + + Description: + Type: float + Valid values: + fill_rate_swap = 1500 + + Description: + Type: float + Valid values: + fill_rate_zram = 6000 - See also https://github.com/rfjakob/earlyoom/issues/61 + Description: + Type: float + Valid values: max_sleep = 3 + + Description: + Type: float + Valid values: + min_sleep = 0.1 - Sleep time if soft threshold exceeded. + Description: + Type: float + Valid values: over_sleep = 0.05 ############################################################################### - 4. The prevention of killing innocent victims + 4. Warnings and notifications - Valid values are integers from the range [0; 1000]. + Description: + Type: boolean + Valid values: True and False -min_badness = 10 +post_action_gui_notifications = False - Valid values are non-negative floating-point numbers. - Min delay if a victim doesn't respond to SIGTERM in 10 ms. + Description: + Type: boolean + Valid values: True and False -post_soft_action_delay = 3 +low_memory_warnings_enabled = False -post_zombie_delay = 0.1 + Description: + Type: string + Valid values: -victim_cache_time = 10 +warning_exe = - Valid values are True and False. + Description: + Type: float (+ % or M) + Valid values: -ignore_positive_oom_score_adj = False +warning_threshold_min_mem = 20 % + + Description: + Type: float (+ % or M) + Valid values: + +warning_threshold_min_swap = 20 % + + Description: + Type: float (+ % or M) + Valid values: + +warning_threshold_max_zram = 50 % + + Description: + Type: float + Valid values: + +warning_threshold_max_psi = 100 + + Description: + Type: float + Valid values: + +min_post_warning_delay = 30 + + Description: + Type: float + Valid values: + +env_cache_time = 300 ############################################################################### - 5. Impact on the badness of processes via matching their names, - cmdlines or UIDs with regular expressions using re.search(). + 5. Soft threshold - See https://en.wikipedia.org/wiki/Regular_expression and - https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions + Description: + Type: float (+ % or M) + Valid values: - Enabling this options slows down the search for the victim - because the names, cmdlines or UIDs of all processes - (except init and kthreads) are compared with the - specified regex patterns (in fact slowing down is caused by - reading all /proc/*/cmdline and /proc/*/status files). +soft_threshold_min_mem = 8 % - Use script `oom-sort` from nohang package to view - names, cmdlines and UIDs of processes. + Description: + Type: float (+ % or M) + Valid values: - 5.1. Matching process names with RE patterns +soft_threshold_min_swap = 8 % + + Description: + Type: float (+ % or M) + Valid values: + +soft_threshold_max_zram = 60 % + + Description: + Type: float + Valid values: + +soft_threshold_max_psi = 60 + +############################################################################### + + 6. Hard threshold + +hard_threshold_min_mem = 4 % + + Description: + Type: float (+ % or M) + Valid values: + +hard_threshold_min_swap = 4 % + + Description: + Type: float (+ % or M) + Valid values: + +hard_threshold_max_zram = 65 % + + Description: + Type: float + Valid values: + +hard_threshold_max_psi = 90 + +############################################################################### + + 7. Customize victim selection: adjusting badness of processes + + 7.1. Ignore positive oom_score_adj + + Description: + Type: boolean + Valid values: True and False + +ignore_positive_oom_score_adj = False + + 7.2. Forbid negative badness + + Description: + Type: boolean + Valid values: True and False + +forbid_negative_badness = True + + + 7.3.1. Matching process names with RE patterns change their badness Syntax: @@ -201,27 +253,27 @@ ignore_positive_oom_score_adj = False Example: @BADNESS_ADJ_RE_NAME -500 /// ^sshd$ - 5.2. Matching CGroup_v1-line with RE patterns + 7.3.2. Matching CGroup_v1-line with RE patterns - @BADNESS_ADJ_RE_CGROUP_V1 -100 /// ^/system\.slice/ + @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/ @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$ @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/ - 5.3. Matching CGroup_v2-line with RE patterns + 7.3.3. Matching CGroup_v2-line with RE patterns @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload - 5.4. Matching eUIDs with RE patterns + 7.3.4. Matching eUIDs with RE patterns @BADNESS_ADJ_RE_UID -100 /// ^0$ - 5.5. Matching realpath with RE patterns + 7.3.5. Matching realpath with RE patterns @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo - 5.6. Matching cmdlines with RE patterns + 7.3.6. Matching cmdlines with RE patterns A good option that allows fine adjustment. @@ -229,21 +281,22 @@ ignore_positive_oom_score_adj = False @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer Prefer firefox tabs (Web Content and WebExtensions) - @BADNESS_ADJ_RE_CMDLINE 300 /// -appomni + @BADNESS_ADJ_RE_CMDLINE 100 /// -appomni @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox - 5.7. Matching environ with RE patterns + 7.3.7. Matching environ with RE patterns @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user + Note that you can control badness also via systemd units via OOMScoreAdjust, see www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust= ############################################################################### - 6. Customize corrective actions. + 8. Customize soft corrective actions TODO: docs @@ -256,6 +309,8 @@ ignore_positive_oom_score_adj = False @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE @SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE + @SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID + $PID will be replaced by process PID. $NAME will be replaced by process name. $SERVICE will be replaced by .service if it exists (overwise it will be @@ -263,59 +318,80 @@ ignore_positive_oom_score_adj = False ############################################################################### - 7. GUI notifications & low memory warnings + 9. Misc settings -post_action_gui_notifications = False + Description: + Type: float + Valid values: - Enable GUI notifications about the low level of available memory. - Valid values are True and False. +max_soft_exit_time = 10 -low_memory_warnings_enabled = False + Description: + Type: string + Valid values: - Execute the command instead of sending GUI notifications if the value is - not empty line. For example: - warning_exe = cat /proc/meminfo & +post_kill_exe = -warning_exe = + Description: + Type: integer + Valid values: - Can be specified in % (percent) and M (MiB). - Valid values are floating-point numbers from the range [0; 100] %. +min_badness = 10 -warning_threshold_min_mem = 20 % + Description: + Type: float + Valid values: -warning_threshold_min_swap = 25 % +post_soft_action_delay = 3 -warning_threshold_max_zram = 50 % + Description: + Type: float + Valid values: -warning_threshold_max_psi = 100 +post_zombie_delay = 0.1 - Valid values are floating-point numbers from the range [1; 300]. + Description: + Type: float + Valid values: -min_post_warning_delay = 20 +victim_cache_time = 10 -env_cache_time = 300 + Description: + Type: float + Valid values: - Ampersands (&) will be replaced with asterisks (*) in process - names and in commands. +exe_timeout = 20 ############################################################################### - 8. Verbosity + 10. Verbosity, debug, logging - Display the configuration when the program starts. - Valid values are True and False. + Description: + Type: boolean + Valid values: True and False print_config_at_startup = False - Print memory check results. - Valid values are True and False. + Description: + Type: boolean + Valid values: True and False print_mem_check_results = False + Description: + Type: float + Valid values: + min_mem_report_interval = 60 + Description: + Type: boolean + Valid values: True and False + print_proc_table = False + Description: + Type: string Valid values: None cgroup_v1 @@ -326,36 +402,59 @@ print_proc_table = False extra_table_info = None + Description: + Type: boolean + Valid values: True and False + print_victim_status = True -max_victim_ancestry_depth = 3 + Description: + Type: boolean + Valid values: True and False print_victim_cmdline = False + Description: + Type: integer + Valid values: + +max_victim_ancestry_depth = 3 + + Description: + Type: boolean + Valid values: True and False + print_statistics = True - Print sleep periods between memory checks. - Valid values are True and False. + Description: + Type: boolean + Valid values: True and False debug_psi = False + Description: + Type: boolean + Valid values: True and False + debug_gui_notifications = False + Description: + Type: boolean + Valid values: True and False + debug_sleep = False -separate_log = False + Description: + Type: boolean + Valid values: True and False debug_threading = False -############################################################################### + Description: + Type: boolean + Valid values: True and False - 9. Misc - -max_soft_exit_time = 10 - -post_kill_exe = - -forbid_negative_badness = True +separate_log = False ############################################################################### diff --git a/test.conf b/test.conf index dc85b2b..afa4aed 100644 --- a/test.conf +++ b/test.conf @@ -6,186 +6,240 @@ The configuration includes the following sections: - 0. Common zram settings - 1. Memory levels to respond to as an OOM threat - 2. Response on PSI memory metrics - 3. The frequency of checking the level of available memory - (and CPU usage) - 4. The prevention of killing innocent victims - 5. Impact on the badness of processes via matching their names, cgroups and - cmdlines with specified regular expressions - 6. Customize corrective actions: the execution of a specific command - instead of sending the SIGTERM signal - 7. GUI notifications: - - low memory warnings - - OOM prevention results - 8. Output verbosity - 9. Misc + 1. Common zram settings + 2. Common PSI settings + 3. Poll rate + 4. Warnings and notifications + 5. Soft threshold + 6. Hard threshold + 7. Customize victim selection: adjusting badness of processes + 8. Customize soft corrective actions + 9. Misc settings + 10. Verbosity, debug, logging Just read the description of the parameters and edit the values. Please restart the program after editing the config. + TODO: improve descriptions + ############################################################################### - 0. Common zram settings + 1. Common zram settings - See https://www.kernel.org/doc/Documentation/blockdev/zram.txt - You maybe need to set `zram_checking_enabled = True` if you has a big zram disksize. + Key: zram_checking_enabled + Description: + Type: boolean + Valid values: True and False + Default value: False zram_checking_enabled = True ############################################################################### - 1. Thresholds below which a signal should be sent to the victim + 2. Common PSI settings - Sets the available memory levels at or below which SIGTERM or SIGKILL - signals are sent. The signal will be sent if MemAvailable and - SwapFree (in /proc/meminfo) at the same time will drop below the - corresponding values. Can be specified in % (percent) and M (MiB). - Valid values are floating-point numbers from the range [0; 100] %. - - MemAvailable levels. - -soft_threshold_min_mem = 10 % -hard_threshold_min_mem = 5 % - - SwapFree levels. - -soft_threshold_min_swap = 15 % -hard_threshold_min_swap = 5 % - - Specifying the total share of zram in memory, if exceeded the - corresponding signals are sent. As the share of zram in memory - increases, it may fall responsiveness of the system. 90 % is a - usual hang level, not recommended to set very high. - - Can be specified in % and M. Valid values are floating-point - numbers from the range [0; 90] %. - -soft_threshold_max_zram = 50 % -hard_threshold_max_zram = 60 % - - -############################################################################### - - 2. Response on PSI memory metrics (it needs Linux 4.20 and up) - - About PSI: - https://facebookmicrosites.github.io/psi/ - - Disabled by default (psi_checking_enabled = False). + Description: + Type: boolean + Valid values: True and False psi_checking_enabled = True - Choose a path to PSI file. - By default it monitors system-wide file: /proc/pressure/memory - You also can set file to monitor one cgroup slice. - For example: - psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure - psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure - psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure - - Execute the command - find /sys/fs/cgroup -name memory.pressure - to find available memory.pressue files (except /proc/pressure/memory). - (actual for cgroup2) + Description: + Type: string + Valid values: psi_path = /proc/pressure/memory - Valid psi_metrics are: - some_avg10 - some_avg60 - some_avg300 - full_avg10 - full_avg60 - full_avg300 - - some_avg10 is most sensitive. + Description: + Type: string + Valid values: psi_metrics = some_avg10 -soft_threshold_max_psi = 60 + Description: + Type: float + Valid values: -hard_threshold_max_psi = 90 - - >= 0, float psi_excess_duration = 60 + Description: + Type: float + Valid values: + psi_post_action_delay = 60 - ############################################################################### - 3. The frequency of checking the amount of available memory - (and CPU usage) + 3. Poll rate - Coefficients that affect the intensity of monitoring. Reducing - the coefficients can reduce CPU usage and increase the periods - between memory checks. - - Why three coefficients instead of one? Because the swap fill rate - is usually lower than the RAM fill rate. - - It is possible to set a lower intensity of monitoring for swap - without compromising to prevent OOM and thus reduce the CPU load. - - Default values are well for desktop. On servers without rapid - fluctuations in memory levels the values can be reduced. - - Valid values are positive floating-point numbers. + Description: + Type: float + Valid values: fill_rate_mem = 4000 + + Description: + Type: float + Valid values: + fill_rate_swap = 1500 + + Description: + Type: float + Valid values: + fill_rate_zram = 6000 - See also https://github.com/rfjakob/earlyoom/issues/61 + Description: + Type: float + Valid values: max_sleep = 3 + + Description: + Type: float + Valid values: + min_sleep = 0.1 - Sleep time if soft threshold exceeded. + Description: + Type: float + Valid values: over_sleep = 0.05 ############################################################################### - 4. The prevention of killing innocent victims + 4. Warnings and notifications - Valid values are integers from the range [0; 1000]. + Description: + Type: boolean + Valid values: True and False -min_badness = 20 +post_action_gui_notifications = True - Valid values are non-negative floating-point numbers. - Min delay if a victim doesn't respond to SIGTERM in 10 ms. + Description: + Type: boolean + Valid values: True and False -post_soft_action_delay = 3 +low_memory_warnings_enabled = True -post_zombie_delay = 0.1 + Description: + Type: string + Valid values: -victim_cache_time = 10 +warning_exe = - Valid values are True and False. + Description: + Type: float (+ % or M) + Valid values: -ignore_positive_oom_score_adj = True +warning_threshold_min_mem = 20 % + + Description: + Type: float (+ % or M) + Valid values: + +warning_threshold_min_swap = 20 % + + Description: + Type: float (+ % or M) + Valid values: + +warning_threshold_max_zram = 50 % + + Description: + Type: float + Valid values: + +warning_threshold_max_psi = 100 + + Description: + Type: float + Valid values: + +min_post_warning_delay = 30 + + Description: + Type: float + Valid values: + +env_cache_time = 300 ############################################################################### - 5. Impact on the badness of processes via matching their names, - cmdlines or UIDs with regular expressions using re.search(). + 5. Soft threshold - See https://en.wikipedia.org/wiki/Regular_expression and - https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions + Description: + Type: float (+ % or M) + Valid values: - Enabling this options slows down the search for the victim - because the names, cmdlines or UIDs of all processes - (except init and kthreads) are compared with the - specified regex patterns (in fact slowing down is caused by - reading all /proc/*/cmdline and /proc/*/status files). +soft_threshold_min_mem = 8 % - Use script `oom-sort` from nohang package to view - names, cmdlines and UIDs of processes. + Description: + Type: float (+ % or M) + Valid values: - 5.1. Matching process names with RE patterns +soft_threshold_min_swap = 8 % + + Description: + Type: float (+ % or M) + Valid values: + +soft_threshold_max_zram = 60 % + + Description: + Type: float + Valid values: + +soft_threshold_max_psi = 60 + +############################################################################### + + 6. Hard threshold + +hard_threshold_min_mem = 4 % + + Description: + Type: float (+ % or M) + Valid values: + +hard_threshold_min_swap = 4 % + + Description: + Type: float (+ % or M) + Valid values: + +hard_threshold_max_zram = 65 % + + Description: + Type: float + Valid values: + +hard_threshold_max_psi = 90 + +############################################################################### + + 7. Customize victim selection: adjusting badness of processes + + 7.1. Ignore positive oom_score_adj + + Description: + Type: boolean + Valid values: True and False + +ignore_positive_oom_score_adj = True + + 7.2. Forbid negative badness + + Description: + Type: boolean + Valid values: True and False + +forbid_negative_badness = True + + + 7.3.1. Matching process names with RE patterns change their badness Syntax: @@ -199,61 +253,69 @@ ignore_positive_oom_score_adj = True Example: @BADNESS_ADJ_RE_NAME -500 /// ^sshd$ - 5.2. Matching CGroup_v1-line with RE patterns + 7.3.2. Matching CGroup_v1-line with RE patterns -@BADNESS_ADJ_RE_CGROUP_V1 5 /// ^/system\.slice/ +@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/ @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$ @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/ - 5.3. Matching CGroup_v2-line with RE patterns + 7.3.3. Matching CGroup_v2-line with RE patterns @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload - 5.4. Matching eUIDs with RE patterns + 7.3.4. Matching eUIDs with RE patterns -@BADNESS_ADJ_RE_UID 50 /// ^0$ +@BADNESS_ADJ_RE_UID -100 /// ^0$ - 5.5. Matching realpath with RE patterns + 7.3.5. Matching realpath with RE patterns @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo - 5.6. Matching cmdlines with RE patterns + 7.3.6. Matching cmdlines with RE patterns - A good option that allows fine adjustment. - - Prefer chromium tabs and electron-based apps @BADNESS_ADJ_RE_CMDLINE 2000 /// ^/bin/sleep + + Prefer chromium tabs and electron-based apps +@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer + Prefer firefox tabs (Web Content and WebExtensions) - @BADNESS_ADJ_RE_CMDLINE 100 /// -appomni +@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni - @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox +@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox - 5.7. Matching environ with RE patterns + 7.3.7. Matching environ with RE patterns @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user + Note that you can control badness also via systemd units via OOMScoreAdjust, see www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust= ############################################################################### - 6. Customize corrective actions. + 8. Customize soft corrective actions TODO: docs Syntax: KEY REGEXP SEPARATOR COMMAND + @SOFT_ACTION_RE_NAME ^tail$ /// kill -SEGV $PID + + + @SOFT_ACTION_RE_NAME ^foo$ /// kill -SEGV $PID @SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE @SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE + @SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID + $PID will be replaced by process PID. $NAME will be replaced by process name. $SERVICE will be replaced by .service if it exists (overwise it will be @@ -261,59 +323,80 @@ ignore_positive_oom_score_adj = True ############################################################################### - 7. GUI notifications & low memory warnings + 9. Misc settings -post_action_gui_notifications = True + Description: + Type: float + Valid values: - Enable GUI notifications about the low level of available memory. - Valid values are True and False. +max_soft_exit_time = 10 -low_memory_warnings_enabled = True + Description: + Type: string + Valid values: - Execute the command instead of sending GUI notifications if the value is - not empty line. For example: - warning_exe = cat /proc/meminfo & +post_kill_exe = -warning_exe = echo 0 + Description: + Type: integer + Valid values: - Can be specified in % (percent) and M (MiB). - Valid values are floating-point numbers from the range [0; 100] %. +min_badness = 10 -warning_threshold_min_mem = 25 % + Description: + Type: float + Valid values: -warning_threshold_min_swap = 35 % +post_soft_action_delay = 3 -warning_threshold_max_zram = 40 % + Description: + Type: float + Valid values: -warning_threshold_max_psi = 100 +post_zombie_delay = 0.1 - Valid values are floating-point numbers from the range [1; 300]. + Description: + Type: float + Valid values: -min_post_warning_delay = 20 +victim_cache_time = 10 -env_cache_time = 300 + Description: + Type: float + Valid values: - Ampersands (&) will be replaced with asterisks (*) in process - names and in commands. +exe_timeout = 20 ############################################################################### - 8. Verbosity + 10. Verbosity, debug, logging - Display the configuration when the program starts. - Valid values are True and False. + Description: + Type: boolean + Valid values: True and False print_config_at_startup = True - Print memory check results. - Valid values are True and False. + Description: + Type: boolean + Valid values: True and False print_mem_check_results = True + Description: + Type: float + Valid values: + min_mem_report_interval = 0 + Description: + Type: boolean + Valid values: True and False + print_proc_table = True + Description: + Type: string Valid values: None cgroup_v1 @@ -322,39 +405,61 @@ print_proc_table = True cmdline environ -extra_table_info = cgroup_v1 +extra_table_info = None + + Description: + Type: boolean + Valid values: True and False print_victim_status = True -max_victim_ancestry_depth = 99 + Description: + Type: boolean + Valid values: True and False print_victim_cmdline = True + Description: + Type: integer + Valid values: + +max_victim_ancestry_depth = 99 + + Description: + Type: boolean + Valid values: True and False + print_statistics = True - Print sleep periods between memory checks. - Valid values are True and False. + Description: + Type: boolean + Valid values: True and False debug_psi = True + Description: + Type: boolean + Valid values: True and False + debug_gui_notifications = True + Description: + Type: boolean + Valid values: True and False + debug_sleep = True -separate_log = True + Description: + Type: boolean + Valid values: True and False debug_threading = True + Description: + Type: boolean + Valid values: True and False -############################################################################### - - 9. Misc - -max_soft_exit_time = 10 - -post_kill_exe = echo 0 - -forbid_negative_badness = True +separate_log = True ###############################################################################