This commit is contained in:
Alexey Avramov 2019-09-23 00:40:10 +09:00
parent 3cd460e6eb
commit 31ee445e7f
5 changed files with 1067 additions and 643 deletions

View File

@ -149,17 +149,16 @@ optional arguments:
The program can be configured by editing the [config file](https://github.com/hakavlad/nohang/blob/master/nohang.conf). The configuration includes the following sections:
1. Memory levels to respond to as an OOM threat
2. Response on PSI memory metrics
3. The frequency of checking the level of available memory (and CPU usage)
4. The prevention of killing innocent victims
5. Impact on the badness of processes via matching their names, cgroups, realpaths, cmdlines and UIDs with certain regular expressions
6. The execution of a specific command or sending any signal instead of sending the SIGTERM signal
7. GUI notifications:
- notifications of corrective actions taken
- low memory warnings (or executing certain command instead)
8. Verbosity
9. Misc
1. Common zram settings
2. Common PSI settings
3. Poll rate
4. Warnings and notifications
5. Soft threshold
6. Hard threshold
7. Customize victim selection
8. Customize soft corrective actions
9. Misc settings
10. Verbosity, debug, logging
Just read the description of the parameters and edit the values. Please restart nohang to apply the changes. Default path to the config after installing is `/etc/nohang/nohang.conf`.

364
nohang
View File

@ -9,44 +9,67 @@ from sys import stdout, stderr, argv, exit
from re import search
from sre_constants import error as invalid_re
from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP
from threading import Thread
##########################################################################
###############################################################################
# define functions
def exe(cmd):
""" execute cmd
""" execute cmd in subprocess.Popen()
"""
log('Execute the command: {}'.format(cmd))
t0 = monotonic()
write_self_oom_score_adj(self_oom_score_adj_max)
err = os.system(cmd)
write_self_oom_score_adj(self_oom_score_adj_min)
dt = monotonic() - t0
log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3)))
return err
cmd_num_dict['cmd_num'] += 1
cmd_num = cmd_num_dict['cmd_num']
log('Execute the command({}) in {}: {}'.format(
cmd_num,
threading.current_thread().getName(),
cmd))
t3 = monotonic()
with Popen(cmd, shell=True) as proc:
try:
proc.wait(timeout=exe_timeout)
exit_status = proc.poll()
t4 = monotonic()
log('Command({}) execution completed in {} sec; exit status' \
': {}'.format(cmd_num, round(t4 - t3, 3), exit_status))
except TimeoutExpired:
proc.kill()
t4 = monotonic()
log('TimeoutExpired for the command({}) in {} sec'.format(
cmd_num, round(t4 - t3, 3)))
def go(func, *a):
""" run func in new thread
def start_thread(func, *a, **k):
""" run function in a new thread
"""
t1 = monotonic()
th = Thread(target=func, args=a)
th = threading.Thread(target=func, args=a, kwargs=k)
th_name = th.getName()
if debug_threading:
log('Starting {}'.format(th_name))
log('Starting {} from {}'.format(
th_name, threading.current_thread().getName()
))
try:
t1 = monotonic()
th.start()
t2 = monotonic()
if debug_threading:
log('{} has started in {} ms'.format(
th_name, round((t2 - t1) * 1000, 1)))
log('{} has started in {} ms, {} threads currently alive'.format(
th_name, round((t2 - t1) * 1000, 1), threading.active_count()
))
except RuntimeError:
if debug_threading:
log('RuntimeError: cannot start {}'.format(th_name))
log('RuntimeError: cannot start {}'.format(th_name))
return 1
def re_pid_environ(pid):
@ -57,7 +80,6 @@ def re_pid_environ(pid):
'DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus')
returns None if these vars is not in /proc/[pid]/environ
"""
try:
with open('/proc/' + pid + '/environ') as f:
env = f.read()
@ -128,8 +150,9 @@ def root_notify_env():
def pop(cmd, username):
""" run cmd in subprocess.Popen()
"""
"""
if swap_total == 0:
wait_time = 2
else:
@ -140,6 +163,7 @@ def pop(cmd, username):
with Popen(cmd) as proc:
try:
proc.wait(timeout=wait_time)
err = proc.poll()
except TimeoutExpired:
proc.kill()
if debug_gui_notifications:
@ -147,8 +171,12 @@ def pop(cmd, username):
t4 = monotonic()
err = 0
if debug_gui_notifications:
log('Popen time: {} sec; cmd: {}'.format(round(t4 - t3, 3), cmd))
pass
#log('Popen time: {} sec; exit status: {}; cmd: {}'.format(round(t4 - t3, 3), err, cmd))
log('Popen time: {} sec; exit status: {}; cmd: {}'.format(round(t4 - t3, 3), err, cmd))
def send_notification(title, body):
@ -214,7 +242,7 @@ def send_notification(title, body):
body
]
go(pop, cmd, username)
start_thread(pop, cmd, username)
else:
if debug_gui_notifications:
@ -227,7 +255,7 @@ def send_notify_warn():
log('Warning threshold exceeded')
if check_warning_exe:
go(exe, warning_exe)
start_thread(exe, warning_exe)
else:
@ -238,7 +266,7 @@ def send_notify_warn():
round(swap_free / (swap_total + 0.1) * 100)
)
go(send_notification, title, body)
start_thread(send_notification, title, body)
def send_notify(threshold, name, pid):
@ -261,7 +289,7 @@ def send_notify(threshold, name, pid):
)
)
go(send_notification, title, body)
start_thread(send_notification, title, body)
def send_notify_etc(pid, name, command):
@ -277,43 +305,27 @@ def send_notify_etc(pid, name, command):
'mmand:\n<b>{}</b>'.format(
pid, name.replace('&', '*'), command.replace('&', '*'))
go(send_notification, title, body)
start_thread(send_notification, title, body)
def check_config():
"""
"""
log('#' * 79)
log('0. Common zram settings')
log('\n1. Common zram settings')
log(' zram_checking_enabled: {}'.format(zram_checking_enabled))
log(' zram_checking_enabled: {}'.format(zram_checking_enabled))
log('1. Thresholds below which a signal should be sent to the victim')
log(' soft_threshold_min_mem: {} MiB, {} %'.format(
round(soft_threshold_min_mem_mb), round(soft_threshold_min_mem_percent, 1)))
log(' hard_threshold_min_mem: {} MiB, {} %'.format(
round(hard_threshold_min_mem_mb), round(hard_threshold_min_mem_percent, 1)))
log(' soft_threshold_min_swap: {}'.format(soft_threshold_min_swap))
log(' hard_threshold_min_swap: {}'.format(hard_threshold_min_swap))
log(' soft_threshold_max_zram: {} MiB, {} %'.format(
round(soft_threshold_max_zram_mb), round(soft_threshold_max_zram_percent, 1)))
log(' hard_threshold_max_zram: {} MiB, {} %'.format(
round(hard_threshold_max_zram_mb), round(hard_threshold_max_zram_percent, 1)))
log('2. Response on PSI memory metrics')
log('\n2. Common PSI settings')
log(' psi_checking_enabled: {}'.format(psi_checking_enabled))
log(' psi_path: {}'.format(psi_path))
log(' psi_metrics: {}'.format(psi_metrics))
log(' soft_threshold_max_psi: {}'.format(soft_threshold_max_psi))
log(' hard_threshold_max_psi: {}'.format(hard_threshold_max_psi))
log(' psi_excess_duration: {} sec'.format(psi_excess_duration))
log(' psi_post_action_delay: {} sec'.format(psi_post_action_delay))
log('3. The frequency of checking the amount of available memory')
log('\n3. Poll rate')
log(' fill_rate_mem: {}'.format(fill_rate_mem))
log(' fill_rate_swap: {}'.format(fill_rate_swap))
@ -322,18 +334,56 @@ def check_config():
log(' min_sleep: {} sec'.format(min_sleep))
log(' over_sleep: {} sec'.format(over_sleep))
log('4. The prevention of killing innocent victims')
log('\n4. Warnings and notifications')
log(' min_badness: {}'.format(min_badness))
log(' post_soft_action_delay: {} sec'.format(post_soft_action_delay))
log(' post_zombie_delay: {} sec'.format(post_zombie_delay))
log(' victim_cache_time: {} sec'.format(victim_cache_time))
log(' ignore_positive_oom_score_adj: {}'.format(
ignore_positive_oom_score_adj))
log(' post_action_gui_notifications: {}'.format(
post_action_gui_notifications))
log('5. Impact on the badness of processes')
log(' low_memory_warnings_enabled: {}'.format(
low_memory_warnings_enabled))
log(' warning_exe: {}'.format(warning_exe))
log(' warning_threshold_min_mem: {} MiB, {} %'.format(round(
warning_threshold_min_mem_mb), round(
warning_threshold_min_mem_percent, 1)))
log(' warning_threshold_min_swap: {}'.format
(warning_threshold_min_swap))
log(' warning_threshold_max_zram: {} MiB, {} %'.format(round(
warning_threshold_max_zram_mb), round(
warning_threshold_max_zram_percent, 1)))
log(' warning_threshold_max_psi: {}'.format(
warning_threshold_max_psi))
log(' min_post_warning_delay: {} sec'.format(
min_post_warning_delay))
log('5.1. Matching process names with RE patterns')
log(' env_cache_time: {}'.format(env_cache_time))
log('\n5. Soft threshold')
log(' soft_threshold_min_mem: {} MiB, {} %'.format(round(soft_threshold_min_mem_mb), round(soft_threshold_min_mem_percent, 1)))
log(' soft_threshold_min_swap: {}'.format(soft_threshold_min_swap))
log(' soft_threshold_max_zram: {} MiB, {} %'.format(round(soft_threshold_max_zram_mb), round(soft_threshold_max_zram_percent, 1)))
log(' soft_threshold_max_psi: {}'.format(soft_threshold_max_psi))
log('\n6. Hard threshold')
log(' hard_threshold_min_mem: {} MiB, {} %'.format(round(hard_threshold_min_mem_mb), round(hard_threshold_min_mem_percent, 1)))
log(' hard_threshold_min_swap: {}'.format(hard_threshold_min_swap))
log(' hard_threshold_max_zram: {} MiB, {} %'.format(round(hard_threshold_max_zram_mb), round(hard_threshold_max_zram_percent, 1)))
log(' hard_threshold_max_psi: {}'.format(hard_threshold_max_psi))
log('\n7. Customize victim selection: adjusting badness of processes')
log('\n7.1. Ignore positive oom_score_adj')
log(' ignore_positive_oom_score_adj: {}'.format(ignore_positive_oom_score_adj))
log('\n7.2. Forbid negative badness')
log(' forbid_negative_badness: {}'.format(forbid_negative_badness))
log('\n7.3. ')
log('7.3.1. Matching process names with RE patterns')
if len(badness_adj_re_name_list) > 0:
log(' regexp: badness_adj:')
for i in badness_adj_re_name_list:
@ -341,7 +391,7 @@ def check_config():
else:
log(' (not set)')
log('5.2. Matching CGroup_v1-line with RE patterns')
log('7.3.2. Matching CGroup_v1-line with RE patterns')
if len(badness_adj_re_cgroup_v1_list) > 0:
log(' regexp: badness_adj:')
for i in badness_adj_re_cgroup_v1_list:
@ -349,7 +399,7 @@ def check_config():
else:
log(' (not set)')
log('5.3. Matching CGroup_v2-line with RE patterns')
log('7.3.3. Matching CGroup_v2-line with RE patterns')
if len(badness_adj_re_cgroup_v2_list) > 0:
log(' regexp: badness_adj:')
for i in badness_adj_re_cgroup_v1_list:
@ -357,7 +407,7 @@ def check_config():
else:
log(' (not set)')
log('5.4. Matching eUIDs with RE patterns')
log('7.3.4. Matching eUIDs with RE patterns')
if len(badness_adj_re_cgroup_v2_list) > 0:
log(' regexp: badness_adj:')
for i in badness_adj_re_uid_list:
@ -365,7 +415,7 @@ def check_config():
else:
log(' (not set)')
log('5.5. Matching realpath with RE patterns')
log('7.3.5. Matching realpath with RE patterns')
if len(badness_adj_re_cgroup_v2_list) > 0:
log(' regexp: badness_adj:')
for i in badness_adj_re_realpath_list:
@ -373,7 +423,7 @@ def check_config():
else:
log(' (not set)')
log('5.6. Matching cmdlines with RE patterns')
log('7.3.6. Matching cmdlines with RE patterns')
if len(badness_adj_re_cgroup_v2_list) > 0:
log(' regexp: badness_adj:')
for i in badness_adj_re_cmdline_list:
@ -381,7 +431,7 @@ def check_config():
else:
log(' (not set)')
log('5.7. Matching environ with RE patterns')
log('7.3.7. Matching environ with RE patterns')
if len(badness_adj_re_cgroup_v2_list) > 0:
log(' regexp: badness_adj:')
for i in badness_adj_re_environ_list:
@ -389,7 +439,7 @@ def check_config():
else:
log(' (not set)')
log('6. Customize corrective actions')
log('\n8. Customize corrective actions')
if len(soft_actions_list) > 0:
log(' Match by: regexp: command: ')
@ -398,45 +448,43 @@ def check_config():
else:
log(' (not set)')
log('7. GUI notifications')
log('\n9. Misc')
log(' post_action_gui_notifications: {}'.format(
post_action_gui_notifications))
log(' low_memory_warnings_enabled: {}'.format(
low_memory_warnings_enabled))
log(' warning_exe: {}'.format(warning_exe))
log(' warning_threshold_min_mem: {} MiB, {} %'.format(round(
warning_threshold_min_mem_mb), round(warning_threshold_min_mem_percent, 1)))
log(' warning_threshold_min_swap: {}'.format(warning_threshold_min_swap))
log(' warning_threshold_max_zram: {} MiB, {} %'.format(round(
warning_threshold_max_zram_mb), round(warning_threshold_max_zram_percent, 1)))
log(' warning_threshold_max_psi: {}'.format(warning_threshold_max_psi))
log(' min_post_warning_delay: {} sec'.format(min_post_warning_delay))
log(' max_soft_exit_time: {} sec'.format(max_soft_exit_time))
log('8. Verbosity')
log(' post_kill_exe: {}'.format(post_kill_exe))
log(' min_badness: {}'.format(min_badness))
log(' post_soft_action_delay: {} sec'.format(
post_soft_action_delay))
log(' post_zombie_delay: {} sec'.format(post_zombie_delay))
log(' victim_cache_time: {} sec'.format(victim_cache_time))
log(' exe_timeout: {} sec'.format(exe_timeout))
log('\n10. Verbosity')
log(' print_config_at_startup: {}'.format(print_config_at_startup))
log(' print_mem_check_results: {}'.format(print_mem_check_results))
log(' min_mem_report_interval: {} sec'.format(min_mem_report_interval))
log(' debug_sleep: {}'.format(debug_sleep))
log(' print_statistics: {}'.format(print_statistics))
log(' min_mem_report_interval: {} sec'.format(
min_mem_report_interval))
log(' print_proc_table: {}'.format(print_proc_table))
log(' extra_table_info: {}'.format(extra_table_info))
log(' print_victim_status: {}'.format(print_victim_status))
log(' print_victim_cmdline: {}'.format(print_victim_cmdline))
log(' max_victim_ancestry_depth: {}'.format(max_victim_ancestry_depth))
log(' print_statistics: {}'.format(print_statistics))
log(' debug_gui_notifications: {}'.format(debug_gui_notifications))
log(' separate_log: {}'.format(separate_log))
log(' debug_psi: {}'.format(debug_psi))
log(' debug_sleep: {}'.format(debug_sleep))
log(' debug_threading: {}'.format(debug_threading))
log(' separate_log: {}'.format(separate_log))
log('9. Misc')
log(' max_soft_exit_time: {} sec'.format(max_soft_exit_time))
log(' post_kill_exe: {}'.format(post_kill_exe))
log(' forbid_negative_badness: {}'.format(
forbid_negative_badness))
# log(': {}'.format())
log('#' * 79)
if check_config_flag:
@ -448,7 +496,6 @@ def get_swap_threshold_tuple(string):
# re (Num %, True) or (Num KiB, False)
"""Returns KiB value if abs val was set in config, or tuple with %"""
# return tuple with abs and bool: (abs %, True) or (abs MiB, False)
if string.endswith('%'):
valid = string_to_float_convert_test(string[:-1])
if valid is None:
@ -949,6 +996,11 @@ def errprint(*text):
"""
"""
print(*text, file=stderr, flush=True)
try:
if separate_log:
logging.info(*msg)
except NameError:
pass
def mlockall():
@ -1652,11 +1704,8 @@ def check_mem_swap_ex():
if (mem_available <= hard_threshold_min_mem_kb and
swap_free <= hard_threshold_min_swap_kb):
mem_info = 'Memory status that requ' \
'ires corrective actions (hard threshold exceeded):' \
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
'p_min_sigkill [{} MiB, {} %]'.format(
mem_info = 'Memory status that requires corrective actions:\n MemAvailable [{} MiB, {} %] <= hard_threshold_min_mem [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= hard_threshold_min_swap [{} MiB, {} %]'.format(
kib_to_mib(mem_available),
percent(mem_available / mem_total),
kib_to_mib(hard_threshold_min_mem_kb),
@ -1669,14 +1718,13 @@ def check_mem_swap_ex():
return (SIGKILL, mem_info, mem_available, hard_threshold_min_swap_kb,
soft_threshold_min_swap_kb, swap_free, swap_total)
if (mem_available <= soft_threshold_min_mem_kb and
swap_free <= soft_threshold_min_swap_kb):
mem_info = 'Memory status that requi' \
'res corrective actions (soft threshold exceeded):' \
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
'p_min_sigterm [{} MiB, {} %]'.format(
mem_info = 'Memory status that requires corrective actions:\n MemAvailable [{} MiB, {} %] <= soft_threshold_min_mem [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= soft_threshold_min_swap [{} MiB, {} %]'.format(
kib_to_mib(mem_available),
percent(mem_available / mem_total),
kib_to_mib(soft_threshold_min_mem_kb),
@ -1689,6 +1737,7 @@ def check_mem_swap_ex():
return (SIGTERM, mem_info, mem_available, hard_threshold_min_swap_kb,
soft_threshold_min_swap_kb, swap_free, swap_total)
if low_memory_warnings_enabled:
if (mem_available <= warning_threshold_min_mem_kb and swap_free <=
@ -1707,10 +1756,8 @@ def check_zram_ex():
if mem_used_zram >= hard_threshold_max_zram_kb:
mem_info = 'Memory status that requir' \
'es corrective actions (hard threshold exceeded):' \
'\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
'kill [{} MiB, {} %]'.format(
mem_info = 'Memory status that requires corrective actions:\n MemUsedZram [{} MiB, {} %] >= hard_threshold_max_zram [{} MiB, {} %]'.format(
kib_to_mib(mem_used_zram),
percent(mem_used_zram / mem_total),
kib_to_mib(hard_threshold_max_zram_kb),
@ -1718,11 +1765,10 @@ def check_zram_ex():
return SIGKILL, mem_info, mem_used_zram
if mem_used_zram >= soft_threshold_max_zram_kb:
mem_info = 'Memory status that requires corrective actions (soft th' \
'reshold exceeded):\n MemUsedZram [{} MiB, {} %] >= zram_max_s' \
'igterm [{} M, {} %]'.format(
mem_info = 'Memory status that requires corrective actions:\n MemUsedZram [{} MiB, {} %] >= soft_threshold_max_zram [{} M, {} %]'.format(
kib_to_mib(mem_used_zram),
percent(mem_used_zram / mem_total),
kib_to_mib(soft_threshold_max_zram_kb),
@ -1871,6 +1917,20 @@ def is_victim_alive(victim_id):
return 0
def implement_corrective_action(
threshold,
mem_info_list,
@ -1882,6 +1942,8 @@ def implement_corrective_action(
zram_threshold,
zram_info,
psi_info):
""" great and terrible function
"""
log('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
@ -2067,7 +2129,7 @@ def implement_corrective_action(
cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name(
pid)).replace('$SERVICE', service)
go(exe, cmd)
start_thread(exe, cmd)
"""
if exit_status == 0:
@ -2212,7 +2274,7 @@ def implement_corrective_action(
log('Execute post_kill_exe')
go(exe, cmd)
start_thread(exe, cmd)
if post_action_gui_notifications:
if soft_match:
@ -2246,6 +2308,23 @@ def implement_corrective_action(
return psi_t0
def sleep_after_check_mem():
"""Specify sleep times depends on rates and avialable memory."""
@ -2372,7 +2451,7 @@ def calculate_percent(arg_key):
return mem_min_kb, mem_min_mb, mem_min_percent
##########################################################################
###############################################################################
# {victim_id : {'time': timestamp, 'name': name}
@ -2547,7 +2626,7 @@ except ValueError:
log('config: ' + config)
##########################################################################
###############################################################################
# parsing the config with obtaining the parameters dictionary
@ -2750,12 +2829,11 @@ else:
soft_actions = True
##########################################################################
###############################################################################
# post_zombie_delay = 0.1
# victim_cache_time = 50
# extracting parameters from the dictionary
@ -2777,8 +2855,6 @@ post_action_gui_notifications = conf_parse_bool(
'post_action_gui_notifications')
if low_memory_warnings_enabled or post_action_gui_notifications:
from subprocess import Popen, TimeoutExpired
debug_threading = conf_parse_bool('debug_threading')
@ -2850,13 +2926,35 @@ if 'env_cache_time' in config_dict:
errprint('Invalid env_cache_time value, not float\nExit')
exit(1)
if env_cache_time < 0:
errprint('fill_rate_mem MUST be >= 0\nExit')
errprint('env_cache_time MUST be >= 0\nExit')
exit(1)
else:
errprint('fill_rate_mem not in config\nExit')
errprint('env_cache_time not in config\nExit')
exit(1)
if 'exe_timeout' in config_dict:
exe_timeout = string_to_float_convert_test(
config_dict['exe_timeout'])
if exe_timeout is None:
errprint('Invalid exe_timeout value, not float\nExit')
exit(1)
if exe_timeout <= 0:
errprint('exe_timeout MUST be > 0\nExit')
exit(1)
else:
errprint('exe_timeout not in config\nExit')
exit(1)
if 'fill_rate_mem' in config_dict:
fill_rate_mem = string_to_float_convert_test(config_dict['fill_rate_mem'])
if fill_rate_mem is None:
@ -3230,9 +3328,26 @@ if print_proc_table_flag:
func_print_proc_table()
##########################################################################
if (low_memory_warnings_enabled or \
post_action_gui_notifications or \
check_warning_exe or \
soft_actions or \
post_kill_exe != ''):
import threading
from subprocess import Popen, TimeoutExpired
psi_support = os.path.exists(psi_path)
@ -3298,7 +3413,7 @@ fill_rate_zram = fill_rate_zram * 1024
warn_time_now = 0
warn_time_delta = 1000
warn_time_delta = 1000 # ?
warn_timer = 0
@ -3372,6 +3487,15 @@ envd = dict()
envd['list_with_envs'] = envd['t'] = None
cmd_num_dict = dict()
cmd_num_dict['cmd_num'] = 0
##########################################################################

View File

@ -6,188 +6,240 @@
The configuration includes the following sections:
0. Common zram settings
1. Memory levels to respond to as an OOM threat
2. Response on PSI memory metrics
3. The frequency of checking the level of available memory
(and CPU usage)
4. The prevention of killing innocent victims
5. Impact on the badness of processes via matching their names, cgroups and
cmdlines with specified regular expressions
6. Customize corrective actions: the execution of a specific command
instead of sending the SIGTERM signal
7. GUI notifications:
- low memory warnings
- OOM prevention results
8. Output verbosity
9. Misc
1. Common zram settings
2. Common PSI settings
3. Poll rate
4. Warnings and notifications
5. Soft threshold
6. Hard threshold
7. Customize victim selection: adjusting badness of processes
8. Customize soft corrective actions
9. Misc settings
10. Verbosity, debug, logging
Just read the description of the parameters and edit the values.
Please restart the program after editing the config.
More docs will be written later.
TODO: improve descriptions
###############################################################################
0. Common zram settings
1. Common zram settings
See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
You may need to set `zram_checking_enabled = True` if you have a big zram disksize.
Key: zram_checking_enabled
Description:
Type: boolean
Valid values: True and False
Default value: False
zram_checking_enabled = False
###############################################################################
1. Thresholds below which a signal should be sent to the victim
2. Common PSI settings
Sets the available memory levels at or below which SIGTERM or SIGKILL
signals are sent. The signal is sent when MemAvailable and
SwapFree (in /proc/meminfo) both drop below the
corresponding values at the same time. Can be specified in % (percent) and M (MiB).
Valid values are floating-point numbers from the range [0; 100] %.
MemAvailable levels.
soft_threshold_min_mem = 8 %
hard_threshold_min_mem = 4 %
SwapFree levels.
soft_threshold_min_swap = 10 %
hard_threshold_min_swap = 5 %
Specifies the total share of zram in memory above which the
corresponding signals are sent. As the share of zram in memory
increases, the responsiveness of the system may fall. 90 % is a
usual hang level; setting a very high value is not recommended.
Can be specified in % and M. Valid values are floating-point
numbers from the range [0; 90] %.
soft_threshold_max_zram = 60 %
hard_threshold_max_zram = 65 %
###############################################################################
2. Response on PSI memory metrics (it needs Linux 4.20 and up)
About PSI:
https://facebookmicrosites.github.io/psi/
Disabled by default (psi_checking_enabled = False).
Description:
Type: boolean
Valid values: True and False
psi_checking_enabled = False
Choose a path to PSI file.
By default it monitors system-wide file: /proc/pressure/memory
You also can set file to monitor one cgroup slice.
For example:
psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
Execute the command
find /sys/fs/cgroup -name memory.pressure
to find available memory.pressure files (except /proc/pressure/memory).
(relevant for cgroup2)
Description:
Type: string
Valid values:
psi_path = /proc/pressure/memory
Valid psi_metrics are:
some_avg10
some_avg60
some_avg300
full_avg10
full_avg60
full_avg300
some_avg10 is most sensitive.
Description:
Type: string
Valid values:
psi_metrics = some_avg10
soft_threshold_max_psi = 60
Description:
Type: float
Valid values:
hard_threshold_max_psi = 90
>= 0, float
psi_excess_duration = 60
Description:
Type: float
Valid values:
psi_post_action_delay = 60
###############################################################################
3. The frequency of checking the amount of available memory
(and CPU usage)
3. Poll rate
Coefficients that affect the intensity of monitoring. Reducing
the coefficients can reduce CPU usage and increase the periods
between memory checks.
Why three coefficients instead of one? Because the swap fill rate
is usually lower than the RAM fill rate.
It is possible to set a lower intensity of monitoring for swap
without compromising to prevent OOM and thus reduce the CPU load.
Default values work well for desktops. On servers without rapid
fluctuations in memory levels the values can be reduced.
Valid values are positive floating-point numbers.
Description:
Type: float
Valid values:
fill_rate_mem = 4000
Description:
Type: float
Valid values:
fill_rate_swap = 1500
Description:
Type: float
Valid values:
fill_rate_zram = 6000
See also https://github.com/rfjakob/earlyoom/issues/61
Description:
Type: float
Valid values:
max_sleep = 3
Description:
Type: float
Valid values:
min_sleep = 0.1
Sleep time if soft threshold exceeded.
Description:
Type: float
Valid values:
over_sleep = 0.05
###############################################################################
4. The prevention of killing innocent victims
4. Warnings and notifications
Valid values are integers from the range [0; 1000].
Description:
Type: boolean
Valid values: True and False
min_badness = 10
post_action_gui_notifications = True
Valid values are non-negative floating-point numbers.
Min delay if a victim doesn't respond to SIGTERM in 10 ms.
Description:
Type: boolean
Valid values: True and False
post_soft_action_delay = 3
low_memory_warnings_enabled = True
post_zombie_delay = 0.1
Description:
Type: string
Valid values:
victim_cache_time = 10
warning_exe =
Valid values are True and False.
Description:
Type: float (+ % or M)
Valid values:
ignore_positive_oom_score_adj = False
warning_threshold_min_mem = 20 %
Description:
Type: float (+ % or M)
Valid values:
warning_threshold_min_swap = 20 %
Description:
Type: float (+ % or M)
Valid values:
warning_threshold_max_zram = 50 %
Description:
Type: float
Valid values:
warning_threshold_max_psi = 100
Description:
Type: float
Valid values:
min_post_warning_delay = 30
Description:
Type: float
Valid values:
env_cache_time = 300
###############################################################################
5. Impact on the badness of processes via matching their names,
cmdlines or UIDs with regular expressions using re.search().
5. Soft threshold
See https://en.wikipedia.org/wiki/Regular_expression and
https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions
Description:
Type: float (+ % or M)
Valid values:
Enabling these options slows down the search for the victim
because the names, cmdlines or UIDs of all processes
(except init and kthreads) are compared with the
specified regex patterns (in fact slowing down is caused by
reading all /proc/*/cmdline and /proc/*/status files).
soft_threshold_min_mem = 8 %
Use script `oom-sort` from nohang package to view
names, cmdlines and UIDs of processes.
Description:
Type: float (+ % or M)
Valid values:
5.1. Matching process names with RE patterns
soft_threshold_min_swap = 8 %
Description:
Type: float (+ % or M)
Valid values:
soft_threshold_max_zram = 60 %
Description:
Type: float
Valid values:
soft_threshold_max_psi = 60
###############################################################################
6. Hard threshold
hard_threshold_min_mem = 4 %
Description:
Type: float (+ % or M)
Valid values:
hard_threshold_min_swap = 4 %
Description:
Type: float (+ % or M)
Valid values:
hard_threshold_max_zram = 65 %
Description:
Type: float
Valid values:
hard_threshold_max_psi = 90
###############################################################################
7. Customize victim selection: adjusting badness of processes
7.1. Ignore positive oom_score_adj
Description:
Type: boolean
Valid values: True and False
ignore_positive_oom_score_adj = False
7.2. Forbid negative badness
Description:
Type: boolean
Valid values: True and False
forbid_negative_badness = True
7.3.1. Matching process names with RE patterns change their badness
Syntax:
@ -204,28 +256,27 @@ ignore_positive_oom_score_adj = False
Prefer firefox tabs
@BADNESS_ADJ_RE_NAME 300 /// ^Web Content$
7.3.2. Matching CGroup_v1-line with RE patterns
5.2. Matching CGroup_v1-line with RE patterns
@BADNESS_ADJ_RE_CGROUP_V1 -100 /// ^/system\.slice/
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/
@BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
5.3. Matching CGroup_v2-line with RE patterns
7.3.3. Matching CGroup_v2-line with RE patterns
@BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
5.4. Matching eUIDs with RE patterns
7.3.4. Matching eUIDs with RE patterns
@BADNESS_ADJ_RE_UID -100 /// ^0$
5.5. Matching realpath with RE patterns
7.3.5. Matching realpath with RE patterns
@BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
5.6. Matching cmdlines with RE patterns
7.3.6. Matching cmdlines with RE patterns
A good option that allows fine adjustment.
@ -233,21 +284,22 @@ ignore_positive_oom_score_adj = False
@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
Prefer firefox tabs (Web Content and WebExtensions)
@BADNESS_ADJ_RE_CMDLINE 300 /// -appomni
@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
5.7. Matching environ with RE patterns
7.3.7. Matching environ with RE patterns
@BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
Note that you can control badness also via systemd units via
OOMScoreAdjust, see
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
###############################################################################
6. Customize corrective actions.
8. Customize soft corrective actions
TODO: docs
@ -260,6 +312,8 @@ ignore_positive_oom_score_adj = False
@SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
@SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
@SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID
$PID will be replaced by process PID.
$NAME will be replaced by process name.
$SERVICE will be replaced by .service if it exists (otherwise it will be
@ -267,60 +321,80 @@ ignore_positive_oom_score_adj = False
###############################################################################
7. GUI notifications & low memory warnings
9. Misc settings
post_action_gui_notifications = True
Description:
Type: float
Valid values:
Enable GUI notifications about the low level of available memory.
Valid values are True and False.
max_soft_exit_time = 10
low_memory_warnings_enabled = True
Description:
Type: string
Valid values:
Execute the command instead of sending GUI notifications if the value is
not an empty line. For example:
warning_exe = cat /proc/meminfo &
post_kill_exe =
warning_exe =
Description:
Type: integer
Valid values:
Can be specified in % (percent) and M (MiB).
Valid values are floating-point numbers from the range [0; 100] %.
min_badness = 10
warning_threshold_min_mem = 20 %
Description:
Type: float
Valid values:
warning_threshold_min_swap = 25 %
post_soft_action_delay = 3
warning_threshold_max_zram = 50 %
Description:
Type: float
Valid values:
warning_threshold_max_psi = 100
post_zombie_delay = 0.1
Valid values are floating-point numbers from the range [1; 300].
Description:
Type: float
Valid values:
min_post_warning_delay = 30
victim_cache_time = 10
env_cache_time = 300
Description:
Type: float
Valid values:
Ampersands (&) will be replaced with asterisks (*) in process
names and in commands.
exe_timeout = 20
###############################################################################
8. Verbosity
10. Verbosity, debug, logging
Display the configuration when the program starts.
Valid values are True and False.
Description:
Type: boolean
Valid values: True and False
print_config_at_startup = False
Print memory check results.
Valid values are True and False.
Description:
Type: boolean
Valid values: True and False
print_mem_check_results = False
Description:
Type: float
Valid values:
min_mem_report_interval = 60
Description:
Type: boolean
Valid values: True and False
print_proc_table = False
Description:
Type: string
Valid values:
None
cgroup_v1
@ -331,36 +405,59 @@ print_proc_table = False
extra_table_info = None
Description:
Type: boolean
Valid values: True and False
print_victim_status = True
max_victim_ancestry_depth = 3
Description:
Type: boolean
Valid values: True and False
print_victim_cmdline = False
Description:
Type: integer
Valid values:
max_victim_ancestry_depth = 3
Description:
Type: boolean
Valid values: True and False
print_statistics = True
Print sleep periods between memory checks.
Valid values are True and False.
Description:
Type: boolean
Valid values: True and False
debug_psi = False
Description:
Type: boolean
Valid values: True and False
debug_gui_notifications = False
Description:
Type: boolean
Valid values: True and False
debug_sleep = False
separate_log = False
Description:
Type: boolean
Valid values: True and False
debug_threading = False
###############################################################################
Description:
Type: boolean
Valid values: True and False
9. Misc
max_soft_exit_time = 10
post_kill_exe =
forbid_negative_badness = True
separate_log = False
###############################################################################

View File

@ -6,188 +6,240 @@
The configuration includes the following sections:
0. Common zram settings
1. Memory levels to respond to as an OOM threat
2. Response on PSI memory metrics
3. The frequency of checking the level of available memory
(and CPU usage)
4. The prevention of killing innocent victims
5. Impact on the badness of processes via matching their names, cgroups and
cmdlines with specified regular expressions
6. Customize corrective actions: the execution of a specific command
instead of sending the SIGTERM signal
7. GUI notifications:
- low memory warnings
- OOM prevention results
8. Output verbosity
9. Misc
1. Common zram settings
2. Common PSI settings
3. Poll rate
4. Warnings and notifications
5. Soft threshold
6. Hard threshold
7. Customize victim selection: adjusting badness of processes
8. Customize soft corrective actions
9. Misc settings
10. Verbosity, debug, logging
Just read the description of the parameters and edit the values.
Please restart the program after editing the config.
More docs will be written later.
TODO: improve descriptions
###############################################################################
0. Common zram settings
1. Common zram settings
See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
You may need to set `zram_checking_enabled = True` if you have a large zram disksize.
Key: zram_checking_enabled
Description:
Type: boolean
Valid values: True and False
Default value: False
zram_checking_enabled = False
###############################################################################
1. Thresholds below which a signal should be sent to the victim
2. Common PSI settings
Sets the available memory levels at or below which SIGTERM or SIGKILL
signals are sent. The signal is sent when MemAvailable and
SwapFree (in /proc/meminfo) both drop below the corresponding
values at the same time. Can be specified in % (percent) and M (MiB).
Valid values are floating-point numbers from the range [0; 100] %.
MemAvailable levels.
soft_threshold_min_mem = 8 %
hard_threshold_min_mem = 4 %
SwapFree levels.
soft_threshold_min_swap = 10 %
hard_threshold_min_swap = 5 %
Specifies the total share of zram in memory; if it is exceeded, the
corresponding signals are sent. As the share of zram in memory
increases, system responsiveness may degrade. 90 % is a
typical hang level, so setting very high values is not recommended.
Can be specified in % and M. Valid values are floating-point
numbers from the range [0; 90] %.
soft_threshold_max_zram = 60 %
hard_threshold_max_zram = 65 %
###############################################################################
2. Response on PSI memory metrics (it needs Linux 4.20 and up)
About PSI:
https://facebookmicrosites.github.io/psi/
Disabled by default (psi_checking_enabled = False).
Description:
Type: boolean
Valid values: True and False
psi_checking_enabled = False
Choose a path to PSI file.
By default it monitors system-wide file: /proc/pressure/memory
You can also set a file to monitor a single cgroup slice.
For example:
psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
Execute the command
find /sys/fs/cgroup -name memory.pressure
to find available memory.pressure files (except /proc/pressure/memory).
(relevant for cgroup2)
Description:
Type: string
Valid values:
psi_path = /proc/pressure/memory
Valid psi_metrics are:
some_avg10
some_avg60
some_avg300
full_avg10
full_avg60
full_avg300
some_avg10 is most sensitive.
Description:
Type: string
Valid values:
psi_metrics = some_avg10
soft_threshold_max_psi = 60
Description:
Type: float
Valid values:
hard_threshold_max_psi = 90
>= 0, float
psi_excess_duration = 60
Description:
Type: float
Valid values:
psi_post_action_delay = 60
###############################################################################
3. The frequency of checking the amount of available memory
(and CPU usage)
3. Poll rate
Coefficients that affect the intensity of monitoring. Reducing
the coefficients can reduce CPU usage and increase the periods
between memory checks.
Why three coefficients instead of one? Because the swap fill rate
is usually lower than the RAM fill rate.
It is possible to set a lower intensity of monitoring for swap
without compromising OOM prevention, and thus reduce the CPU load.
The default values work well for desktops. On servers without rapid
fluctuations in memory levels the values can be reduced.
Valid values are positive floating-point numbers.
Description:
Type: float
Valid values:
fill_rate_mem = 4000
Description:
Type: float
Valid values:
fill_rate_swap = 1500
Description:
Type: float
Valid values:
fill_rate_zram = 6000
See also https://github.com/rfjakob/earlyoom/issues/61
Description:
Type: float
Valid values:
max_sleep = 3
Description:
Type: float
Valid values:
min_sleep = 0.1
Sleep time if soft threshold exceeded.
Description:
Type: float
Valid values:
over_sleep = 0.05
###############################################################################
4. The prevention of killing innocent victims
4. Warnings and notifications
Valid values are integers from the range [0; 1000].
Description:
Type: boolean
Valid values: True and False
min_badness = 10
post_action_gui_notifications = False
Valid values are non-negative floating-point numbers.
Min delay if a victim doesn't respond to SIGTERM in 10 ms.
Description:
Type: boolean
Valid values: True and False
post_soft_action_delay = 3
low_memory_warnings_enabled = False
post_zombie_delay = 0.1
Description:
Type: string
Valid values:
victim_cache_time = 10
warning_exe =
Valid values are True and False.
Description:
Type: float (+ % or M)
Valid values:
ignore_positive_oom_score_adj = False
warning_threshold_min_mem = 20 %
Description:
Type: float (+ % or M)
Valid values:
warning_threshold_min_swap = 20 %
Description:
Type: float (+ % or M)
Valid values:
warning_threshold_max_zram = 50 %
Description:
Type: float
Valid values:
warning_threshold_max_psi = 100
Description:
Type: float
Valid values:
min_post_warning_delay = 30
Description:
Type: float
Valid values:
env_cache_time = 300
###############################################################################
5. Impact on the badness of processes via matching their names,
cmdlines or UIDs with regular expressions using re.search().
5. Soft threshold
See https://en.wikipedia.org/wiki/Regular_expression and
https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions
Description:
Type: float (+ % or M)
Valid values:
Enabling these options slows down the search for the victim
because the names, cmdlines or UIDs of all processes
(except init and kthreads) are compared with the
specified regex patterns (in fact slowing down is caused by
reading all /proc/*/cmdline and /proc/*/status files).
soft_threshold_min_mem = 8 %
Use script `oom-sort` from nohang package to view
names, cmdlines and UIDs of processes.
Description:
Type: float (+ % or M)
Valid values:
5.1. Matching process names with RE patterns
soft_threshold_min_swap = 8 %
Description:
Type: float (+ % or M)
Valid values:
soft_threshold_max_zram = 60 %
Description:
Type: float
Valid values:
soft_threshold_max_psi = 60
###############################################################################
6. Hard threshold
hard_threshold_min_mem = 4 %
Description:
Type: float (+ % or M)
Valid values:
hard_threshold_min_swap = 4 %
Description:
Type: float (+ % or M)
Valid values:
hard_threshold_max_zram = 65 %
Description:
Type: float
Valid values:
hard_threshold_max_psi = 90
###############################################################################
7. Customize victim selection: adjusting badness of processes
7.1. Ignore positive oom_score_adj
Description:
Type: boolean
Valid values: True and False
ignore_positive_oom_score_adj = False
7.2. Forbid negative badness
Description:
Type: boolean
Valid values: True and False
forbid_negative_badness = True
7.3.1. Matching process names with RE patterns to change their badness
Syntax:
@ -201,27 +253,27 @@ ignore_positive_oom_score_adj = False
Example:
@BADNESS_ADJ_RE_NAME -500 /// ^sshd$
5.2. Matching CGroup_v1-line with RE patterns
7.3.2. Matching CGroup_v1-line with RE patterns
@BADNESS_ADJ_RE_CGROUP_V1 -100 /// ^/system\.slice/
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/
@BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
5.3. Matching CGroup_v2-line with RE patterns
7.3.3. Matching CGroup_v2-line with RE patterns
@BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
5.4. Matching eUIDs with RE patterns
7.3.4. Matching eUIDs with RE patterns
@BADNESS_ADJ_RE_UID -100 /// ^0$
5.5. Matching realpath with RE patterns
7.3.5. Matching realpath with RE patterns
@BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
5.6. Matching cmdlines with RE patterns
7.3.6. Matching cmdlines with RE patterns
A good option that allows fine adjustment.
@ -229,21 +281,22 @@ ignore_positive_oom_score_adj = False
@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
Prefer firefox tabs (Web Content and WebExtensions)
@BADNESS_ADJ_RE_CMDLINE 300 /// -appomni
@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
5.7. Matching environ with RE patterns
7.3.7. Matching environ with RE patterns
@BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
Note that you can control badness also via systemd units via
OOMScoreAdjust, see
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
###############################################################################
6. Customize corrective actions.
8. Customize soft corrective actions
TODO: docs
@ -256,6 +309,8 @@ ignore_positive_oom_score_adj = False
@SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
@SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
@SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID
$PID will be replaced by process PID.
$NAME will be replaced by process name.
$SERVICE will be replaced by .service if it exists (otherwise it will be
@ -263,59 +318,80 @@ ignore_positive_oom_score_adj = False
###############################################################################
7. GUI notifications & low memory warnings
9. Misc settings
post_action_gui_notifications = False
Description:
Type: float
Valid values:
Enable GUI notifications about the low level of available memory.
Valid values are True and False.
max_soft_exit_time = 10
low_memory_warnings_enabled = False
Description:
Type: string
Valid values:
Execute the command instead of sending GUI notifications if the value is
not an empty line. For example:
warning_exe = cat /proc/meminfo &
post_kill_exe =
warning_exe =
Description:
Type: integer
Valid values:
Can be specified in % (percent) and M (MiB).
Valid values are floating-point numbers from the range [0; 100] %.
min_badness = 10
warning_threshold_min_mem = 20 %
Description:
Type: float
Valid values:
warning_threshold_min_swap = 25 %
post_soft_action_delay = 3
warning_threshold_max_zram = 50 %
Description:
Type: float
Valid values:
warning_threshold_max_psi = 100
post_zombie_delay = 0.1
Valid values are floating-point numbers from the range [1; 300].
Description:
Type: float
Valid values:
min_post_warning_delay = 20
victim_cache_time = 10
env_cache_time = 300
Description:
Type: float
Valid values:
Ampersands (&) will be replaced with asterisks (*) in process
names and in commands.
exe_timeout = 20
###############################################################################
8. Verbosity
10. Verbosity, debug, logging
Display the configuration when the program starts.
Valid values are True and False.
Description:
Type: boolean
Valid values: True and False
print_config_at_startup = False
Print memory check results.
Valid values are True and False.
Description:
Type: boolean
Valid values: True and False
print_mem_check_results = False
Description:
Type: float
Valid values:
min_mem_report_interval = 60
Description:
Type: boolean
Valid values: True and False
print_proc_table = False
Description:
Type: string
Valid values:
None
cgroup_v1
@ -326,36 +402,59 @@ print_proc_table = False
extra_table_info = None
Description:
Type: boolean
Valid values: True and False
print_victim_status = True
max_victim_ancestry_depth = 3
Description:
Type: boolean
Valid values: True and False
print_victim_cmdline = False
Description:
Type: integer
Valid values:
max_victim_ancestry_depth = 3
Description:
Type: boolean
Valid values: True and False
print_statistics = True
Print sleep periods between memory checks.
Valid values are True and False.
Description:
Type: boolean
Valid values: True and False
debug_psi = False
Description:
Type: boolean
Valid values: True and False
debug_gui_notifications = False
Description:
Type: boolean
Valid values: True and False
debug_sleep = False
separate_log = False
Description:
Type: boolean
Valid values: True and False
debug_threading = False
###############################################################################
Description:
Type: boolean
Valid values: True and False
9. Misc
max_soft_exit_time = 10
post_kill_exe =
forbid_negative_badness = True
separate_log = False
###############################################################################

453
test.conf
View File

@ -6,186 +6,240 @@
The configuration includes the following sections:
0. Common zram settings
1. Memory levels to respond to as an OOM threat
2. Response on PSI memory metrics
3. The frequency of checking the level of available memory
(and CPU usage)
4. The prevention of killing innocent victims
5. Impact on the badness of processes via matching their names, cgroups and
cmdlines with specified regular expressions
6. Customize corrective actions: the execution of a specific command
instead of sending the SIGTERM signal
7. GUI notifications:
- low memory warnings
- OOM prevention results
8. Output verbosity
9. Misc
1. Common zram settings
2. Common PSI settings
3. Poll rate
4. Warnings and notifications
5. Soft threshold
6. Hard threshold
7. Customize victim selection: adjusting badness of processes
8. Customize soft corrective actions
9. Misc settings
10. Verbosity, debug, logging
Just read the description of the parameters and edit the values.
Please restart the program after editing the config.
TODO: improve descriptions
###############################################################################
0. Common zram settings
1. Common zram settings
See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
You may need to set `zram_checking_enabled = True` if you have a large zram disksize.
Key: zram_checking_enabled
Description:
Type: boolean
Valid values: True and False
Default value: False
zram_checking_enabled = True
###############################################################################
1. Thresholds below which a signal should be sent to the victim
2. Common PSI settings
Sets the available memory levels at or below which SIGTERM or SIGKILL
signals are sent. The signal is sent when MemAvailable and
SwapFree (in /proc/meminfo) both drop below the corresponding
values at the same time. Can be specified in % (percent) and M (MiB).
Valid values are floating-point numbers from the range [0; 100] %.
MemAvailable levels.
soft_threshold_min_mem = 10 %
hard_threshold_min_mem = 5 %
SwapFree levels.
soft_threshold_min_swap = 15 %
hard_threshold_min_swap = 5 %
Specifies the total share of zram in memory; if it is exceeded, the
corresponding signals are sent. As the share of zram in memory
increases, system responsiveness may degrade. 90 % is a
typical hang level, so setting very high values is not recommended.
Can be specified in % and M. Valid values are floating-point
numbers from the range [0; 90] %.
soft_threshold_max_zram = 50 %
hard_threshold_max_zram = 60 %
###############################################################################
2. Response on PSI memory metrics (it needs Linux 4.20 and up)
About PSI:
https://facebookmicrosites.github.io/psi/
Disabled by default (psi_checking_enabled = False).
Description:
Type: boolean
Valid values: True and False
psi_checking_enabled = True
Choose a path to PSI file.
By default it monitors system-wide file: /proc/pressure/memory
You can also set a file to monitor a single cgroup slice.
For example:
psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
Execute the command
find /sys/fs/cgroup -name memory.pressure
to find available memory.pressure files (except /proc/pressure/memory).
(relevant for cgroup2)
Description:
Type: string
Valid values:
psi_path = /proc/pressure/memory
Valid psi_metrics are:
some_avg10
some_avg60
some_avg300
full_avg10
full_avg60
full_avg300
some_avg10 is most sensitive.
Description:
Type: string
Valid values:
psi_metrics = some_avg10
soft_threshold_max_psi = 60
Description:
Type: float
Valid values:
hard_threshold_max_psi = 90
>= 0, float
psi_excess_duration = 60
Description:
Type: float
Valid values:
psi_post_action_delay = 60
###############################################################################
3. The frequency of checking the amount of available memory
(and CPU usage)
3. Poll rate
Coefficients that affect the intensity of monitoring. Reducing
the coefficients can reduce CPU usage and increase the periods
between memory checks.
Why three coefficients instead of one? Because the swap fill rate
is usually lower than the RAM fill rate.
It is possible to set a lower intensity of monitoring for swap
without compromising OOM prevention, and thus reduce the CPU load.
The default values work well for desktops. On servers without rapid
fluctuations in memory levels the values can be reduced.
Valid values are positive floating-point numbers.
Description:
Type: float
Valid values:
fill_rate_mem = 4000
Description:
Type: float
Valid values:
fill_rate_swap = 1500
Description:
Type: float
Valid values:
fill_rate_zram = 6000
See also https://github.com/rfjakob/earlyoom/issues/61
Description:
Type: float
Valid values:
max_sleep = 3
Description:
Type: float
Valid values:
min_sleep = 0.1
Sleep time if soft threshold exceeded.
Description:
Type: float
Valid values:
over_sleep = 0.05
###############################################################################
4. The prevention of killing innocent victims
4. Warnings and notifications
Valid values are integers from the range [0; 1000].
Description:
Type: boolean
Valid values: True and False
min_badness = 20
post_action_gui_notifications = True
Valid values are non-negative floating-point numbers.
Min delay if a victim doesn't respond to SIGTERM in 10 ms.
Description:
Type: boolean
Valid values: True and False
post_soft_action_delay = 3
low_memory_warnings_enabled = True
post_zombie_delay = 0.1
Description:
Type: string
Valid values:
victim_cache_time = 10
warning_exe =
Valid values are True and False.
Description:
Type: float (+ % or M)
Valid values:
ignore_positive_oom_score_adj = True
warning_threshold_min_mem = 20 %
Description:
Type: float (+ % or M)
Valid values:
warning_threshold_min_swap = 20 %
Description:
Type: float (+ % or M)
Valid values:
warning_threshold_max_zram = 50 %
Description:
Type: float
Valid values:
warning_threshold_max_psi = 100
Description:
Type: float
Valid values:
min_post_warning_delay = 30
Description:
Type: float
Valid values:
env_cache_time = 300
###############################################################################
5. Impact on the badness of processes via matching their names,
cmdlines or UIDs with regular expressions using re.search().
5. Soft threshold
See https://en.wikipedia.org/wiki/Regular_expression and
https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions
Description:
Type: float (+ % or M)
Valid values:
Enabling these options slows down the search for the victim
because the names, cmdlines or UIDs of all processes
(except init and kthreads) are compared with the
specified regex patterns (in fact slowing down is caused by
reading all /proc/*/cmdline and /proc/*/status files).
soft_threshold_min_mem = 8 %
Use script `oom-sort` from nohang package to view
names, cmdlines and UIDs of processes.
Description:
Type: float (+ % or M)
Valid values:
5.1. Matching process names with RE patterns
soft_threshold_min_swap = 8 %
Description:
Type: float (+ % or M)
Valid values:
soft_threshold_max_zram = 60 %
Description:
Type: float
Valid values:
soft_threshold_max_psi = 60
###############################################################################
6. Hard threshold
hard_threshold_min_mem = 4 %
Description:
Type: float (+ % or M)
Valid values:
hard_threshold_min_swap = 4 %
Description:
Type: float (+ % or M)
Valid values:
hard_threshold_max_zram = 65 %
Description:
Type: float
Valid values:
hard_threshold_max_psi = 90
###############################################################################
7. Customize victim selection: adjusting badness of processes
7.1. Ignore positive oom_score_adj
Description:
Type: boolean
Valid values: True and False
ignore_positive_oom_score_adj = True
7.2. Forbid negative badness
Description:
Type: boolean
Valid values: True and False
forbid_negative_badness = True
7.3.1. Matching process names with RE patterns to change their badness
Syntax:
@ -199,61 +253,69 @@ ignore_positive_oom_score_adj = True
Example:
@BADNESS_ADJ_RE_NAME -500 /// ^sshd$
5.2. Matching CGroup_v1-line with RE patterns
7.3.2. Matching CGroup_v1-line with RE patterns
@BADNESS_ADJ_RE_CGROUP_V1 5 /// ^/system\.slice/
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/
@BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
5.3. Matching CGroup_v2-line with RE patterns
7.3.3. Matching CGroup_v2-line with RE patterns
@BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
5.4. Matching eUIDs with RE patterns
7.3.4. Matching eUIDs with RE patterns
@BADNESS_ADJ_RE_UID 50 /// ^0$
@BADNESS_ADJ_RE_UID -100 /// ^0$
5.5. Matching realpath with RE patterns
7.3.5. Matching realpath with RE patterns
@BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
5.6. Matching cmdlines with RE patterns
7.3.6. Matching cmdlines with RE patterns
A good option that allows fine adjustment.
Prefer chromium tabs and electron-based apps
@BADNESS_ADJ_RE_CMDLINE 2000 /// ^/bin/sleep
Prefer chromium tabs and electron-based apps
@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
Prefer firefox tabs (Web Content and WebExtensions)
@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
5.7. Matching environ with RE patterns
7.3.7. Matching environ with RE patterns
@BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
Note that you can control badness also via systemd units via
OOMScoreAdjust, see
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
###############################################################################
6. Customize corrective actions.
8. Customize soft corrective actions
TODO: docs
Syntax:
KEY REGEXP SEPARATOR COMMAND
@SOFT_ACTION_RE_NAME ^tail$ /// kill -SEGV $PID
@SOFT_ACTION_RE_NAME ^foo$ /// kill -SEGV $PID
@SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID
@SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
@SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
@SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID
$PID will be replaced by process PID.
$NAME will be replaced by process name.
$SERVICE will be replaced by .service if it exists (otherwise it will be
@ -261,59 +323,80 @@ ignore_positive_oom_score_adj = True
###############################################################################
7. GUI notifications & low memory warnings
9. Misc settings
post_action_gui_notifications = True
Description:
Type: float
Valid values:
Enable GUI notifications about the low level of available memory.
Valid values are True and False.
max_soft_exit_time = 10
low_memory_warnings_enabled = True
Description:
Type: string
Valid values:
Execute the command instead of sending GUI notifications if the value is
not an empty line. For example:
warning_exe = cat /proc/meminfo &
post_kill_exe =
warning_exe = echo 0
Description:
Type: integer
Valid values:
Can be specified in % (percent) and M (MiB).
Valid values are floating-point numbers from the range [0; 100] %.
min_badness = 10
warning_threshold_min_mem = 25 %
Description:
Type: float
Valid values:
warning_threshold_min_swap = 35 %
post_soft_action_delay = 3
warning_threshold_max_zram = 40 %
Description:
Type: float
Valid values:
warning_threshold_max_psi = 100
post_zombie_delay = 0.1
Valid values are floating-point numbers from the range [1; 300].
Description:
Type: float
Valid values:
min_post_warning_delay = 20
victim_cache_time = 10
env_cache_time = 300
Description:
Type: float
Valid values:
Ampersands (&) will be replaced with asterisks (*) in process
names and in commands.
exe_timeout = 20
###############################################################################
8. Verbosity
10. Verbosity, debug, logging
Display the configuration when the program starts.
Valid values are True and False.
Description:
Type: boolean
Valid values: True and False
print_config_at_startup = True
Print memory check results.
Valid values are True and False.
Description:
Type: boolean
Valid values: True and False
print_mem_check_results = True
Description:
Type: float
Valid values:
min_mem_report_interval = 0
Description:
Type: boolean
Valid values: True and False
print_proc_table = True
Description:
Type: string
Valid values:
None
cgroup_v1
@ -322,39 +405,61 @@ print_proc_table = True
cmdline
environ
extra_table_info = cgroup_v1
extra_table_info = None
Description:
Type: boolean
Valid values: True and False
print_victim_status = True
max_victim_ancestry_depth = 99
Description:
Type: boolean
Valid values: True and False
print_victim_cmdline = True
Description:
Type: integer
Valid values:
max_victim_ancestry_depth = 99
Description:
Type: boolean
Valid values: True and False
print_statistics = True
Print sleep periods between memory checks.
Valid values are True and False.
Description:
Type: boolean
Valid values: True and False
debug_psi = True
Description:
Type: boolean
Valid values: True and False
debug_gui_notifications = True
Description:
Type: boolean
Valid values: True and False
debug_sleep = True
separate_log = True
Description:
Type: boolean
Valid values: True and False
debug_threading = True
Description:
Type: boolean
Valid values: True and False
###############################################################################
9. Misc
max_soft_exit_time = 10
post_kill_exe = echo 0
forbid_negative_badness = True
separate_log = True
###############################################################################