Fixes
This commit is contained in:
parent
3cd460e6eb
commit
31ee445e7f
21
README.md
21
README.md
@ -149,17 +149,16 @@ optional arguments:
|
|||||||
|
|
||||||
The program can be configured by editing the [config file](https://github.com/hakavlad/nohang/blob/master/nohang.conf). The configuration includes the following sections:
|
The program can be configured by editing the [config file](https://github.com/hakavlad/nohang/blob/master/nohang.conf). The configuration includes the following sections:
|
||||||
|
|
||||||
1. Memory levels to respond to as an OOM threat
|
1. Common zram settings
|
||||||
2. Response on PSI memory metrics
|
2. Common PSI settings
|
||||||
3. The frequency of checking the level of available memory (and CPU usage)
|
3. Poll rate
|
||||||
4. The prevention of killing innocent victims
|
4. Warnings and notifications
|
||||||
5. Impact on the badness of processes via matching their names, cgroups, realpaths, cmdlines and UIDs with certain regular expressions
|
5. Soft threshold
|
||||||
6. The execution of a specific command or sending any signal instead of sending the SIGTERM signal
|
6. Hard threshold
|
||||||
7. GUI notifications:
|
7. Customize victim selection
|
||||||
- notifications of corrective actions taken
|
8. Customize soft corrective actions
|
||||||
- low memory warnings (or executing certain command instead)
|
9. Misc settings
|
||||||
8. Verbosity
|
10. Verbosity, debug, logging
|
||||||
9. Misc
|
|
||||||
|
|
||||||
Just read the description of the parameters and edit the values. Please restart nohang to apply the changes. The default path to the config after installation is `/etc/nohang/nohang.conf`.
|
Just read the description of the parameters and edit the values. Please restart nohang to apply the changes. The default path to the config after installation is `/etc/nohang/nohang.conf`.
|
||||||
|
|
||||||
|
368
nohang
368
nohang
@ -9,45 +9,68 @@ from sys import stdout, stderr, argv, exit
|
|||||||
from re import search
|
from re import search
|
||||||
from sre_constants import error as invalid_re
|
from sre_constants import error as invalid_re
|
||||||
from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP
|
from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP
|
||||||
from threading import Thread
|
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
###############################################################################
|
||||||
|
|
||||||
# define functions
|
# define functions
|
||||||
|
|
||||||
|
|
||||||
def exe(cmd):
|
def exe(cmd):
|
||||||
""" execute cmd
|
""" execute cmd in subprocess.Popen()
|
||||||
"""
|
"""
|
||||||
log('Execute the command: {}'.format(cmd))
|
|
||||||
t0 = monotonic()
|
|
||||||
write_self_oom_score_adj(self_oom_score_adj_max)
|
|
||||||
err = os.system(cmd)
|
|
||||||
write_self_oom_score_adj(self_oom_score_adj_min)
|
|
||||||
dt = monotonic() - t0
|
|
||||||
log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3)))
|
|
||||||
return err
|
|
||||||
|
|
||||||
|
cmd_num_dict['cmd_num'] += 1
|
||||||
def go(func, *a):
|
cmd_num = cmd_num_dict['cmd_num']
|
||||||
""" run func in new thread
|
log('Execute the command({}) in {}: {}'.format(
|
||||||
"""
|
cmd_num,
|
||||||
t1 = monotonic()
|
threading.current_thread().getName(),
|
||||||
th = Thread(target=func, args=a)
|
cmd))
|
||||||
th_name = th.getName()
|
t3 = monotonic()
|
||||||
if debug_threading:
|
with Popen(cmd, shell=True) as proc:
|
||||||
log('Starting {}'.format(th_name))
|
|
||||||
try:
|
try:
|
||||||
|
proc.wait(timeout=exe_timeout)
|
||||||
|
exit_status = proc.poll()
|
||||||
|
t4 = monotonic()
|
||||||
|
log('Command({}) execution completed in {} sec; exit status' \
|
||||||
|
': {}'.format(cmd_num, round(t4 - t3, 3), exit_status))
|
||||||
|
except TimeoutExpired:
|
||||||
|
proc.kill()
|
||||||
|
t4 = monotonic()
|
||||||
|
log('TimeoutExpired for the command({}) in {} sec'.format(
|
||||||
|
cmd_num, round(t4 - t3, 3)))
|
||||||
|
|
||||||
|
|
||||||
|
def start_thread(func, *a, **k):
|
||||||
|
""" run function in a new thread
|
||||||
|
"""
|
||||||
|
|
||||||
|
th = threading.Thread(target=func, args=a, kwargs=k)
|
||||||
|
th_name = th.getName()
|
||||||
|
|
||||||
|
if debug_threading:
|
||||||
|
|
||||||
|
log('Starting {} from {}'.format(
|
||||||
|
th_name, threading.current_thread().getName()
|
||||||
|
))
|
||||||
|
|
||||||
|
try:
|
||||||
|
|
||||||
|
t1 = monotonic()
|
||||||
th.start()
|
th.start()
|
||||||
t2 = monotonic()
|
t2 = monotonic()
|
||||||
|
|
||||||
if debug_threading:
|
if debug_threading:
|
||||||
log('{} has started in {} ms'.format(
|
log('{} has started in {} ms, {} threads currently alive'.format(
|
||||||
th_name, round((t2 - t1) * 1000, 1)))
|
th_name, round((t2 - t1) * 1000, 1), threading.active_count()
|
||||||
|
))
|
||||||
|
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
if debug_threading:
|
|
||||||
log('RuntimeError: cannot start {}'.format(th_name))
|
log('RuntimeError: cannot start {}'.format(th_name))
|
||||||
|
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
def re_pid_environ(pid):
|
def re_pid_environ(pid):
|
||||||
"""
|
"""
|
||||||
@ -57,7 +80,6 @@ def re_pid_environ(pid):
|
|||||||
'DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus')
|
'DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus')
|
||||||
returns None if these vars are not in /proc/[pid]/environ
|
returns None if these vars are not in /proc/[pid]/environ
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open('/proc/' + pid + '/environ') as f:
|
with open('/proc/' + pid + '/environ') as f:
|
||||||
env = f.read()
|
env = f.read()
|
||||||
@ -128,8 +150,9 @@ def root_notify_env():
|
|||||||
|
|
||||||
|
|
||||||
def pop(cmd, username):
|
def pop(cmd, username):
|
||||||
|
""" run cmd in subprocess.Popen()
|
||||||
"""
|
"""
|
||||||
"""
|
|
||||||
if swap_total == 0:
|
if swap_total == 0:
|
||||||
wait_time = 2
|
wait_time = 2
|
||||||
else:
|
else:
|
||||||
@ -140,6 +163,7 @@ def pop(cmd, username):
|
|||||||
with Popen(cmd) as proc:
|
with Popen(cmd) as proc:
|
||||||
try:
|
try:
|
||||||
proc.wait(timeout=wait_time)
|
proc.wait(timeout=wait_time)
|
||||||
|
err = proc.poll()
|
||||||
except TimeoutExpired:
|
except TimeoutExpired:
|
||||||
proc.kill()
|
proc.kill()
|
||||||
if debug_gui_notifications:
|
if debug_gui_notifications:
|
||||||
@ -147,8 +171,12 @@ def pop(cmd, username):
|
|||||||
|
|
||||||
t4 = monotonic()
|
t4 = monotonic()
|
||||||
|
|
||||||
|
err = 0
|
||||||
|
|
||||||
if debug_gui_notifications:
|
if debug_gui_notifications:
|
||||||
log('Popen time: {} sec; cmd: {}'.format(round(t4 - t3, 3), cmd))
|
pass
|
||||||
|
#log('Popen time: {} sec; exit status: {}; cmd: {}'.format(round(t4 - t3, 3), err, cmd))
|
||||||
|
log('Popen time: {} sec; exit status: {}; cmd: {}'.format(round(t4 - t3, 3), err, cmd))
|
||||||
|
|
||||||
|
|
||||||
def send_notification(title, body):
|
def send_notification(title, body):
|
||||||
@ -214,7 +242,7 @@ def send_notification(title, body):
|
|||||||
body
|
body
|
||||||
]
|
]
|
||||||
|
|
||||||
go(pop, cmd, username)
|
start_thread(pop, cmd, username)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if debug_gui_notifications:
|
if debug_gui_notifications:
|
||||||
@ -227,7 +255,7 @@ def send_notify_warn():
|
|||||||
log('Warning threshold exceeded')
|
log('Warning threshold exceeded')
|
||||||
|
|
||||||
if check_warning_exe:
|
if check_warning_exe:
|
||||||
go(exe, warning_exe)
|
start_thread(exe, warning_exe)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
@ -238,7 +266,7 @@ def send_notify_warn():
|
|||||||
round(swap_free / (swap_total + 0.1) * 100)
|
round(swap_free / (swap_total + 0.1) * 100)
|
||||||
)
|
)
|
||||||
|
|
||||||
go(send_notification, title, body)
|
start_thread(send_notification, title, body)
|
||||||
|
|
||||||
|
|
||||||
def send_notify(threshold, name, pid):
|
def send_notify(threshold, name, pid):
|
||||||
@ -261,7 +289,7 @@ def send_notify(threshold, name, pid):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
go(send_notification, title, body)
|
start_thread(send_notification, title, body)
|
||||||
|
|
||||||
|
|
||||||
def send_notify_etc(pid, name, command):
|
def send_notify_etc(pid, name, command):
|
||||||
@ -277,43 +305,27 @@ def send_notify_etc(pid, name, command):
|
|||||||
'mmand:\n<b>{}</b>'.format(
|
'mmand:\n<b>{}</b>'.format(
|
||||||
pid, name.replace('&', '*'), command.replace('&', '*'))
|
pid, name.replace('&', '*'), command.replace('&', '*'))
|
||||||
|
|
||||||
go(send_notification, title, body)
|
start_thread(send_notification, title, body)
|
||||||
|
|
||||||
|
|
||||||
def check_config():
|
def check_config():
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
|
|
||||||
log('#' * 79)
|
log('#' * 79)
|
||||||
|
|
||||||
log('0. Common zram settings')
|
log('\n1. Common zram settings')
|
||||||
|
|
||||||
log(' zram_checking_enabled: {}'.format(zram_checking_enabled))
|
log(' zram_checking_enabled: {}'.format(zram_checking_enabled))
|
||||||
|
|
||||||
log('1. Thresholds below which a signal should be sent to the victim')
|
log('\n2. Common PSI settings')
|
||||||
|
|
||||||
log(' soft_threshold_min_mem: {} MiB, {} %'.format(
|
|
||||||
round(soft_threshold_min_mem_mb), round(soft_threshold_min_mem_percent, 1)))
|
|
||||||
log(' hard_threshold_min_mem: {} MiB, {} %'.format(
|
|
||||||
round(hard_threshold_min_mem_mb), round(hard_threshold_min_mem_percent, 1)))
|
|
||||||
log(' soft_threshold_min_swap: {}'.format(soft_threshold_min_swap))
|
|
||||||
log(' hard_threshold_min_swap: {}'.format(hard_threshold_min_swap))
|
|
||||||
log(' soft_threshold_max_zram: {} MiB, {} %'.format(
|
|
||||||
round(soft_threshold_max_zram_mb), round(soft_threshold_max_zram_percent, 1)))
|
|
||||||
log(' hard_threshold_max_zram: {} MiB, {} %'.format(
|
|
||||||
round(hard_threshold_max_zram_mb), round(hard_threshold_max_zram_percent, 1)))
|
|
||||||
|
|
||||||
log('2. Response on PSI memory metrics')
|
|
||||||
|
|
||||||
log(' psi_checking_enabled: {}'.format(psi_checking_enabled))
|
log(' psi_checking_enabled: {}'.format(psi_checking_enabled))
|
||||||
log(' psi_path: {}'.format(psi_path))
|
log(' psi_path: {}'.format(psi_path))
|
||||||
log(' psi_metrics: {}'.format(psi_metrics))
|
log(' psi_metrics: {}'.format(psi_metrics))
|
||||||
log(' soft_threshold_max_psi: {}'.format(soft_threshold_max_psi))
|
|
||||||
log(' hard_threshold_max_psi: {}'.format(hard_threshold_max_psi))
|
|
||||||
log(' psi_excess_duration: {} sec'.format(psi_excess_duration))
|
log(' psi_excess_duration: {} sec'.format(psi_excess_duration))
|
||||||
log(' psi_post_action_delay: {} sec'.format(psi_post_action_delay))
|
log(' psi_post_action_delay: {} sec'.format(psi_post_action_delay))
|
||||||
|
|
||||||
log('3. The frequency of checking the amount of available memory')
|
log('\n3. Poll rate')
|
||||||
|
|
||||||
log(' fill_rate_mem: {}'.format(fill_rate_mem))
|
log(' fill_rate_mem: {}'.format(fill_rate_mem))
|
||||||
log(' fill_rate_swap: {}'.format(fill_rate_swap))
|
log(' fill_rate_swap: {}'.format(fill_rate_swap))
|
||||||
@ -322,18 +334,56 @@ def check_config():
|
|||||||
log(' min_sleep: {} sec'.format(min_sleep))
|
log(' min_sleep: {} sec'.format(min_sleep))
|
||||||
log(' over_sleep: {} sec'.format(over_sleep))
|
log(' over_sleep: {} sec'.format(over_sleep))
|
||||||
|
|
||||||
log('4. The prevention of killing innocent victims')
|
log('\n4. Warnings and notifications')
|
||||||
|
|
||||||
log(' min_badness: {}'.format(min_badness))
|
log(' post_action_gui_notifications: {}'.format(
|
||||||
log(' post_soft_action_delay: {} sec'.format(post_soft_action_delay))
|
post_action_gui_notifications))
|
||||||
log(' post_zombie_delay: {} sec'.format(post_zombie_delay))
|
|
||||||
log(' victim_cache_time: {} sec'.format(victim_cache_time))
|
|
||||||
log(' ignore_positive_oom_score_adj: {}'.format(
|
|
||||||
ignore_positive_oom_score_adj))
|
|
||||||
|
|
||||||
log('5. Impact on the badness of processes')
|
log(' low_memory_warnings_enabled: {}'.format(
|
||||||
|
low_memory_warnings_enabled))
|
||||||
|
log(' warning_exe: {}'.format(warning_exe))
|
||||||
|
log(' warning_threshold_min_mem: {} MiB, {} %'.format(round(
|
||||||
|
warning_threshold_min_mem_mb), round(
|
||||||
|
warning_threshold_min_mem_percent, 1)))
|
||||||
|
log(' warning_threshold_min_swap: {}'.format
|
||||||
|
(warning_threshold_min_swap))
|
||||||
|
log(' warning_threshold_max_zram: {} MiB, {} %'.format(round(
|
||||||
|
warning_threshold_max_zram_mb), round(
|
||||||
|
warning_threshold_max_zram_percent, 1)))
|
||||||
|
log(' warning_threshold_max_psi: {}'.format(
|
||||||
|
warning_threshold_max_psi))
|
||||||
|
log(' min_post_warning_delay: {} sec'.format(
|
||||||
|
min_post_warning_delay))
|
||||||
|
|
||||||
log('5.1. Matching process names with RE patterns')
|
log(' env_cache_time: {}'.format(env_cache_time))
|
||||||
|
|
||||||
|
log('\n5. Soft threshold')
|
||||||
|
|
||||||
|
log(' soft_threshold_min_mem: {} MiB, {} %'.format(round(soft_threshold_min_mem_mb), round(soft_threshold_min_mem_percent, 1)))
|
||||||
|
log(' soft_threshold_min_swap: {}'.format(soft_threshold_min_swap))
|
||||||
|
log(' soft_threshold_max_zram: {} MiB, {} %'.format(round(soft_threshold_max_zram_mb), round(soft_threshold_max_zram_percent, 1)))
|
||||||
|
log(' soft_threshold_max_psi: {}'.format(soft_threshold_max_psi))
|
||||||
|
|
||||||
|
log('\n6. Hard threshold')
|
||||||
|
|
||||||
|
log(' hard_threshold_min_mem: {} MiB, {} %'.format(round(hard_threshold_min_mem_mb), round(hard_threshold_min_mem_percent, 1)))
|
||||||
|
log(' hard_threshold_min_swap: {}'.format(hard_threshold_min_swap))
|
||||||
|
log(' hard_threshold_max_zram: {} MiB, {} %'.format(round(hard_threshold_max_zram_mb), round(hard_threshold_max_zram_percent, 1)))
|
||||||
|
log(' hard_threshold_max_psi: {}'.format(hard_threshold_max_psi))
|
||||||
|
|
||||||
|
log('\n7. Customize victim selection: adjusting badness of processes')
|
||||||
|
|
||||||
|
log('\n7.1. Ignore positive oom_score_adj')
|
||||||
|
|
||||||
|
log(' ignore_positive_oom_score_adj: {}'.format(ignore_positive_oom_score_adj))
|
||||||
|
|
||||||
|
log('\n7.2. Forbid negative badness')
|
||||||
|
|
||||||
|
log(' forbid_negative_badness: {}'.format(forbid_negative_badness))
|
||||||
|
|
||||||
|
log('\n7.3. ')
|
||||||
|
|
||||||
|
log('7.3.1. Matching process names with RE patterns')
|
||||||
if len(badness_adj_re_name_list) > 0:
|
if len(badness_adj_re_name_list) > 0:
|
||||||
log(' regexp: badness_adj:')
|
log(' regexp: badness_adj:')
|
||||||
for i in badness_adj_re_name_list:
|
for i in badness_adj_re_name_list:
|
||||||
@ -341,7 +391,7 @@ def check_config():
|
|||||||
else:
|
else:
|
||||||
log(' (not set)')
|
log(' (not set)')
|
||||||
|
|
||||||
log('5.2. Matching CGroup_v1-line with RE patterns')
|
log('7.3.2. Matching CGroup_v1-line with RE patterns')
|
||||||
if len(badness_adj_re_cgroup_v1_list) > 0:
|
if len(badness_adj_re_cgroup_v1_list) > 0:
|
||||||
log(' regexp: badness_adj:')
|
log(' regexp: badness_adj:')
|
||||||
for i in badness_adj_re_cgroup_v1_list:
|
for i in badness_adj_re_cgroup_v1_list:
|
||||||
@ -349,7 +399,7 @@ def check_config():
|
|||||||
else:
|
else:
|
||||||
log(' (not set)')
|
log(' (not set)')
|
||||||
|
|
||||||
log('5.3. Matching CGroup_v2-line with RE patterns')
|
log('7.3.3. Matching CGroup_v2-line with RE patterns')
|
||||||
if len(badness_adj_re_cgroup_v2_list) > 0:
|
if len(badness_adj_re_cgroup_v2_list) > 0:
|
||||||
log(' regexp: badness_adj:')
|
log(' regexp: badness_adj:')
|
||||||
for i in badness_adj_re_cgroup_v1_list:
|
for i in badness_adj_re_cgroup_v1_list:
|
||||||
@ -357,7 +407,7 @@ def check_config():
|
|||||||
else:
|
else:
|
||||||
log(' (not set)')
|
log(' (not set)')
|
||||||
|
|
||||||
log('5.4. Matching eUIDs with RE patterns')
|
log('7.3.4. Matching eUIDs with RE patterns')
|
||||||
if len(badness_adj_re_cgroup_v2_list) > 0:
|
if len(badness_adj_re_cgroup_v2_list) > 0:
|
||||||
log(' regexp: badness_adj:')
|
log(' regexp: badness_adj:')
|
||||||
for i in badness_adj_re_uid_list:
|
for i in badness_adj_re_uid_list:
|
||||||
@ -365,7 +415,7 @@ def check_config():
|
|||||||
else:
|
else:
|
||||||
log(' (not set)')
|
log(' (not set)')
|
||||||
|
|
||||||
log('5.5. Matching realpath with RE patterns')
|
log('7.3.5. Matching realpath with RE patterns')
|
||||||
if len(badness_adj_re_cgroup_v2_list) > 0:
|
if len(badness_adj_re_cgroup_v2_list) > 0:
|
||||||
log(' regexp: badness_adj:')
|
log(' regexp: badness_adj:')
|
||||||
for i in badness_adj_re_realpath_list:
|
for i in badness_adj_re_realpath_list:
|
||||||
@ -373,7 +423,7 @@ def check_config():
|
|||||||
else:
|
else:
|
||||||
log(' (not set)')
|
log(' (not set)')
|
||||||
|
|
||||||
log('5.6. Matching cmdlines with RE patterns')
|
log('7.3.6. Matching cmdlines with RE patterns')
|
||||||
if len(badness_adj_re_cgroup_v2_list) > 0:
|
if len(badness_adj_re_cgroup_v2_list) > 0:
|
||||||
log(' regexp: badness_adj:')
|
log(' regexp: badness_adj:')
|
||||||
for i in badness_adj_re_cmdline_list:
|
for i in badness_adj_re_cmdline_list:
|
||||||
@ -381,7 +431,7 @@ def check_config():
|
|||||||
else:
|
else:
|
||||||
log(' (not set)')
|
log(' (not set)')
|
||||||
|
|
||||||
log('5.7. Matching environ with RE patterns')
|
log('7.3.7. Matching environ with RE patterns')
|
||||||
if len(badness_adj_re_cgroup_v2_list) > 0:
|
if len(badness_adj_re_cgroup_v2_list) > 0:
|
||||||
log(' regexp: badness_adj:')
|
log(' regexp: badness_adj:')
|
||||||
for i in badness_adj_re_environ_list:
|
for i in badness_adj_re_environ_list:
|
||||||
@ -389,7 +439,7 @@ def check_config():
|
|||||||
else:
|
else:
|
||||||
log(' (not set)')
|
log(' (not set)')
|
||||||
|
|
||||||
log('6. Customize corrective actions')
|
log('\n8. Customize corrective actions')
|
||||||
|
|
||||||
if len(soft_actions_list) > 0:
|
if len(soft_actions_list) > 0:
|
||||||
log(' Match by: regexp: command: ')
|
log(' Match by: regexp: command: ')
|
||||||
@ -398,45 +448,43 @@ def check_config():
|
|||||||
else:
|
else:
|
||||||
log(' (not set)')
|
log(' (not set)')
|
||||||
|
|
||||||
log('7. GUI notifications')
|
log('\n9. Misc')
|
||||||
|
|
||||||
log(' post_action_gui_notifications: {}'.format(
|
log(' max_soft_exit_time: {} sec'.format(max_soft_exit_time))
|
||||||
post_action_gui_notifications))
|
|
||||||
log(' low_memory_warnings_enabled: {}'.format(
|
|
||||||
low_memory_warnings_enabled))
|
|
||||||
log(' warning_exe: {}'.format(warning_exe))
|
|
||||||
log(' warning_threshold_min_mem: {} MiB, {} %'.format(round(
|
|
||||||
warning_threshold_min_mem_mb), round(warning_threshold_min_mem_percent, 1)))
|
|
||||||
log(' warning_threshold_min_swap: {}'.format(warning_threshold_min_swap))
|
|
||||||
log(' warning_threshold_max_zram: {} MiB, {} %'.format(round(
|
|
||||||
warning_threshold_max_zram_mb), round(warning_threshold_max_zram_percent, 1)))
|
|
||||||
log(' warning_threshold_max_psi: {}'.format(warning_threshold_max_psi))
|
|
||||||
log(' min_post_warning_delay: {} sec'.format(min_post_warning_delay))
|
|
||||||
|
|
||||||
log('8. Verbosity')
|
log(' post_kill_exe: {}'.format(post_kill_exe))
|
||||||
|
|
||||||
|
log(' min_badness: {}'.format(min_badness))
|
||||||
|
|
||||||
|
log(' post_soft_action_delay: {} sec'.format(
|
||||||
|
post_soft_action_delay))
|
||||||
|
log(' post_zombie_delay: {} sec'.format(post_zombie_delay))
|
||||||
|
log(' victim_cache_time: {} sec'.format(victim_cache_time))
|
||||||
|
log(' exe_timeout: {} sec'.format(exe_timeout))
|
||||||
|
|
||||||
|
log('\n10. Verbosity')
|
||||||
|
|
||||||
log(' print_config_at_startup: {}'.format(print_config_at_startup))
|
log(' print_config_at_startup: {}'.format(print_config_at_startup))
|
||||||
|
|
||||||
log(' print_mem_check_results: {}'.format(print_mem_check_results))
|
log(' print_mem_check_results: {}'.format(print_mem_check_results))
|
||||||
log(' min_mem_report_interval: {} sec'.format(min_mem_report_interval))
|
log(' min_mem_report_interval: {} sec'.format(
|
||||||
log(' debug_sleep: {}'.format(debug_sleep))
|
min_mem_report_interval))
|
||||||
log(' print_statistics: {}'.format(print_statistics))
|
|
||||||
log(' print_proc_table: {}'.format(print_proc_table))
|
log(' print_proc_table: {}'.format(print_proc_table))
|
||||||
log(' extra_table_info: {}'.format(extra_table_info))
|
log(' extra_table_info: {}'.format(extra_table_info))
|
||||||
|
|
||||||
log(' print_victim_status: {}'.format(print_victim_status))
|
log(' print_victim_status: {}'.format(print_victim_status))
|
||||||
log(' print_victim_cmdline: {}'.format(print_victim_cmdline))
|
log(' print_victim_cmdline: {}'.format(print_victim_cmdline))
|
||||||
log(' max_victim_ancestry_depth: {}'.format(max_victim_ancestry_depth))
|
log(' max_victim_ancestry_depth: {}'.format(max_victim_ancestry_depth))
|
||||||
|
|
||||||
|
log(' print_statistics: {}'.format(print_statistics))
|
||||||
|
|
||||||
log(' debug_gui_notifications: {}'.format(debug_gui_notifications))
|
log(' debug_gui_notifications: {}'.format(debug_gui_notifications))
|
||||||
log(' separate_log: {}'.format(separate_log))
|
|
||||||
log(' debug_psi: {}'.format(debug_psi))
|
log(' debug_psi: {}'.format(debug_psi))
|
||||||
|
log(' debug_sleep: {}'.format(debug_sleep))
|
||||||
|
log(' debug_threading: {}'.format(debug_threading))
|
||||||
|
log(' separate_log: {}'.format(separate_log))
|
||||||
|
|
||||||
log('9. Misc')
|
|
||||||
|
|
||||||
log(' max_soft_exit_time: {} sec'.format(max_soft_exit_time))
|
|
||||||
log(' post_kill_exe: {}'.format(post_kill_exe))
|
|
||||||
log(' forbid_negative_badness: {}'.format(
|
|
||||||
forbid_negative_badness))
|
|
||||||
|
|
||||||
# log(': {}'.format())
|
|
||||||
log('#' * 79)
|
log('#' * 79)
|
||||||
|
|
||||||
if check_config_flag:
|
if check_config_flag:
|
||||||
@ -448,7 +496,6 @@ def get_swap_threshold_tuple(string):
|
|||||||
# re (Num %, True) or (Num KiB, False)
|
# re (Num %, True) or (Num KiB, False)
|
||||||
"""Returns KiB value if abs val was set in config, or tuple with %"""
|
"""Returns KiB value if abs val was set in config, or tuple with %"""
|
||||||
# return tuple with abs and bool: (abs %, True) or (abs MiB, False)
|
# return tuple with abs and bool: (abs %, True) or (abs MiB, False)
|
||||||
|
|
||||||
if string.endswith('%'):
|
if string.endswith('%'):
|
||||||
valid = string_to_float_convert_test(string[:-1])
|
valid = string_to_float_convert_test(string[:-1])
|
||||||
if valid is None:
|
if valid is None:
|
||||||
@ -949,6 +996,11 @@ def errprint(*text):
|
|||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
print(*text, file=stderr, flush=True)
|
print(*text, file=stderr, flush=True)
|
||||||
|
try:
|
||||||
|
if separate_log:
|
||||||
|
logging.info(*msg)
|
||||||
|
except NameError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def mlockall():
|
def mlockall():
|
||||||
@ -1652,11 +1704,8 @@ def check_mem_swap_ex():
|
|||||||
if (mem_available <= hard_threshold_min_mem_kb and
|
if (mem_available <= hard_threshold_min_mem_kb and
|
||||||
swap_free <= hard_threshold_min_swap_kb):
|
swap_free <= hard_threshold_min_swap_kb):
|
||||||
|
|
||||||
mem_info = 'Memory status that requ' \
|
mem_info = 'Memory status that requires corrective actions:\n MemAvailable [{} MiB, {} %] <= hard_threshold_min_mem [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= hard_threshold_min_swap [{} MiB, {} %]'.format(
|
||||||
'ires corrective actions (hard threshold exceeded):' \
|
|
||||||
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
|
|
||||||
'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
|
|
||||||
'p_min_sigkill [{} MiB, {} %]'.format(
|
|
||||||
kib_to_mib(mem_available),
|
kib_to_mib(mem_available),
|
||||||
percent(mem_available / mem_total),
|
percent(mem_available / mem_total),
|
||||||
kib_to_mib(hard_threshold_min_mem_kb),
|
kib_to_mib(hard_threshold_min_mem_kb),
|
||||||
@ -1669,14 +1718,13 @@ def check_mem_swap_ex():
|
|||||||
return (SIGKILL, mem_info, mem_available, hard_threshold_min_swap_kb,
|
return (SIGKILL, mem_info, mem_available, hard_threshold_min_swap_kb,
|
||||||
soft_threshold_min_swap_kb, swap_free, swap_total)
|
soft_threshold_min_swap_kb, swap_free, swap_total)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (mem_available <= soft_threshold_min_mem_kb and
|
if (mem_available <= soft_threshold_min_mem_kb and
|
||||||
swap_free <= soft_threshold_min_swap_kb):
|
swap_free <= soft_threshold_min_swap_kb):
|
||||||
|
|
||||||
mem_info = 'Memory status that requi' \
|
mem_info = 'Memory status that requires corrective actions:\n MemAvailable [{} MiB, {} %] <= soft_threshold_min_mem [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= soft_threshold_min_swap [{} MiB, {} %]'.format(
|
||||||
'res corrective actions (soft threshold exceeded):' \
|
|
||||||
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
|
|
||||||
'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
|
|
||||||
'p_min_sigterm [{} MiB, {} %]'.format(
|
|
||||||
kib_to_mib(mem_available),
|
kib_to_mib(mem_available),
|
||||||
percent(mem_available / mem_total),
|
percent(mem_available / mem_total),
|
||||||
kib_to_mib(soft_threshold_min_mem_kb),
|
kib_to_mib(soft_threshold_min_mem_kb),
|
||||||
@ -1689,6 +1737,7 @@ def check_mem_swap_ex():
|
|||||||
return (SIGTERM, mem_info, mem_available, hard_threshold_min_swap_kb,
|
return (SIGTERM, mem_info, mem_available, hard_threshold_min_swap_kb,
|
||||||
soft_threshold_min_swap_kb, swap_free, swap_total)
|
soft_threshold_min_swap_kb, swap_free, swap_total)
|
||||||
|
|
||||||
|
|
||||||
if low_memory_warnings_enabled:
|
if low_memory_warnings_enabled:
|
||||||
|
|
||||||
if (mem_available <= warning_threshold_min_mem_kb and swap_free <=
|
if (mem_available <= warning_threshold_min_mem_kb and swap_free <=
|
||||||
@ -1707,10 +1756,8 @@ def check_zram_ex():
|
|||||||
|
|
||||||
if mem_used_zram >= hard_threshold_max_zram_kb:
|
if mem_used_zram >= hard_threshold_max_zram_kb:
|
||||||
|
|
||||||
mem_info = 'Memory status that requir' \
|
mem_info = 'Memory status that requires corrective actions:\n MemUsedZram [{} MiB, {} %] >= hard_threshold_max_zram [{} MiB, {} %]'.format(
|
||||||
'es corrective actions (hard threshold exceeded):' \
|
|
||||||
'\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
|
|
||||||
'kill [{} MiB, {} %]'.format(
|
|
||||||
kib_to_mib(mem_used_zram),
|
kib_to_mib(mem_used_zram),
|
||||||
percent(mem_used_zram / mem_total),
|
percent(mem_used_zram / mem_total),
|
||||||
kib_to_mib(hard_threshold_max_zram_kb),
|
kib_to_mib(hard_threshold_max_zram_kb),
|
||||||
@ -1718,11 +1765,10 @@ def check_zram_ex():
|
|||||||
|
|
||||||
return SIGKILL, mem_info, mem_used_zram
|
return SIGKILL, mem_info, mem_used_zram
|
||||||
|
|
||||||
|
|
||||||
if mem_used_zram >= soft_threshold_max_zram_kb:
|
if mem_used_zram >= soft_threshold_max_zram_kb:
|
||||||
|
|
||||||
mem_info = 'Memory status that requires corrective actions (soft th' \
|
mem_info = 'Memory status that requires corrective actions:\n MemUsedZram [{} MiB, {} %] >= soft_threshold_max_zram [{} M, {} %]'.format(
|
||||||
'reshold exceeded):\n MemUsedZram [{} MiB, {} %] >= zram_max_s' \
|
|
||||||
'igterm [{} M, {} %]'.format(
|
|
||||||
kib_to_mib(mem_used_zram),
|
kib_to_mib(mem_used_zram),
|
||||||
percent(mem_used_zram / mem_total),
|
percent(mem_used_zram / mem_total),
|
||||||
kib_to_mib(soft_threshold_max_zram_kb),
|
kib_to_mib(soft_threshold_max_zram_kb),
|
||||||
@ -1871,6 +1917,20 @@ def is_victim_alive(victim_id):
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def implement_corrective_action(
|
def implement_corrective_action(
|
||||||
threshold,
|
threshold,
|
||||||
mem_info_list,
|
mem_info_list,
|
||||||
@ -1882,6 +1942,8 @@ def implement_corrective_action(
|
|||||||
zram_threshold,
|
zram_threshold,
|
||||||
zram_info,
|
zram_info,
|
||||||
psi_info):
|
psi_info):
|
||||||
|
""" great and terrible function
|
||||||
|
"""
|
||||||
|
|
||||||
log('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
|
log('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
|
||||||
|
|
||||||
@ -2067,7 +2129,7 @@ def implement_corrective_action(
|
|||||||
|
|
||||||
cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name(
|
cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name(
|
||||||
pid)).replace('$SERVICE', service)
|
pid)).replace('$SERVICE', service)
|
||||||
go(exe, cmd)
|
start_thread(exe, cmd)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if exit_status == 0:
|
if exit_status == 0:
|
||||||
@ -2212,7 +2274,7 @@ def implement_corrective_action(
|
|||||||
|
|
||||||
log('Execute post_kill_exe')
|
log('Execute post_kill_exe')
|
||||||
|
|
||||||
go(exe, cmd)
|
start_thread(exe, cmd)
|
||||||
|
|
||||||
if post_action_gui_notifications:
|
if post_action_gui_notifications:
|
||||||
if soft_match:
|
if soft_match:
|
||||||
@ -2246,6 +2308,23 @@ def implement_corrective_action(
|
|||||||
return psi_t0
|
return psi_t0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def sleep_after_check_mem():
|
def sleep_after_check_mem():
|
|
"""Specify sleep times depending on rates and available memory."""
|
"""Specify sleep times depending on rates and available memory."""
|
||||||
|
|
||||||
@ -2372,7 +2451,7 @@ def calculate_percent(arg_key):
|
|||||||
return mem_min_kb, mem_min_mb, mem_min_percent
|
return mem_min_kb, mem_min_mb, mem_min_percent
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
# {victim_id : {'time': timestamp, 'name': name}
|
# {victim_id : {'time': timestamp, 'name': name}
|
||||||
@ -2547,7 +2626,7 @@ except ValueError:
|
|||||||
log('config: ' + config)
|
log('config: ' + config)
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
###############################################################################
|
||||||
|
|
||||||
# parsing the config with obtaining the parameters dictionary
|
# parsing the config with obtaining the parameters dictionary
|
||||||
|
|
||||||
@ -2750,12 +2829,11 @@ else:
|
|||||||
soft_actions = True
|
soft_actions = True
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# post_zombie_delay = 0.1
|
|
||||||
|
|
||||||
# victim_cache_time = 50
|
|
||||||
|
|
||||||
|
|
||||||
# extracting parameters from the dictionary
|
# extracting parameters from the dictionary
|
||||||
@ -2777,8 +2855,6 @@ post_action_gui_notifications = conf_parse_bool(
|
|||||||
'post_action_gui_notifications')
|
'post_action_gui_notifications')
|
||||||
|
|
||||||
|
|
||||||
if low_memory_warnings_enabled or post_action_gui_notifications:
|
|
||||||
from subprocess import Popen, TimeoutExpired
|
|
||||||
|
|
||||||
|
|
||||||
debug_threading = conf_parse_bool('debug_threading')
|
debug_threading = conf_parse_bool('debug_threading')
|
||||||
@ -2850,13 +2926,35 @@ if 'env_cache_time' in config_dict:
|
|||||||
errprint('Invalid env_cache_time value, not float\nExit')
|
errprint('Invalid env_cache_time value, not float\nExit')
|
||||||
exit(1)
|
exit(1)
|
||||||
if env_cache_time < 0:
|
if env_cache_time < 0:
|
||||||
errprint('fill_rate_mem MUST be >= 0\nExit')
|
errprint('env_cache_time MUST be >= 0\nExit')
|
||||||
exit(1)
|
exit(1)
|
||||||
else:
|
else:
|
||||||
errprint('fill_rate_mem not in config\nExit')
|
errprint('env_cache_time not in config\nExit')
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if 'exe_timeout' in config_dict:
|
||||||
|
exe_timeout = string_to_float_convert_test(
|
||||||
|
config_dict['exe_timeout'])
|
||||||
|
if exe_timeout is None:
|
||||||
|
errprint('Invalid exe_timeout value, not float\nExit')
|
||||||
|
exit(1)
|
||||||
|
if exe_timeout <= 0:
|
||||||
|
errprint('exe_timeout MUST be > 0\nExit')
|
||||||
|
exit(1)
|
||||||
|
else:
|
||||||
|
errprint('exe_timeout not in config\nExit')
|
||||||
|
exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if 'fill_rate_mem' in config_dict:
|
if 'fill_rate_mem' in config_dict:
|
||||||
fill_rate_mem = string_to_float_convert_test(config_dict['fill_rate_mem'])
|
fill_rate_mem = string_to_float_convert_test(config_dict['fill_rate_mem'])
|
||||||
if fill_rate_mem is None:
|
if fill_rate_mem is None:
|
||||||
@ -3230,9 +3328,26 @@ if print_proc_table_flag:
|
|||||||
func_print_proc_table()
|
func_print_proc_table()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if (low_memory_warnings_enabled or \
|
||||||
|
post_action_gui_notifications or \
|
||||||
|
check_warning_exe or \
|
||||||
|
soft_actions or \
|
||||||
|
post_kill_exe != ''):
|
||||||
|
|
||||||
|
import threading
|
||||||
|
from subprocess import Popen, TimeoutExpired
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
psi_support = os.path.exists(psi_path)
|
psi_support = os.path.exists(psi_path)
|
||||||
|
|
||||||
|
|
||||||
@ -3298,7 +3413,7 @@ fill_rate_zram = fill_rate_zram * 1024
|
|||||||
|
|
||||||
|
|
||||||
warn_time_now = 0
|
warn_time_now = 0
|
||||||
warn_time_delta = 1000
|
warn_time_delta = 1000 # ?
|
||||||
warn_timer = 0
|
warn_timer = 0
|
||||||
|
|
||||||
|
|
||||||
@ -3372,6 +3487,15 @@ envd = dict()
|
|||||||
envd['list_with_envs'] = envd['t'] = None
|
envd['list_with_envs'] = envd['t'] = None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
cmd_num_dict = dict()
|
||||||
|
cmd_num_dict['cmd_num'] = 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
@ -6,188 +6,240 @@
|
|||||||
|
|
||||||
The configuration includes the following sections:
|
The configuration includes the following sections:
|
||||||
|
|
||||||
0. Common zram settings
|
1. Common zram settings
|
||||||
1. Memory levels to respond to as an OOM threat
|
2. Common PSI settings
|
||||||
2. Response on PSI memory metrics
|
3. Poll rate
|
||||||
3. The frequency of checking the level of available memory
|
4. Warnings and notifications
|
||||||
(and CPU usage)
|
5. Soft threshold
|
||||||
4. The prevention of killing innocent victims
|
6. Hard threshold
|
||||||
5. Impact on the badness of processes via matching their names, cgroups and
|
7. Customize victim selection: adjusting badness of processes
|
||||||
cmdlines with specified regular expressions
|
8. Customize soft corrective actions
|
||||||
6. Customize corrective actions: the execution of a specific command
|
9. Misc settings
|
||||||
instead of sending the SIGTERM signal
|
10. Verbosity, debug, logging
|
||||||
7. GUI notifications:
|
|
||||||
- low memory warnings
|
|
||||||
- OOM prevention results
|
|
||||||
8. Output verbosity
|
|
||||||
9. Misc
|
|
||||||
|
|
||||||
Just read the description of the parameters and edit the values.
|
Just read the description of the parameters and edit the values.
|
||||||
Please restart the program after editing the config.
|
Please restart the program after editing the config.
|
||||||
|
|
||||||
More docs will be written later.
|
TODO: improve descriptions
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
0. Common zram settings
|
1. Common zram settings
|
||||||
|
|
||||||
See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
|
Key: zram_checking_enabled
|
||||||
You may need to set `zram_checking_enabled = True` if you have a large zram disksize.
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
Default value: False
|
||||||
|
|
||||||
zram_checking_enabled = False
|
zram_checking_enabled = False
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
1. Thresholds below which a signal should be sent to the victim
|
2. Common PSI settings
|
||||||
|
|
||||||
Sets the available memory levels at or below which SIGTERM or SIGKILL
|
Description:
|
||||||
signals are sent. The signal will be sent if MemAvailable and
|
Type: boolean
|
||||||
SwapFree (in /proc/meminfo) at the same time will drop below the
|
Valid values: True and False
|
||||||
corresponding values. Can be specified in % (percent) and M (MiB).
|
|
||||||
Valid values are floating-point numbers from the range [0; 100] %.
|
|
||||||
|
|
||||||
MemAvailable levels.
|
|
||||||
|
|
||||||
soft_threshold_min_mem = 8 %
|
|
||||||
hard_threshold_min_mem = 4 %
|
|
||||||
|
|
||||||
SwapFree levels.
|
|
||||||
|
|
||||||
soft_threshold_min_swap = 10 %
|
|
||||||
hard_threshold_min_swap = 5 %
|
|
||||||
|
|
||||||
Specifying the total share of zram in memory, if exceeded the
|
|
||||||
corresponding signals are sent. As the share of zram in memory
|
|
||||||
increases, the responsiveness of the system may fall. 90 % is a
|
|
||||||
usual hang level, not recommended to set very high.
|
|
||||||
|
|
||||||
Can be specified in % and M. Valid values are floating-point
|
|
||||||
numbers from the range [0; 90] %.
|
|
||||||
|
|
||||||
soft_threshold_max_zram = 60 %
|
|
||||||
hard_threshold_max_zram = 65 %
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
2. Response on PSI memory metrics (it needs Linux 4.20 and up)
|
|
||||||
|
|
||||||
About PSI:
|
|
||||||
https://facebookmicrosites.github.io/psi/
|
|
||||||
|
|
||||||
Disabled by default (psi_checking_enabled = False).
|
|
||||||
|
|
||||||
psi_checking_enabled = False
|
psi_checking_enabled = False
|
||||||
|
|
||||||
Choose a path to PSI file.
|
Description:
|
||||||
By default it monitors system-wide file: /proc/pressure/memory
|
Type: string
|
||||||
You can also set a file to monitor a single cgroup slice.
|
Valid values:
|
||||||
For example:
|
|
||||||
psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
|
|
||||||
psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
|
|
||||||
psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
|
|
||||||
|
|
||||||
Execute the command
|
|
||||||
find /sys/fs/cgroup -name memory.pressure
|
|
||||||
to find available memory.pressure files (except /proc/pressure/memory).
|
|
||||||
(actual for cgroup2)
|
|
||||||
|
|
||||||
psi_path = /proc/pressure/memory
|
psi_path = /proc/pressure/memory
|
||||||
|
|
||||||
Valid psi_metrics are:
|
Description:
|
||||||
some_avg10
|
Type: string
|
||||||
some_avg60
|
Valid values:
|
||||||
some_avg300
|
|
||||||
full_avg10
|
|
||||||
full_avg60
|
|
||||||
full_avg300
|
|
||||||
|
|
||||||
some_avg10 is most sensitive.
|
|
||||||
|
|
||||||
psi_metrics = some_avg10
|
psi_metrics = some_avg10
|
||||||
|
|
||||||
soft_threshold_max_psi = 60
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
hard_threshold_max_psi = 90
|
|
||||||
|
|
||||||
>= 0, float
|
|
||||||
psi_excess_duration = 60
|
psi_excess_duration = 60
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
psi_post_action_delay = 60
|
psi_post_action_delay = 60
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
3. The frequency of checking the amount of available memory
|
3. Poll rate
|
||||||
(and CPU usage)
|
|
||||||
|
|
||||||
Coefficients that affect the intensity of monitoring. Reducing
|
Description:
|
||||||
the coefficients can reduce CPU usage and increase the periods
|
Type: float
|
||||||
between memory checks.
|
Valid values:
|
||||||
|
|
||||||
Why three coefficients instead of one? Because the swap fill rate
|
|
||||||
is usually lower than the RAM fill rate.
|
|
||||||
|
|
||||||
It is possible to set a lower intensity of monitoring for swap
|
|
||||||
without compromising OOM prevention, and thus reduce the CPU load.
|
|
||||||
|
|
||||||
The default values work well for desktops. On servers without rapid
|
|
||||||
fluctuations in memory levels the values can be reduced.
|
|
||||||
|
|
||||||
Valid values are positive floating-point numbers.
|
|
||||||
|
|
||||||
fill_rate_mem = 4000
|
fill_rate_mem = 4000
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
fill_rate_swap = 1500
|
fill_rate_swap = 1500
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
fill_rate_zram = 6000
|
fill_rate_zram = 6000
|
||||||
|
|
||||||
See also https://github.com/rfjakob/earlyoom/issues/61
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
max_sleep = 3
|
max_sleep = 3
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
min_sleep = 0.1
|
min_sleep = 0.1
|
||||||
|
|
||||||
Sleep time if soft threshold exceeded.
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
over_sleep = 0.05
|
over_sleep = 0.05
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
4. The prevention of killing innocent victims
|
4. Warnings and notifications
|
||||||
|
|
||||||
Valid values are integers from the range [0; 1000].
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
min_badness = 10
|
post_action_gui_notifications = True
|
||||||
|
|
||||||
Valid values are non-negative floating-point numbers.
|
Description:
|
||||||
Min delay if a victim doesn't respond to SIGTERM in 10 ms.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
post_soft_action_delay = 3
|
low_memory_warnings_enabled = True
|
||||||
|
|
||||||
post_zombie_delay = 0.1
|
Description:
|
||||||
|
Type: string
|
||||||
|
Valid values:
|
||||||
|
|
||||||
victim_cache_time = 10
|
warning_exe =
|
||||||
|
|
||||||
Valid values are True and False.
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
ignore_positive_oom_score_adj = False
|
warning_threshold_min_mem = 20 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
warning_threshold_min_swap = 20 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
warning_threshold_max_zram = 50 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
warning_threshold_max_psi = 100
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
min_post_warning_delay = 30
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
env_cache_time = 300
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
5. Impact on the badness of processes via matching their names,
|
5. Soft threshold
|
||||||
cmdlines or UIDs with regular expressions using re.search().
|
|
||||||
|
|
||||||
See https://en.wikipedia.org/wiki/Regular_expression and
|
Description:
|
||||||
https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Enabling these options slows down the search for the victim
|
soft_threshold_min_mem = 8 %
|
||||||
because the names, cmdlines or UIDs of all processes
|
|
||||||
(except init and kthreads) are compared with the
|
|
||||||
specified regex patterns (in fact slowing down is caused by
|
|
||||||
reading all /proc/*/cmdline and /proc/*/status files).
|
|
||||||
|
|
||||||
Use script `oom-sort` from nohang package to view
|
Description:
|
||||||
names, cmdlines and UIDs of processes.
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
5.1. Matching process names with RE patterns
|
soft_threshold_min_swap = 8 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
soft_threshold_max_zram = 60 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
soft_threshold_max_psi = 60
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
6. Hard threshold
|
||||||
|
|
||||||
|
hard_threshold_min_mem = 4 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
hard_threshold_min_swap = 4 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
hard_threshold_max_zram = 65 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
hard_threshold_max_psi = 90
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
7. Customize victim selection: adjusting badness of processes
|
||||||
|
|
||||||
|
7.1. Ignore positive oom_score_adj
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
|
ignore_positive_oom_score_adj = False
|
||||||
|
|
||||||
|
7.2. Forbid negative badness
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
|
forbid_negative_badness = True
|
||||||
|
|
||||||
|
|
||||||
|
7.3.1. Matching process names with RE patterns change their badness
|
||||||
|
|
||||||
Syntax:
|
Syntax:
|
||||||
|
|
||||||
@ -204,28 +256,27 @@ ignore_positive_oom_score_adj = False
|
|||||||
Prefer firefox tabs
|
Prefer firefox tabs
|
||||||
@BADNESS_ADJ_RE_NAME 300 /// ^Web Content$
|
@BADNESS_ADJ_RE_NAME 300 /// ^Web Content$
|
||||||
|
|
||||||
|
7.3.2. Matching CGroup_v1-line with RE patterns
|
||||||
|
|
||||||
5.2. Matching CGroup_v1-line with RE patterns
|
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 -100 /// ^/system\.slice/
|
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
|
@BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
|
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
|
||||||
|
|
||||||
5.3. Matching CGroup_v2-line with RE patterns
|
7.3.3. Matching CGroup_v2-line with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
|
@BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
|
||||||
|
|
||||||
5.4. Matching eUIDs with RE patterns
|
7.3.4. Matching eUIDs with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_UID -100 /// ^0$
|
@BADNESS_ADJ_RE_UID -100 /// ^0$
|
||||||
|
|
||||||
5.5. Matching realpath with RE patterns
|
7.3.5. Matching realpath with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
|
@BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
|
||||||
|
|
||||||
5.6. Matching cmdlines with RE patterns
|
7.3.6. Matching cmdlines with RE patterns
|
||||||
|
|
||||||
A good option that allows fine adjustment.
|
A good option that allows fine adjustment.
|
||||||
|
|
||||||
@ -233,21 +284,22 @@ ignore_positive_oom_score_adj = False
|
|||||||
@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
|
@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
|
||||||
|
|
||||||
Prefer firefox tabs (Web Content and WebExtensions)
|
Prefer firefox tabs (Web Content and WebExtensions)
|
||||||
@BADNESS_ADJ_RE_CMDLINE 300 /// -appomni
|
@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
|
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
|
||||||
|
|
||||||
5.7. Matching environ with RE patterns
|
7.3.7. Matching environ with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
|
@BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
|
||||||
|
|
||||||
|
|
||||||
Note that you can control badness also via systemd units via
|
Note that you can control badness also via systemd units via
|
||||||
OOMScoreAdjust, see
|
OOMScoreAdjust, see
|
||||||
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
|
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
6. Customize corrective actions.
|
8. Customize soft corrective actions
|
||||||
|
|
||||||
TODO: docs
|
TODO: docs
|
||||||
|
|
||||||
@ -260,6 +312,8 @@ ignore_positive_oom_score_adj = False
|
|||||||
@SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
|
@SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
|
||||||
@SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
|
@SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
|
||||||
|
|
||||||
|
@SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID
|
||||||
|
|
||||||
$PID will be replaced by process PID.
|
$PID will be replaced by process PID.
|
||||||
$NAME will be replaced by process name.
|
$NAME will be replaced by process name.
|
||||||
$SERVICE will be replaced by .service if it exists (otherwise it will be
|
$SERVICE will be replaced by .service if it exists (otherwise it will be
|
||||||
@ -267,60 +321,80 @@ ignore_positive_oom_score_adj = False
|
|||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
7. GUI notifications & low memory warnings
|
9. Misc settings
|
||||||
|
|
||||||
post_action_gui_notifications = True
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Enable GUI notifications about the low level of available memory.
|
max_soft_exit_time = 10
|
||||||
Valid values are True and False.
|
|
||||||
|
|
||||||
low_memory_warnings_enabled = True
|
Description:
|
||||||
|
Type: string
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Execute the command instead of sending GUI notifications if the value is
|
post_kill_exe =
|
||||||
not empty line. For example:
|
|
||||||
warning_exe = cat /proc/meminfo &
|
|
||||||
|
|
||||||
warning_exe =
|
Description:
|
||||||
|
Type: integer
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Can be specified in % (percent) and M (MiB).
|
min_badness = 10
|
||||||
Valid values are floating-point numbers from the range [0; 100] %.
|
|
||||||
|
|
||||||
warning_threshold_min_mem = 20 %
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
warning_threshold_min_swap = 25 %
|
post_soft_action_delay = 3
|
||||||
|
|
||||||
warning_threshold_max_zram = 50 %
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
warning_threshold_max_psi = 100
|
post_zombie_delay = 0.1
|
||||||
|
|
||||||
Valid values are floating-point numbers from the range [1; 300].
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
min_post_warning_delay = 30
|
victim_cache_time = 10
|
||||||
|
|
||||||
env_cache_time = 300
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
exe_timeout = 20
|
||||||
Ampersands (&) will be replaced with asterisks (*) in process
|
|
||||||
names and in commands.
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
8. Verbosity
|
10. Verbosity, debug, logging
|
||||||
|
|
||||||
Display the configuration when the program starts.
|
Description:
|
||||||
Valid values are True and False.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_config_at_startup = False
|
print_config_at_startup = False
|
||||||
|
|
||||||
Print memory check results.
|
Description:
|
||||||
Valid values are True and False.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_mem_check_results = False
|
print_mem_check_results = False
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
min_mem_report_interval = 60
|
min_mem_report_interval = 60
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_proc_table = False
|
print_proc_table = False
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: string
|
||||||
Valid values:
|
Valid values:
|
||||||
None
|
None
|
||||||
cgroup_v1
|
cgroup_v1
|
||||||
@ -331,36 +405,59 @@ print_proc_table = False
|
|||||||
|
|
||||||
extra_table_info = None
|
extra_table_info = None
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_victim_status = True
|
print_victim_status = True
|
||||||
|
|
||||||
max_victim_ancestry_depth = 3
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_victim_cmdline = False
|
print_victim_cmdline = False
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: integer
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
max_victim_ancestry_depth = 3
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_statistics = True
|
print_statistics = True
|
||||||
|
|
||||||
Print sleep periods between memory checks.
|
Description:
|
||||||
Valid values are True and False.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_psi = False
|
debug_psi = False
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_gui_notifications = False
|
debug_gui_notifications = False
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_sleep = False
|
debug_sleep = False
|
||||||
|
|
||||||
separate_log = False
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_threading = False
|
debug_threading = False
|
||||||
|
|
||||||
###############################################################################
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
9. Misc
|
separate_log = False
|
||||||
|
|
||||||
max_soft_exit_time = 10
|
|
||||||
|
|
||||||
post_kill_exe =
|
|
||||||
|
|
||||||
forbid_negative_badness = True
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
435
nohang.conf
435
nohang.conf
@ -6,188 +6,240 @@
|
|||||||
|
|
||||||
The configuration includes the following sections:
|
The configuration includes the following sections:
|
||||||
|
|
||||||
0. Common zram settings
|
1. Common zram settings
|
||||||
1. Memory levels to respond to as an OOM threat
|
2. Common PSI settings
|
||||||
2. Response on PSI memory metrics
|
3. Poll rate
|
||||||
3. The frequency of checking the level of available memory
|
4. Warnings and notifications
|
||||||
(and CPU usage)
|
5. Soft threshold
|
||||||
4. The prevention of killing innocent victims
|
6. Hard threshold
|
||||||
5. Impact on the badness of processes via matching their names, cgroups and
|
7. Customize victim selection: adjusting badness of processes
|
||||||
cmdlines with specified regular expressions
|
8. Customize soft corrective actions
|
||||||
6. Customize corrective actions: the execution of a specific command
|
9. Misc settings
|
||||||
instead of sending the SIGTERM signal
|
10. Verbosity, debug, logging
|
||||||
7. GUI notifications:
|
|
||||||
- low memory warnings
|
|
||||||
- OOM prevention results
|
|
||||||
8. Output verbosity
|
|
||||||
9. Misc
|
|
||||||
|
|
||||||
Just read the description of the parameters and edit the values.
|
Just read the description of the parameters and edit the values.
|
||||||
Please restart the program after editing the config.
|
Please restart the program after editing the config.
|
||||||
|
|
||||||
More docs will be written later.
|
TODO: improve descriptions
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
0. Common zram settings
|
1. Common zram settings
|
||||||
|
|
||||||
See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
|
Key: zram_checking_enabled
|
||||||
You may need to set `zram_checking_enabled = True` if you have a large zram disksize.
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
Default value: False
|
||||||
|
|
||||||
zram_checking_enabled = False
|
zram_checking_enabled = False
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
1. Thresholds below which a signal should be sent to the victim
|
2. Common PSI settings
|
||||||
|
|
||||||
Sets the available memory levels at or below which SIGTERM or SIGKILL
|
Description:
|
||||||
signals are sent. The signal will be sent if MemAvailable and
|
Type: boolean
|
||||||
SwapFree (in /proc/meminfo) at the same time will drop below the
|
Valid values: True and False
|
||||||
corresponding values. Can be specified in % (percent) and M (MiB).
|
|
||||||
Valid values are floating-point numbers from the range [0; 100] %.
|
|
||||||
|
|
||||||
MemAvailable levels.
|
|
||||||
|
|
||||||
soft_threshold_min_mem = 8 %
|
|
||||||
hard_threshold_min_mem = 4 %
|
|
||||||
|
|
||||||
SwapFree levels.
|
|
||||||
|
|
||||||
soft_threshold_min_swap = 10 %
|
|
||||||
hard_threshold_min_swap = 5 %
|
|
||||||
|
|
||||||
Specifying the total share of zram in memory, if exceeded the
|
|
||||||
corresponding signals are sent. As the share of zram in memory
|
|
||||||
increases, the responsiveness of the system may fall. 90 % is a
|
|
||||||
usual hang level, not recommended to set very high.
|
|
||||||
|
|
||||||
Can be specified in % and M. Valid values are floating-point
|
|
||||||
numbers from the range [0; 90] %.
|
|
||||||
|
|
||||||
soft_threshold_max_zram = 60 %
|
|
||||||
hard_threshold_max_zram = 65 %
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
2. Response on PSI memory metrics (it needs Linux 4.20 and up)
|
|
||||||
|
|
||||||
About PSI:
|
|
||||||
https://facebookmicrosites.github.io/psi/
|
|
||||||
|
|
||||||
Disabled by default (psi_checking_enabled = False).
|
|
||||||
|
|
||||||
psi_checking_enabled = False
|
psi_checking_enabled = False
|
||||||
|
|
||||||
Choose a path to PSI file.
|
Description:
|
||||||
By default it monitors system-wide file: /proc/pressure/memory
|
Type: string
|
||||||
You can also set a file to monitor a single cgroup slice.
|
Valid values:
|
||||||
For example:
|
|
||||||
psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
|
|
||||||
psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
|
|
||||||
psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
|
|
||||||
|
|
||||||
Execute the command
|
|
||||||
find /sys/fs/cgroup -name memory.pressure
|
|
||||||
to find available memory.pressure files (except /proc/pressure/memory).
|
|
||||||
(actual for cgroup2)
|
|
||||||
|
|
||||||
psi_path = /proc/pressure/memory
|
psi_path = /proc/pressure/memory
|
||||||
|
|
||||||
Valid psi_metrics are:
|
Description:
|
||||||
some_avg10
|
Type: string
|
||||||
some_avg60
|
Valid values:
|
||||||
some_avg300
|
|
||||||
full_avg10
|
|
||||||
full_avg60
|
|
||||||
full_avg300
|
|
||||||
|
|
||||||
some_avg10 is most sensitive.
|
|
||||||
|
|
||||||
psi_metrics = some_avg10
|
psi_metrics = some_avg10
|
||||||
|
|
||||||
soft_threshold_max_psi = 60
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
hard_threshold_max_psi = 90
|
|
||||||
|
|
||||||
>= 0, float
|
|
||||||
psi_excess_duration = 60
|
psi_excess_duration = 60
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
psi_post_action_delay = 60
|
psi_post_action_delay = 60
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
3. The frequency of checking the amount of available memory
|
3. Poll rate
|
||||||
(and CPU usage)
|
|
||||||
|
|
||||||
Coefficients that affect the intensity of monitoring. Reducing
|
Description:
|
||||||
the coefficients can reduce CPU usage and increase the periods
|
Type: float
|
||||||
between memory checks.
|
Valid values:
|
||||||
|
|
||||||
Why three coefficients instead of one? Because the swap fill rate
|
|
||||||
is usually lower than the RAM fill rate.
|
|
||||||
|
|
||||||
It is possible to set a lower intensity of monitoring for swap
|
|
||||||
without compromising OOM prevention, and thus reduce the CPU load.
|
|
||||||
|
|
||||||
The default values work well for desktops. On servers without rapid
|
|
||||||
fluctuations in memory levels the values can be reduced.
|
|
||||||
|
|
||||||
Valid values are positive floating-point numbers.
|
|
||||||
|
|
||||||
fill_rate_mem = 4000
|
fill_rate_mem = 4000
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
fill_rate_swap = 1500
|
fill_rate_swap = 1500
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
fill_rate_zram = 6000
|
fill_rate_zram = 6000
|
||||||
|
|
||||||
See also https://github.com/rfjakob/earlyoom/issues/61
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
max_sleep = 3
|
max_sleep = 3
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
min_sleep = 0.1
|
min_sleep = 0.1
|
||||||
|
|
||||||
Sleep time if soft threshold exceeded.
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
over_sleep = 0.05
|
over_sleep = 0.05
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
4. The prevention of killing innocent victims
|
4. Warnings and notifications
|
||||||
|
|
||||||
Valid values are integers from the range [0; 1000].
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
min_badness = 10
|
post_action_gui_notifications = False
|
||||||
|
|
||||||
Valid values are non-negative floating-point numbers.
|
Description:
|
||||||
Min delay if a victim doesn't respond to SIGTERM in 10 ms.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
post_soft_action_delay = 3
|
low_memory_warnings_enabled = False
|
||||||
|
|
||||||
post_zombie_delay = 0.1
|
Description:
|
||||||
|
Type: string
|
||||||
|
Valid values:
|
||||||
|
|
||||||
victim_cache_time = 10
|
warning_exe =
|
||||||
|
|
||||||
Valid values are True and False.
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
ignore_positive_oom_score_adj = False
|
warning_threshold_min_mem = 20 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
warning_threshold_min_swap = 20 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
warning_threshold_max_zram = 50 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
warning_threshold_max_psi = 100
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
min_post_warning_delay = 30
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
env_cache_time = 300
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
5. Impact on the badness of processes via matching their names,
|
5. Soft threshold
|
||||||
cmdlines or UIDs with regular expressions using re.search().
|
|
||||||
|
|
||||||
See https://en.wikipedia.org/wiki/Regular_expression and
|
Description:
|
||||||
https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Enabling these options slows down the search for the victim
|
soft_threshold_min_mem = 8 %
|
||||||
because the names, cmdlines or UIDs of all processes
|
|
||||||
(except init and kthreads) are compared with the
|
|
||||||
specified regex patterns (in fact slowing down is caused by
|
|
||||||
reading all /proc/*/cmdline and /proc/*/status files).
|
|
||||||
|
|
||||||
Use script `oom-sort` from nohang package to view
|
Description:
|
||||||
names, cmdlines and UIDs of processes.
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
5.1. Matching process names with RE patterns
|
soft_threshold_min_swap = 8 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
soft_threshold_max_zram = 60 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
soft_threshold_max_psi = 60
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
6. Hard threshold
|
||||||
|
|
||||||
|
hard_threshold_min_mem = 4 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
hard_threshold_min_swap = 4 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
hard_threshold_max_zram = 65 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
hard_threshold_max_psi = 90
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
7. Customize victim selection: adjusting badness of processes
|
||||||
|
|
||||||
|
7.1. Ignore positive oom_score_adj
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
|
ignore_positive_oom_score_adj = False
|
||||||
|
|
||||||
|
7.2. Forbid negative badness
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
|
forbid_negative_badness = True
|
||||||
|
|
||||||
|
|
||||||
|
7.3.1. Matching process names with RE patterns change their badness
|
||||||
|
|
||||||
Syntax:
|
Syntax:
|
||||||
|
|
||||||
@ -201,27 +253,27 @@ ignore_positive_oom_score_adj = False
|
|||||||
Example:
|
Example:
|
||||||
@BADNESS_ADJ_RE_NAME -500 /// ^sshd$
|
@BADNESS_ADJ_RE_NAME -500 /// ^sshd$
|
||||||
|
|
||||||
5.2. Matching CGroup_v1-line with RE patterns
|
7.3.2. Matching CGroup_v1-line with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 -100 /// ^/system\.slice/
|
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
|
@BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
|
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
|
||||||
|
|
||||||
5.3. Matching CGroup_v2-line with RE patterns
|
7.3.3. Matching CGroup_v2-line with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
|
@BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
|
||||||
|
|
||||||
5.4. Matching eUIDs with RE patterns
|
7.3.4. Matching eUIDs with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_UID -100 /// ^0$
|
@BADNESS_ADJ_RE_UID -100 /// ^0$
|
||||||
|
|
||||||
5.5. Matching realpath with RE patterns
|
7.3.5. Matching realpath with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
|
@BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
|
||||||
|
|
||||||
5.6. Matching cmdlines with RE patterns
|
7.3.6. Matching cmdlines with RE patterns
|
||||||
|
|
||||||
A good option that allows fine adjustment.
|
A good option that allows fine adjustment.
|
||||||
|
|
||||||
@ -229,21 +281,22 @@ ignore_positive_oom_score_adj = False
|
|||||||
@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
|
@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
|
||||||
|
|
||||||
Prefer firefox tabs (Web Content and WebExtensions)
|
Prefer firefox tabs (Web Content and WebExtensions)
|
||||||
@BADNESS_ADJ_RE_CMDLINE 300 /// -appomni
|
@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
|
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
|
||||||
|
|
||||||
5.7. Matching environ with RE patterns
|
7.3.7. Matching environ with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
|
@BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
|
||||||
|
|
||||||
|
|
||||||
Note that you can control badness also via systemd units via
|
Note that you can control badness also via systemd units via
|
||||||
OOMScoreAdjust, see
|
OOMScoreAdjust, see
|
||||||
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
|
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
6. Customize corrective actions.
|
8. Customize soft corrective actions
|
||||||
|
|
||||||
TODO: docs
|
TODO: docs
|
||||||
|
|
||||||
@ -256,6 +309,8 @@ ignore_positive_oom_score_adj = False
|
|||||||
@SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
|
@SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
|
||||||
@SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
|
@SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
|
||||||
|
|
||||||
|
@SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID
|
||||||
|
|
||||||
$PID will be replaced by process PID.
|
$PID will be replaced by process PID.
|
||||||
$NAME will be replaced by process name.
|
$NAME will be replaced by process name.
|
||||||
$SERVICE will be replaced by .service if it exists (otherwise it will be
|
$SERVICE will be replaced by .service if it exists (otherwise it will be
|
||||||
@ -263,59 +318,80 @@ ignore_positive_oom_score_adj = False
|
|||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
7. GUI notifications & low memory warnings
|
9. Misc settings
|
||||||
|
|
||||||
post_action_gui_notifications = False
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Enable GUI notifications about the low level of available memory.
|
max_soft_exit_time = 10
|
||||||
Valid values are True and False.
|
|
||||||
|
|
||||||
low_memory_warnings_enabled = False
|
Description:
|
||||||
|
Type: string
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Execute the command instead of sending GUI notifications if the value is
|
post_kill_exe =
|
||||||
not an empty line. For example:
|
|
||||||
warning_exe = cat /proc/meminfo &
|
|
||||||
|
|
||||||
warning_exe =
|
Description:
|
||||||
|
Type: integer
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Can be specified in % (percent) and M (MiB).
|
min_badness = 10
|
||||||
Valid values are floating-point numbers from the range [0; 100] %.
|
|
||||||
|
|
||||||
warning_threshold_min_mem = 20 %
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
warning_threshold_min_swap = 25 %
|
post_soft_action_delay = 3
|
||||||
|
|
||||||
warning_threshold_max_zram = 50 %
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
warning_threshold_max_psi = 100
|
post_zombie_delay = 0.1
|
||||||
|
|
||||||
Valid values are floating-point numbers from the range [1; 300].
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
min_post_warning_delay = 20
|
victim_cache_time = 10
|
||||||
|
|
||||||
env_cache_time = 300
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Ampersands (&) will be replaced with asterisks (*) in process
|
exe_timeout = 20
|
||||||
names and in commands.
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
8. Verbosity
|
10. Verbosity, debug, logging
|
||||||
|
|
||||||
Display the configuration when the program starts.
|
Description:
|
||||||
Valid values are True and False.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_config_at_startup = False
|
print_config_at_startup = False
|
||||||
|
|
||||||
Print memory check results.
|
Description:
|
||||||
Valid values are True and False.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_mem_check_results = False
|
print_mem_check_results = False
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
min_mem_report_interval = 60
|
min_mem_report_interval = 60
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_proc_table = False
|
print_proc_table = False
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: string
|
||||||
Valid values:
|
Valid values:
|
||||||
None
|
None
|
||||||
cgroup_v1
|
cgroup_v1
|
||||||
@ -326,36 +402,59 @@ print_proc_table = False
|
|||||||
|
|
||||||
extra_table_info = None
|
extra_table_info = None
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_victim_status = True
|
print_victim_status = True
|
||||||
|
|
||||||
max_victim_ancestry_depth = 3
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_victim_cmdline = False
|
print_victim_cmdline = False
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: integer
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
max_victim_ancestry_depth = 3
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_statistics = True
|
print_statistics = True
|
||||||
|
|
||||||
Print sleep periods between memory checks.
|
Description:
|
||||||
Valid values are True and False.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_psi = False
|
debug_psi = False
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_gui_notifications = False
|
debug_gui_notifications = False
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_sleep = False
|
debug_sleep = False
|
||||||
|
|
||||||
separate_log = False
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_threading = False
|
debug_threading = False
|
||||||
|
|
||||||
###############################################################################
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
9. Misc
|
separate_log = False
|
||||||
|
|
||||||
max_soft_exit_time = 10
|
|
||||||
|
|
||||||
post_kill_exe =
|
|
||||||
|
|
||||||
forbid_negative_badness = True
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
447
test.conf
447
test.conf
@ -6,186 +6,240 @@
|
|||||||
|
|
||||||
The configuration includes the following sections:
|
The configuration includes the following sections:
|
||||||
|
|
||||||
0. Common zram settings
|
1. Common zram settings
|
||||||
1. Memory levels to respond to as an OOM threat
|
2. Common PSI settings
|
||||||
2. Response on PSI memory metrics
|
3. Poll rate
|
||||||
3. The frequency of checking the level of available memory
|
4. Warnings and notifications
|
||||||
(and CPU usage)
|
5. Soft threshold
|
||||||
4. The prevention of killing innocent victims
|
6. Hard threshold
|
||||||
5. Impact on the badness of processes via matching their names, cgroups and
|
7. Customize victim selection: adjusting badness of processes
|
||||||
cmdlines with specified regular expressions
|
8. Customize soft corrective actions
|
||||||
6. Customize corrective actions: the execution of a specific command
|
9. Misc settings
|
||||||
instead of sending the SIGTERM signal
|
10. Verbosity, debug, logging
|
||||||
7. GUI notifications:
|
|
||||||
- low memory warnings
|
|
||||||
- OOM prevention results
|
|
||||||
8. Output verbosity
|
|
||||||
9. Misc
|
|
||||||
|
|
||||||
Just read the description of the parameters and edit the values.
|
Just read the description of the parameters and edit the values.
|
||||||
Please restart the program after editing the config.
|
Please restart the program after editing the config.
|
||||||
|
|
||||||
|
TODO: improve descriptions
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
0. Common zram settings
|
1. Common zram settings
|
||||||
|
|
||||||
See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
|
Key: zram_checking_enabled
|
||||||
You may need to set `zram_checking_enabled = True` if you have a large zram disksize.
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
Default value: False
|
||||||
|
|
||||||
zram_checking_enabled = True
|
zram_checking_enabled = True
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
1. Thresholds below which a signal should be sent to the victim
|
2. Common PSI settings
|
||||||
|
|
||||||
Sets the available memory levels at or below which SIGTERM or SIGKILL
|
Description:
|
||||||
signals are sent. The signal will be sent if MemAvailable and
|
Type: boolean
|
||||||
SwapFree (in /proc/meminfo) at the same time will drop below the
|
Valid values: True and False
|
||||||
corresponding values. Can be specified in % (percent) and M (MiB).
|
|
||||||
Valid values are floating-point numbers from the range [0; 100] %.
|
|
||||||
|
|
||||||
MemAvailable levels.
|
|
||||||
|
|
||||||
soft_threshold_min_mem = 10 %
|
|
||||||
hard_threshold_min_mem = 5 %
|
|
||||||
|
|
||||||
SwapFree levels.
|
|
||||||
|
|
||||||
soft_threshold_min_swap = 15 %
|
|
||||||
hard_threshold_min_swap = 5 %
|
|
||||||
|
|
||||||
Specifying the total share of zram in memory, if exceeded the
|
|
||||||
corresponding signals are sent. As the share of zram in memory
|
|
||||||
increases, the responsiveness of the system may fall. 90 % is a
|
|
||||||
usual hang level, not recommended to set very high.
|
|
||||||
|
|
||||||
Can be specified in % and M. Valid values are floating-point
|
|
||||||
numbers from the range [0; 90] %.
|
|
||||||
|
|
||||||
soft_threshold_max_zram = 50 %
|
|
||||||
hard_threshold_max_zram = 60 %
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
2. Response on PSI memory metrics (it needs Linux 4.20 and up)
|
|
||||||
|
|
||||||
About PSI:
|
|
||||||
https://facebookmicrosites.github.io/psi/
|
|
||||||
|
|
||||||
Disabled by default (psi_checking_enabled = False).
|
|
||||||
|
|
||||||
psi_checking_enabled = True
|
psi_checking_enabled = True
|
||||||
|
|
||||||
Choose a path to PSI file.
|
Description:
|
||||||
By default it monitors system-wide file: /proc/pressure/memory
|
Type: string
|
||||||
You also can set file to monitor one cgroup slice.
|
Valid values:
|
||||||
For example:
|
|
||||||
psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
|
|
||||||
psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
|
|
||||||
psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
|
|
||||||
|
|
||||||
Execute the command
|
|
||||||
find /sys/fs/cgroup -name memory.pressure
|
|
||||||
to find available memory.pressure files (except /proc/pressure/memory).
|
|
||||||
(actual for cgroup2)
|
|
||||||
|
|
||||||
psi_path = /proc/pressure/memory
|
psi_path = /proc/pressure/memory
|
||||||
|
|
||||||
Valid psi_metrics are:
|
Description:
|
||||||
some_avg10
|
Type: string
|
||||||
some_avg60
|
Valid values:
|
||||||
some_avg300
|
|
||||||
full_avg10
|
|
||||||
full_avg60
|
|
||||||
full_avg300
|
|
||||||
|
|
||||||
some_avg10 is most sensitive.
|
|
||||||
|
|
||||||
psi_metrics = some_avg10
|
psi_metrics = some_avg10
|
||||||
|
|
||||||
soft_threshold_max_psi = 60
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
hard_threshold_max_psi = 90
|
|
||||||
|
|
||||||
>= 0, float
|
|
||||||
psi_excess_duration = 60
|
psi_excess_duration = 60
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
psi_post_action_delay = 60
|
psi_post_action_delay = 60
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
3. The frequency of checking the amount of available memory
|
3. Poll rate
|
||||||
(and CPU usage)
|
|
||||||
|
|
||||||
Coefficients that affect the intensity of monitoring. Reducing
|
Description:
|
||||||
the coefficients can reduce CPU usage and increase the periods
|
Type: float
|
||||||
between memory checks.
|
Valid values:
|
||||||
|
|
||||||
Why three coefficients instead of one? Because the swap fill rate
|
|
||||||
is usually lower than the RAM fill rate.
|
|
||||||
|
|
||||||
It is possible to set a lower intensity of monitoring for swap
|
|
||||||
without compromising OOM prevention, and thus reduce the CPU load.
|
|
||||||
|
|
||||||
Default values work well for desktops. On servers without rapid
|
|
||||||
fluctuations in memory levels the values can be reduced.
|
|
||||||
|
|
||||||
Valid values are positive floating-point numbers.
|
|
||||||
|
|
||||||
fill_rate_mem = 4000
|
fill_rate_mem = 4000
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
fill_rate_swap = 1500
|
fill_rate_swap = 1500
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
fill_rate_zram = 6000
|
fill_rate_zram = 6000
|
||||||
|
|
||||||
See also https://github.com/rfjakob/earlyoom/issues/61
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
max_sleep = 3
|
max_sleep = 3
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
min_sleep = 0.1
|
min_sleep = 0.1
|
||||||
|
|
||||||
Sleep time if soft threshold exceeded.
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
over_sleep = 0.05
|
over_sleep = 0.05
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
4. The prevention of killing innocent victims
|
4. Warnings and notifications
|
||||||
|
|
||||||
Valid values are integers from the range [0; 1000].
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
min_badness = 20
|
post_action_gui_notifications = True
|
||||||
|
|
||||||
Valid values are non-negative floating-point numbers.
|
Description:
|
||||||
Min delay if a victim doesn't respond to SIGTERM in 10 ms.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
post_soft_action_delay = 3
|
low_memory_warnings_enabled = True
|
||||||
|
|
||||||
post_zombie_delay = 0.1
|
Description:
|
||||||
|
Type: string
|
||||||
|
Valid values:
|
||||||
|
|
||||||
victim_cache_time = 10
|
warning_exe =
|
||||||
|
|
||||||
Valid values are True and False.
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
ignore_positive_oom_score_adj = True
|
warning_threshold_min_mem = 20 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
warning_threshold_min_swap = 20 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
warning_threshold_max_zram = 50 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
warning_threshold_max_psi = 100
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
min_post_warning_delay = 30
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
env_cache_time = 300
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
5. Impact on the badness of processes via matching their names,
|
5. Soft threshold
|
||||||
cmdlines or UIDs with regular expressions using re.search().
|
|
||||||
|
|
||||||
See https://en.wikipedia.org/wiki/Regular_expression and
|
Description:
|
||||||
https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Enabling these options slows down the search for the victim
|
soft_threshold_min_mem = 8 %
|
||||||
because the names, cmdlines or UIDs of all processes
|
|
||||||
(except init and kthreads) are compared with the
|
|
||||||
specified regex patterns (in fact slowing down is caused by
|
|
||||||
reading all /proc/*/cmdline and /proc/*/status files).
|
|
||||||
|
|
||||||
Use script `oom-sort` from nohang package to view
|
Description:
|
||||||
names, cmdlines and UIDs of processes.
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
5.1. Matching process names with RE patterns
|
soft_threshold_min_swap = 8 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
soft_threshold_max_zram = 60 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
soft_threshold_max_psi = 60
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
6. Hard threshold
|
||||||
|
|
||||||
|
hard_threshold_min_mem = 4 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
hard_threshold_min_swap = 4 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float (+ % or M)
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
hard_threshold_max_zram = 65 %
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
hard_threshold_max_psi = 90
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
7. Customize victim selection: adjusting badness of processes
|
||||||
|
|
||||||
|
7.1. Ignore positive oom_score_adj
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
|
ignore_positive_oom_score_adj = True
|
||||||
|
|
||||||
|
7.2. Forbid negative badness
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
|
forbid_negative_badness = True
|
||||||
|
|
||||||
|
|
||||||
|
7.3.1. Matching process names with RE patterns change their badness
|
||||||
|
|
||||||
Syntax:
|
Syntax:
|
||||||
|
|
||||||
@ -199,61 +253,69 @@ ignore_positive_oom_score_adj = True
|
|||||||
Example:
|
Example:
|
||||||
@BADNESS_ADJ_RE_NAME -500 /// ^sshd$
|
@BADNESS_ADJ_RE_NAME -500 /// ^sshd$
|
||||||
|
|
||||||
5.2. Matching CGroup_v1-line with RE patterns
|
7.3.2. Matching CGroup_v1-line with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 5 /// ^/system\.slice/
|
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
|
@BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
|
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
|
||||||
|
|
||||||
5.3. Matching CGroup_v2-line with RE patterns
|
7.3.3. Matching CGroup_v2-line with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
|
@BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
|
||||||
|
|
||||||
5.4. Matching eUIDs with RE patterns
|
7.3.4. Matching eUIDs with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_UID 50 /// ^0$
|
@BADNESS_ADJ_RE_UID -100 /// ^0$
|
||||||
|
|
||||||
5.5. Matching realpath with RE patterns
|
7.3.5. Matching realpath with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
|
@BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
|
||||||
|
|
||||||
5.6. Matching cmdlines with RE patterns
|
7.3.6. Matching cmdlines with RE patterns
|
||||||
|
|
||||||
|
@BADNESS_ADJ_RE_CMDLINE 2000 /// ^/bin/sleep
|
||||||
|
|
||||||
A good option that allows fine adjustment.
|
|
||||||
|
|
||||||
Prefer chromium tabs and electron-based apps
|
Prefer chromium tabs and electron-based apps
|
||||||
@BADNESS_ADJ_RE_CMDLINE 2000 /// ^/bin/sleep
|
@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
|
||||||
|
|
||||||
Prefer firefox tabs (Web Content and WebExtensions)
|
Prefer firefox tabs (Web Content and WebExtensions)
|
||||||
@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
|
@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
|
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
|
||||||
|
|
||||||
5.7. Matching environ with RE patterns
|
7.3.7. Matching environ with RE patterns
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
|
@BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
|
||||||
|
|
||||||
|
|
||||||
Note that you can control badness also via systemd units via
|
Note that you can control badness also via systemd units via
|
||||||
OOMScoreAdjust, see
|
OOMScoreAdjust, see
|
||||||
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
|
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
6. Customize corrective actions.
|
8. Customize soft corrective actions
|
||||||
|
|
||||||
TODO: docs
|
TODO: docs
|
||||||
|
|
||||||
Syntax:
|
Syntax:
|
||||||
KEY REGEXP SEPARATOR COMMAND
|
KEY REGEXP SEPARATOR COMMAND
|
||||||
|
|
||||||
|
|
||||||
@SOFT_ACTION_RE_NAME ^tail$ /// kill -SEGV $PID
|
@SOFT_ACTION_RE_NAME ^tail$ /// kill -SEGV $PID
|
||||||
|
|
||||||
|
|
||||||
|
@SOFT_ACTION_RE_NAME ^foo$ /// kill -SEGV $PID
|
||||||
@SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID
|
@SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID
|
||||||
|
|
||||||
@SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
|
@SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
|
||||||
@SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
|
@SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
|
||||||
|
|
||||||
|
@SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID
|
||||||
|
|
||||||
$PID will be replaced by process PID.
|
$PID will be replaced by process PID.
|
||||||
$NAME will be replaced by process name.
|
$NAME will be replaced by process name.
|
||||||
$SERVICE will be replaced by .service if it exists (otherwise it will be
|
$SERVICE will be replaced by .service if it exists (otherwise it will be
|
||||||
@ -261,59 +323,80 @@ ignore_positive_oom_score_adj = True
|
|||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
7. GUI notifications & low memory warnings
|
9. Misc settings
|
||||||
|
|
||||||
post_action_gui_notifications = True
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Enable GUI notifications about the low level of available memory.
|
max_soft_exit_time = 10
|
||||||
Valid values are True and False.
|
|
||||||
|
|
||||||
low_memory_warnings_enabled = True
|
Description:
|
||||||
|
Type: string
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Execute the command instead of sending GUI notifications if the value is
|
post_kill_exe =
|
||||||
not an empty line. For example:
|
|
||||||
warning_exe = cat /proc/meminfo &
|
|
||||||
|
|
||||||
warning_exe = echo 0
|
Description:
|
||||||
|
Type: integer
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Can be specified in % (percent) and M (MiB).
|
min_badness = 10
|
||||||
Valid values are floating-point numbers from the range [0; 100] %.
|
|
||||||
|
|
||||||
warning_threshold_min_mem = 25 %
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
warning_threshold_min_swap = 35 %
|
post_soft_action_delay = 3
|
||||||
|
|
||||||
warning_threshold_max_zram = 40 %
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
warning_threshold_max_psi = 100
|
post_zombie_delay = 0.1
|
||||||
|
|
||||||
Valid values are floating-point numbers from the range [1; 300].
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
min_post_warning_delay = 20
|
victim_cache_time = 10
|
||||||
|
|
||||||
env_cache_time = 300
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
Ampersands (&) will be replaced with asterisks (*) in process
|
exe_timeout = 20
|
||||||
names and in commands.
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
8. Verbosity
|
10. Verbosity, debug, logging
|
||||||
|
|
||||||
Display the configuration when the program starts.
|
Description:
|
||||||
Valid values are True and False.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_config_at_startup = True
|
print_config_at_startup = True
|
||||||
|
|
||||||
Print memory check results.
|
Description:
|
||||||
Valid values are True and False.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_mem_check_results = True
|
print_mem_check_results = True
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: float
|
||||||
|
Valid values:
|
||||||
|
|
||||||
min_mem_report_interval = 0
|
min_mem_report_interval = 0
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_proc_table = True
|
print_proc_table = True
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: string
|
||||||
Valid values:
|
Valid values:
|
||||||
None
|
None
|
||||||
cgroup_v1
|
cgroup_v1
|
||||||
@ -322,39 +405,61 @@ print_proc_table = True
|
|||||||
cmdline
|
cmdline
|
||||||
environ
|
environ
|
||||||
|
|
||||||
extra_table_info = cgroup_v1
|
extra_table_info = None
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_victim_status = True
|
print_victim_status = True
|
||||||
|
|
||||||
max_victim_ancestry_depth = 99
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_victim_cmdline = True
|
print_victim_cmdline = True
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: integer
|
||||||
|
Valid values:
|
||||||
|
|
||||||
|
max_victim_ancestry_depth = 99
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
print_statistics = True
|
print_statistics = True
|
||||||
|
|
||||||
Print sleep periods between memory checks.
|
Description:
|
||||||
Valid values are True and False.
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_psi = True
|
debug_psi = True
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_gui_notifications = True
|
debug_gui_notifications = True
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_sleep = True
|
debug_sleep = True
|
||||||
|
|
||||||
separate_log = True
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
debug_threading = True
|
debug_threading = True
|
||||||
|
|
||||||
|
Description:
|
||||||
|
Type: boolean
|
||||||
|
Valid values: True and False
|
||||||
|
|
||||||
###############################################################################
|
separate_log = True
|
||||||
|
|
||||||
9. Misc
|
|
||||||
|
|
||||||
max_soft_exit_time = 10
|
|
||||||
|
|
||||||
post_kill_exe = echo 0
|
|
||||||
|
|
||||||
forbid_negative_badness = True
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user