add ignore_positive_oom_score_adj
This commit is contained in:
parent
20493b9a50
commit
d233b17bd5
1
Makefile
1
Makefile
@ -21,7 +21,6 @@ install:
|
|||||||
|
|
||||||
install -m0644 ./nohang.conf $(DESTDIR)/$(PREFIX)/etc/nohang/nohang.conf
|
install -m0644 ./nohang.conf $(DESTDIR)/$(PREFIX)/etc/nohang/nohang.conf
|
||||||
install -m0644 ./nohang.conf $(DESTDIR)/$(PREFIX)/etc/nohang/nohang.conf.default
|
install -m0644 ./nohang.conf $(DESTDIR)/$(PREFIX)/etc/nohang/nohang.conf.default
|
||||||
install -m0644 ./my_desktop.conf $(DESTDIR)/$(PREFIX)/etc/nohang/my_desktop.conf
|
|
||||||
|
|
||||||
install -d $(DESTDIR)/$(PREFIX)/etc/logrotate.d
|
install -d $(DESTDIR)/$(PREFIX)/etc/logrotate.d
|
||||||
install -m0644 ./nohang.logrotate $(DESTDIR)/$(PREFIX)/etc/logrotate.d/nohang
|
install -m0644 ./nohang.logrotate $(DESTDIR)/$(PREFIX)/etc/logrotate.d/nohang
|
||||||
|
365
my_desktop.conf
365
my_desktop.conf
@ -1,365 +0,0 @@
|
|||||||
This is nohang config file.
|
|
||||||
Lines starting with #, tabs and spaces are comments.
|
|
||||||
Lines starting with @ contain optional parameters.
|
|
||||||
All values are case sensitive.
|
|
||||||
Be careful: nohang doesn't forbid you to shoot yourself in the foot.
|
|
||||||
|
|
||||||
The configuration includes the following sections:
|
|
||||||
|
|
||||||
0. Common zram settings
|
|
||||||
1. Memory levels to respond to as an OOM threat
|
|
||||||
2. Response on PSI memory metrics
|
|
||||||
3. The frequency of checking the level of available memory
|
|
||||||
(and CPU usage)
|
|
||||||
4. The prevention of killing innocent victims
|
|
||||||
5. Impact on the badness of processes via matching their names, cgroups and
|
|
||||||
cmdlines with specified regular expressions
|
|
||||||
6. Customize corrective actions: the execution of a specific command
|
|
||||||
instead of sending the SIGTERM signal
|
|
||||||
7. GUI notifications:
|
|
||||||
- low memory warnings
|
|
||||||
- OOM prevention results
|
|
||||||
8. Output verbosity
|
|
||||||
9. Misc
|
|
||||||
|
|
||||||
Just read the description of the parameters and edit the values.
|
|
||||||
Please restart the program after editing the config.
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
0. Common zram settings
|
|
||||||
|
|
||||||
See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
|
|
||||||
You may need to set `ignore_zram = False` if you have a big zram disksize.
|
|
||||||
|
|
||||||
ignore_zram = False
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
1. Thresholds below which a signal should be sent to the victim
|
|
||||||
|
|
||||||
Sets the available memory levels at or below which SIGTERM or SIGKILL
|
|
||||||
signals are sent. The signal will be sent if MemAvailable and
|
|
||||||
SwapFree (in /proc/meminfo) at the same time will drop below the
|
|
||||||
corresponding values. Can be specified in % (percent) and M (MiB).
|
|
||||||
Valid values are floating-point numbers from the range [0; 100] %.
|
|
||||||
|
|
||||||
MemAvailable levels.
|
|
||||||
|
|
||||||
mem_min_sigterm = 10 %
|
|
||||||
mem_min_sigkill = 5 %
|
|
||||||
|
|
||||||
SwapFree levels.
|
|
||||||
|
|
||||||
swap_min_sigterm = 15 %
|
|
||||||
swap_min_sigkill = 5 %
|
|
||||||
|
|
||||||
Specifying the total share of zram in memory, if exceeded the
|
|
||||||
corresponding signals are sent. As the share of zram in memory
|
|
||||||
increases, the responsiveness of the system may drop. 90 % is a
|
|
||||||
usual hang level, not recommended to set very high.
|
|
||||||
|
|
||||||
Can be specified in % and M. Valid values are floating-point
|
|
||||||
numbers from the range [0; 90] %.
|
|
||||||
|
|
||||||
zram_max_sigterm = 50 %
|
|
||||||
zram_max_sigkill = 60 %
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
2. Response on PSI memory metrics (it needs Linux 4.20 and up)
|
|
||||||
|
|
||||||
About PSI:
|
|
||||||
https://facebookmicrosites.github.io/psi/
|
|
||||||
|
|
||||||
Disabled by default (ignore_psi = True).
|
|
||||||
|
|
||||||
ignore_psi = True
|
|
||||||
|
|
||||||
Choose a path to PSI file.
|
|
||||||
By default it monitors system-wide file: /proc/pressure/memory
|
|
||||||
You also can set file to monitor one cgroup slice.
|
|
||||||
For example:
|
|
||||||
psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
|
|
||||||
psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
|
|
||||||
psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
|
|
||||||
|
|
||||||
Execute the command
|
|
||||||
find /sys/fs/cgroup -name memory.pressure
|
|
||||||
to find available memory.pressure files (except /proc/pressure/memory).
|
|
||||||
(relevant for cgroup2)
|
|
||||||
|
|
||||||
psi_path = /proc/pressure/memory
|
|
||||||
|
|
||||||
Valid psi_metrics are:
|
|
||||||
some_avg10
|
|
||||||
some_avg60
|
|
||||||
some_avg300
|
|
||||||
full_avg10
|
|
||||||
full_avg60
|
|
||||||
full_avg300
|
|
||||||
|
|
||||||
some_avg10 is most sensitive.
|
|
||||||
|
|
||||||
psi_metrics = some_avg10
|
|
||||||
|
|
||||||
sigterm_psi_threshold = 60
|
|
||||||
sigkill_psi_threshold = 90
|
|
||||||
|
|
||||||
>= 0, float
|
|
||||||
psi_excess_duration = 60
|
|
||||||
|
|
||||||
psi_post_action_delay = 90
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
3. The frequency of checking the amount of available memory
|
|
||||||
(and CPU usage)
|
|
||||||
|
|
||||||
Coefficients that affect the intensity of monitoring. Reducing
|
|
||||||
the coefficients can reduce CPU usage and increase the periods
|
|
||||||
between memory checks.
|
|
||||||
|
|
||||||
Why three coefficients instead of one? Because the swap fill rate
|
|
||||||
is usually lower than the RAM fill rate.
|
|
||||||
|
|
||||||
It is possible to set a lower intensity of monitoring for swap
|
|
||||||
without compromising to prevent OOM and thus reduce the CPU load.
|
|
||||||
|
|
||||||
Default values work well for desktops. On servers without rapid
|
|
||||||
fluctuations in memory levels the values can be reduced.
|
|
||||||
|
|
||||||
Valid values are positive floating-point numbers.
|
|
||||||
|
|
||||||
rate_mem = 4000
|
|
||||||
rate_swap = 1500
|
|
||||||
rate_zram = 6000
|
|
||||||
|
|
||||||
See also https://github.com/rfjakob/earlyoom/issues/61
|
|
||||||
|
|
||||||
max_sleep = 3
|
|
||||||
min_sleep = 0.1
|
|
||||||
|
|
||||||
Sleep time if soft threshold exceeded.
|
|
||||||
|
|
||||||
over_sleep = 0.05
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
4. The prevention of killing innocent victims
|
|
||||||
|
|
||||||
Valid values are integers from the range [0; 1000].
|
|
||||||
|
|
||||||
min_badness = 30
|
|
||||||
|
|
||||||
Valid values are non-negative floating-point numbers.
|
|
||||||
Min delay if a victim doesn't respond to SIGTERM in 10 ms.
|
|
||||||
|
|
||||||
min_delay_after_sigterm = 3
|
|
||||||
|
|
||||||
post_zombie_delay = 0.1
|
|
||||||
|
|
||||||
victim_cache_time = 10
|
|
||||||
|
|
||||||
Valid values are True and False.
|
|
||||||
|
|
||||||
decrease_oom_score_adj = True
|
|
||||||
|
|
||||||
Valid values are integers from the range [0; 1000].
|
|
||||||
|
|
||||||
oom_score_adj_max = 0
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
5. Impact on the badness of processes via matching their names,
|
|
||||||
cmdlines or UIDs with regular expressions using re.search().
|
|
||||||
|
|
||||||
See https://en.wikipedia.org/wiki/Regular_expression and
|
|
||||||
https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions
|
|
||||||
|
|
||||||
Enabling these options slows down the search for the victim
|
|
||||||
because the names, cmdlines or UIDs of all processes
|
|
||||||
(except init and kthreads) are compared with the
|
|
||||||
specified regex patterns (in fact slowing down is caused by
|
|
||||||
reading all /proc/*/cmdline and /proc/*/status files).
|
|
||||||
|
|
||||||
Use script `oom-sort` from nohang package to view
|
|
||||||
names, cmdlines and UIDs of processes.
|
|
||||||
|
|
||||||
|
|
||||||
5.1 Matching process names with RE patterns
|
|
||||||
|
|
||||||
Syntax:
|
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_NAME badness_adj /// RE_pattern
|
|
||||||
|
|
||||||
New badness value will be += badness_adj
|
|
||||||
|
|
||||||
It is possible to compare multiple patterns
|
|
||||||
with different badness_adj values.
|
|
||||||
|
|
||||||
Example:
|
|
||||||
@BADNESS_ADJ_RE_NAME -500 /// ^sshd$
|
|
||||||
|
|
||||||
|
|
||||||
5.2 Matching CGroup-line (v1 and v2) with RE patterns
|
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/
|
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
|
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
|
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
|
|
||||||
|
|
||||||
|
|
||||||
5.3 Matching eUIDs with RE patterns
|
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_UID -100 /// ^0$
|
|
||||||
|
|
||||||
|
|
||||||
5.4 Matching realpath with RE patterns
|
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
|
|
||||||
|
|
||||||
|
|
||||||
5.5 Matching cmdlines with RE patterns
|
|
||||||
|
|
||||||
A good option that allows fine adjustment.
|
|
||||||
|
|
||||||
Prefer chromium tabs and electron-based apps
|
|
||||||
@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
|
|
||||||
|
|
||||||
Prefer firefox tabs (Web Content and WebExtensions)
|
|
||||||
@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
|
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
|
|
||||||
|
|
||||||
|
|
||||||
5.6 Matching environ with RE patterns
|
|
||||||
|
|
||||||
@BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
|
|
||||||
|
|
||||||
Note that you can control badness also via systemd units via
|
|
||||||
OOMScoreAdjust, see
|
|
||||||
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
6. Customize corrective actions.
|
|
||||||
|
|
||||||
TODO: docs
|
|
||||||
|
|
||||||
Syntax:
|
|
||||||
KEY REGEXP SEPARATOR COMMAND
|
|
||||||
|
|
||||||
@SOFT_ACTION_RE_NAME ^foo$ /// kill -SEGV $PID
|
|
||||||
@SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID
|
|
||||||
|
|
||||||
@SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
|
|
||||||
@SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
|
|
||||||
|
|
||||||
$PID will be replaced by process PID.
|
|
||||||
$NAME will be replaced by process name.
|
|
||||||
$SERVICE will be replaced by .service if it exists (otherwise it will be
|
|
||||||
replaced by an empty line)
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
7. GUI notifications:
|
|
||||||
- OOM prevention results and
|
|
||||||
- low memory warnings
|
|
||||||
|
|
||||||
gui_notifications = True
|
|
||||||
|
|
||||||
Enable GUI notifications about the low level of available memory.
|
|
||||||
Valid values are True and False.
|
|
||||||
|
|
||||||
gui_low_memory_warnings = True
|
|
||||||
|
|
||||||
Execute the command instead of sending GUI notifications if the value is
|
|
||||||
not an empty line. For example:
|
|
||||||
warning_exe = cat /proc/meminfo &
|
|
||||||
|
|
||||||
warning_exe =
|
|
||||||
|
|
||||||
Can be specified in % (percent) and M (MiB).
|
|
||||||
Valid values are floating-point numbers from the range [0; 100] %.
|
|
||||||
|
|
||||||
mem_min_warnings = 25 %
|
|
||||||
|
|
||||||
swap_min_warnings = 35 %
|
|
||||||
|
|
||||||
zram_max_warnings = 40 %
|
|
||||||
|
|
||||||
psi_avg_warnings = 100
|
|
||||||
|
|
||||||
Valid values are floating-point numbers from the range [1; 300].
|
|
||||||
|
|
||||||
min_time_between_warnings = 20
|
|
||||||
|
|
||||||
Ampersands (&) will be replaced with asterisks (*) in process
|
|
||||||
names and in commands.
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
8. Verbosity
|
|
||||||
|
|
||||||
Display the configuration when the program starts.
|
|
||||||
Valid values are True and False.
|
|
||||||
|
|
||||||
print_config = False
|
|
||||||
|
|
||||||
Print memory check results.
|
|
||||||
Valid values are True and False.
|
|
||||||
|
|
||||||
print_mem_check_results = True
|
|
||||||
|
|
||||||
min_mem_report_interval = 300
|
|
||||||
|
|
||||||
Print sleep periods between memory checks.
|
|
||||||
Valid values are True and False.
|
|
||||||
|
|
||||||
print_sleep_periods = False
|
|
||||||
|
|
||||||
print_total_stat = True
|
|
||||||
|
|
||||||
print_proc_table = False
|
|
||||||
|
|
||||||
Valid values:
|
|
||||||
None
|
|
||||||
cgroup_v1
|
|
||||||
cgroup_v2
|
|
||||||
realpath
|
|
||||||
cmdline
|
|
||||||
environ
|
|
||||||
|
|
||||||
extra_table_info = None
|
|
||||||
|
|
||||||
print_victim_info = True
|
|
||||||
|
|
||||||
print_victim_cmdline = True
|
|
||||||
|
|
||||||
max_ancestry_depth = 4
|
|
||||||
|
|
||||||
debug_gui_notifications = False
|
|
||||||
|
|
||||||
separate_log = True
|
|
||||||
|
|
||||||
psi_debug = False
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
9. Misc
|
|
||||||
|
|
||||||
max_post_sigterm_victim_lifetime = 10
|
|
||||||
|
|
||||||
post_kill_exe =
|
|
||||||
|
|
||||||
forbid_negative_badness = True
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
Use cases, feature requests and any questions are welcome:
|
|
||||||
https://github.com/hakavlad/nohang/issues
|
|
27
nohang
27
nohang
@ -68,8 +68,7 @@ def check_config():
|
|||||||
log(' min_delay_after_sigterm: {} sec'.format(min_delay_after_sigterm))
|
log(' min_delay_after_sigterm: {} sec'.format(min_delay_after_sigterm))
|
||||||
log(' post_zombie_delay: {} sec'.format(post_zombie_delay))
|
log(' post_zombie_delay: {} sec'.format(post_zombie_delay))
|
||||||
log(' victim_cache_time: {} sec'.format(victim_cache_time))
|
log(' victim_cache_time: {} sec'.format(victim_cache_time))
|
||||||
log(' decrease_oom_score_adj: {}'.format(decrease_oom_score_adj))
|
log(' ignore_positive_oom_score_adj: {}'.format(ignore_positive_oom_score_adj))
|
||||||
log(' oom_score_adj_max: {}'.format(oom_score_adj_max))
|
|
||||||
|
|
||||||
log('5. Impact on the badness of processes')
|
log('5. Impact on the badness of processes')
|
||||||
|
|
||||||
@ -555,10 +554,10 @@ def pid_to_badness(pid):
|
|||||||
oom_score = int(rline1('/proc/' + pid + '/oom_score'))
|
oom_score = int(rline1('/proc/' + pid + '/oom_score'))
|
||||||
badness = oom_score
|
badness = oom_score
|
||||||
|
|
||||||
if decrease_oom_score_adj:
|
if ignore_positive_oom_score_adj:
|
||||||
oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
|
oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
|
||||||
if badness > oom_score_adj_max and oom_score_adj > 0:
|
if oom_score_adj > 0:
|
||||||
badness = badness - oom_score_adj + oom_score_adj_max
|
badness = badness - oom_score_adj
|
||||||
|
|
||||||
if regex_matching:
|
if regex_matching:
|
||||||
name = pid_to_name(pid)
|
name = pid_to_name(pid)
|
||||||
@ -2633,10 +2632,13 @@ print_mem_check_results = conf_parse_bool('print_mem_check_results')
|
|||||||
print_sleep_periods = conf_parse_bool('print_sleep_periods')
|
print_sleep_periods = conf_parse_bool('print_sleep_periods')
|
||||||
gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
|
gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
|
||||||
gui_notifications = conf_parse_bool('gui_notifications')
|
gui_notifications = conf_parse_bool('gui_notifications')
|
||||||
decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
|
|
||||||
ignore_psi = conf_parse_bool('ignore_psi')
|
ignore_psi = conf_parse_bool('ignore_psi')
|
||||||
ignore_zram = conf_parse_bool('ignore_zram')
|
ignore_zram = conf_parse_bool('ignore_zram')
|
||||||
debug_gui_notifications = conf_parse_bool('debug_gui_notifications')
|
debug_gui_notifications = conf_parse_bool('debug_gui_notifications')
|
||||||
|
ignore_positive_oom_score_adj = conf_parse_bool('ignore_positive_oom_score_adj')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent
|
(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent
|
||||||
@ -2823,18 +2825,7 @@ else:
|
|||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
|
||||||
if 'oom_score_adj_max' in config_dict:
|
|
||||||
oom_score_adj_max = string_to_int_convert_test(
|
|
||||||
config_dict['oom_score_adj_max'])
|
|
||||||
if oom_score_adj_max is None:
|
|
||||||
errprint('Invalid oom_score_adj_max value, not integer\nExit')
|
|
||||||
exit(1)
|
|
||||||
if oom_score_adj_max < 0 or oom_score_adj_max > 1000:
|
|
||||||
errprint('Invalid oom_score_adj_max value\nExit')
|
|
||||||
exit(1)
|
|
||||||
else:
|
|
||||||
errprint('oom_score_adj_max not in config\nExit')
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
if 'min_time_between_warnings' in config_dict:
|
if 'min_time_between_warnings' in config_dict:
|
||||||
|
10
nohang.conf
10
nohang.conf
@ -109,7 +109,7 @@ sigkill_psi_threshold = 90
|
|||||||
>= 0, float
|
>= 0, float
|
||||||
psi_excess_duration = 60
|
psi_excess_duration = 60
|
||||||
|
|
||||||
psi_post_action_delay = 90
|
psi_post_action_delay = 60
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
@ -164,11 +164,7 @@ victim_cache_time = 10
|
|||||||
|
|
||||||
Valid values are True and False.
|
Valid values are True and False.
|
||||||
|
|
||||||
decrease_oom_score_adj = False
|
ignore_positive_oom_score_adj = False
|
||||||
|
|
||||||
Valid values are integers from the range [0; 1000].
|
|
||||||
|
|
||||||
oom_score_adj_max = 0
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
@ -337,7 +333,7 @@ print_victim_info = True
|
|||||||
|
|
||||||
print_victim_cmdline = False
|
print_victim_cmdline = False
|
||||||
|
|
||||||
max_ancestry_depth = 4
|
max_ancestry_depth = 3
|
||||||
|
|
||||||
debug_gui_notifications = False
|
debug_gui_notifications = False
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user