From d233b17bd523fd0ce0186b67cfcfdab71729d8ee Mon Sep 17 00:00:00 2001 From: Alexey Avramov Date: Mon, 15 Jul 2019 11:22:16 +0900 Subject: [PATCH] add ignore_positive_oom_score_adj --- Makefile | 1 - my_desktop.conf | 365 ------------------------------------------------ nohang | 35 ++--- nohang.conf | 10 +- 4 files changed, 16 insertions(+), 395 deletions(-) delete mode 100644 my_desktop.conf diff --git a/Makefile b/Makefile index eaf8c2e..d3fb60f 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,6 @@ install: install -m0644 ./nohang.conf $(DESTDIR)/$(PREFIX)/etc/nohang/nohang.conf install -m0644 ./nohang.conf $(DESTDIR)/$(PREFIX)/etc/nohang/nohang.conf.default - install -m0644 ./my_desktop.conf $(DESTDIR)/$(PREFIX)/etc/nohang/my_desktop.conf install -d $(DESTDIR)/$(PREFIX)/etc/logrotate.d install -m0644 ./nohang.logrotate $(DESTDIR)/$(PREFIX)/etc/logrotate.d/nohang diff --git a/my_desktop.conf b/my_desktop.conf deleted file mode 100644 index d1b3203..0000000 --- a/my_desktop.conf +++ /dev/null @@ -1,365 +0,0 @@ - This is nohang config file. - Lines starting with #, tabs and spaces are comments. - Lines starting with @ contain optional parameters. - All values are case sensitive. - Be careful: nohang doesn't forbid you to shoot yourself in the foot. - - The configuration includes the following sections: - - 0. Common zram settings - 1. Memory levels to respond to as an OOM threat - 2. Response on PSI memory metrics - 3. The frequency of checking the level of available memory - (and CPU usage) - 4. The prevention of killing innocent victims - 5. Impact on the badness of processes via matching their names, cgroups and - cmdlines with specified regular expressions - 6. Customize corrective actions: the execution of a specific command - instead of sending the SIGTERM signal - 7. GUI notifications: - - low memory warnings - - OOM prevention results - 8. Output verbosity - 9. Misc - - Just read the description of the parameters and edit the values. - Please restart the program after editing the config. - -############################################################################### - - 0. Common zram settings - - See https://www.kernel.org/doc/Documentation/blockdev/zram.txt - You maybe need to set `ignore_zram = False` if you has a big zram disksize. - -ignore_zram = False - -############################################################################### - - 1. Thresholds below which a signal should be sent to the victim - - Sets the available memory levels at or below which SIGTERM or SIGKILL - signals are sent. The signal will be sent if MemAvailable and - SwapFree (in /proc/meminfo) at the same time will drop below the - corresponding values. Can be specified in % (percent) and M (MiB). - Valid values are floating-point numbers from the range [0; 100] %. - - MemAvailable levels. - -mem_min_sigterm = 10 % -mem_min_sigkill = 5 % - - SwapFree levels. - -swap_min_sigterm = 15 % -swap_min_sigkill = 5 % - - Specifying the total share of zram in memory, if exceeded the - corresponding signals are sent. As the share of zram in memory - increases, it may fall responsiveness of the system. 90 % is a - usual hang level, not recommended to set very high. - - Can be specified in % and M. Valid values are floating-point - numbers from the range [0; 90] %. - -zram_max_sigterm = 50 % -zram_max_sigkill = 60 % - -############################################################################### - - 2. Response on PSI memory metrics (it needs Linux 4.20 and up) - - About PSI: - https://facebookmicrosites.github.io/psi/ - - Disabled by default (ignore_psi = True). - -ignore_psi = True - - Choose a path to PSI file. - By default it monitors system-wide file: /proc/pressure/memory - You also can set file to monitor one cgroup slice. - For example: - psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure - psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure - psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure - - Execute the command - find /sys/fs/cgroup -name memory.pressure - to find available memory.pressue files (except /proc/pressure/memory). - (actual for cgroup2) - -psi_path = /proc/pressure/memory - - Valid psi_metrics are: - some_avg10 - some_avg60 - some_avg300 - full_avg10 - full_avg60 - full_avg300 - - some_avg10 is most sensitive. - -psi_metrics = some_avg10 - -sigterm_psi_threshold = 60 -sigkill_psi_threshold = 90 - - >= 0, float -psi_excess_duration = 60 - -psi_post_action_delay = 90 - - -############################################################################### - - 3. The frequency of checking the amount of available memory - (and CPU usage) - - Coefficients that affect the intensity of monitoring. Reducing - the coefficients can reduce CPU usage and increase the periods - between memory checks. - - Why three coefficients instead of one? Because the swap fill rate - is usually lower than the RAM fill rate. - - It is possible to set a lower intensity of monitoring for swap - without compromising to prevent OOM and thus reduce the CPU load. - - Default values are well for desktop. On servers without rapid - fluctuations in memory levels the values can be reduced. - - Valid values are positive floating-point numbers. - -rate_mem = 4000 -rate_swap = 1500 -rate_zram = 6000 - - See also https://github.com/rfjakob/earlyoom/issues/61 - -max_sleep = 3 -min_sleep = 0.1 - - Sleep time if soft threshold exceeded. - -over_sleep = 0.05 - -############################################################################### - - 4. The prevention of killing innocent victims - - Valid values are integers from the range [0; 1000]. - -min_badness = 30 - - Valid values are non-negative floating-point numbers. - Min delay if a victim doesn't respond to SIGTERM in 10 ms. - -min_delay_after_sigterm = 3 - -post_zombie_delay = 0.1 - -victim_cache_time = 10 - - Valid values are True and False. - -decrease_oom_score_adj = True - - Valid values are integers from the range [0; 1000]. - -oom_score_adj_max = 0 - -############################################################################### - - 5. Impact on the badness of processes via matching their names, - cmdlines or UIDs with regular expressions using re.search(). - - See https://en.wikipedia.org/wiki/Regular_expression and - https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions - - Enabling this options slows down the search for the victim - because the names, cmdlines or UIDs of all processes - (except init and kthreads) are compared with the - specified regex patterns (in fact slowing down is caused by - reading all /proc/*/cmdline and /proc/*/status files). - - Use script `oom-sort` from nohang package to view - names, cmdlines and UIDs of processes. - - - 5.1 Matching process names with RE patterns - - Syntax: - - @BADNESS_ADJ_RE_NAME badness_adj /// RE_pattern - - New badness value will be += badness_adj - - It is possible to compare multiple patterns - with different badness_adj values. - - Example: - @BADNESS_ADJ_RE_NAME -500 /// ^sshd$ - - - 5.2 Matching CGroup-line (v1 and v2) with RE patterns - -@BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/ - - @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$ - - @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/ - - @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload - - - 5.3 Matching eUIDs with RE patterns - - @BADNESS_ADJ_RE_UID -100 /// ^0$ - - - 5.4 Matching realpath with RE patterns - - @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo - - - 5.5 Matching cmdlines with RE patterns - - A good option that allows fine adjustment. - - Prefer chromium tabs and electron-based apps -@BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer - - Prefer firefox tabs (Web Content and WebExtensions) -@BADNESS_ADJ_RE_CMDLINE 100 /// -appomni - -@BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox - - - 5.6 Matching environ with RE patterns - - @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user - - Note that you can control badness also via systemd units via - OOMScoreAdjust, see - www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust= - -############################################################################### - - 6. Customize corrective actions. - - TODO: docs - - Syntax: - KEY REGEXP SEPARATOR COMMAND - - @SOFT_ACTION_RE_NAME ^foo$ /// kill -SEGV $PID - @SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID - - @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE - @SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE - - $PID will be replaced by process PID. - $NAME will be replaced by process name. - $SERVICE will be replaced by .service if it exists (overwise it will be - relpaced by empty line) - -############################################################################### - - 7. GUI notifications: - - OOM prevention results and - - low memory warnings - -gui_notifications = True - - Enable GUI notifications about the low level of available memory. - Valid values are True and False. - -gui_low_memory_warnings = True - - Execute the command instead of sending GUI notifications if the value is - not empty line. For example: - warning_exe = cat /proc/meminfo & - -warning_exe = - - Can be specified in % (percent) and M (MiB). - Valid values are floating-point numbers from the range [0; 100] %. - -mem_min_warnings = 25 % - -swap_min_warnings = 35 % - -zram_max_warnings = 40 % - -psi_avg_warnings = 100 - - Valid values are floating-point numbers from the range [1; 300]. - -min_time_between_warnings = 20 - - Ampersands (&) will be replaced with asterisks (*) in process - names and in commands. - -############################################################################### - - 8. Verbosity - - Display the configuration when the program starts. - Valid values are True and False. - -print_config = False - - Print memory check results. - Valid values are True and False. - -print_mem_check_results = True - -min_mem_report_interval = 300 - - Print sleep periods between memory checks. - Valid values are True and False. - -print_sleep_periods = False - -print_total_stat = True - -print_proc_table = False - - Valid values: - None - cgroup_v1 - cgroup_v2 - realpath - cmdline - environ - -extra_table_info = None - -print_victim_info = True - -print_victim_cmdline = True - -max_ancestry_depth = 4 - -debug_gui_notifications = False - -separate_log = True - -psi_debug = False - -############################################################################### - - 9. Misc - -max_post_sigterm_victim_lifetime = 10 - -post_kill_exe = - -forbid_negative_badness = True - -############################################################################### - - Use cases, feature requests and any questions are welcome: - https://github.com/hakavlad/nohang/issues diff --git a/nohang b/nohang index 2a0d201..a0d64d3 100755 --- a/nohang +++ b/nohang @@ -64,12 +64,11 @@ def check_config(): log('4. The prevention of killing innocent victims') - log(' min_badness: {}'.format(min_badness)) - log(' min_delay_after_sigterm: {} sec'.format(min_delay_after_sigterm)) - log(' post_zombie_delay: {} sec'.format(post_zombie_delay)) - log(' victim_cache_time: {} sec'.format(victim_cache_time)) - log(' decrease_oom_score_adj: {}'.format(decrease_oom_score_adj)) - log(' oom_score_adj_max: {}'.format(oom_score_adj_max)) + log(' min_badness: {}'.format(min_badness)) + log(' min_delay_after_sigterm: {} sec'.format(min_delay_after_sigterm)) + log(' post_zombie_delay: {} sec'.format(post_zombie_delay)) + log(' victim_cache_time: {} sec'.format(victim_cache_time)) + log(' ignore_positive_oom_score_adj: {}'.format(ignore_positive_oom_score_adj)) log('5. Impact on the badness of processes') @@ -555,10 +554,10 @@ def pid_to_badness(pid): oom_score = int(rline1('/proc/' + pid + '/oom_score')) badness = oom_score - if decrease_oom_score_adj: + if ignore_positive_oom_score_adj: oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj')) - if badness > oom_score_adj_max and oom_score_adj > 0: - badness = badness - oom_score_adj + oom_score_adj_max + if oom_score_adj > 0: + badness = badness - oom_score_adj if regex_matching: name = pid_to_name(pid) @@ -2633,10 +2632,13 @@ print_mem_check_results = conf_parse_bool('print_mem_check_results') print_sleep_periods = conf_parse_bool('print_sleep_periods') gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings') gui_notifications = conf_parse_bool('gui_notifications') -decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj') ignore_psi = conf_parse_bool('ignore_psi') ignore_zram = conf_parse_bool('ignore_zram') debug_gui_notifications = conf_parse_bool('debug_gui_notifications') +ignore_positive_oom_score_adj = conf_parse_bool('ignore_positive_oom_score_adj') + + + (mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent @@ -2823,18 +2825,7 @@ else: exit(1) -if 'oom_score_adj_max' in config_dict: - oom_score_adj_max = string_to_int_convert_test( - config_dict['oom_score_adj_max']) - if oom_score_adj_max is None: - errprint('Invalid oom_score_adj_max value, not integer\nExit') - exit(1) - if oom_score_adj_max < 0 or oom_score_adj_max > 1000: - errprint('Invalid oom_score_adj_max value\nExit') - exit(1) -else: - errprint('oom_score_adj_max not in config\nExit') - exit(1) + if 'min_time_between_warnings' in config_dict: diff --git a/nohang.conf b/nohang.conf index 67bc8cd..3bfdcff 100644 --- a/nohang.conf +++ b/nohang.conf @@ -109,7 +109,7 @@ sigkill_psi_threshold = 90 >= 0, float psi_excess_duration = 60 -psi_post_action_delay = 90 +psi_post_action_delay = 60 ############################################################################### @@ -164,11 +164,7 @@ victim_cache_time = 10 Valid values are True and False. -decrease_oom_score_adj = False - - Valid values are integers from the range [0; 1000]. - -oom_score_adj_max = 0 +ignore_positive_oom_score_adj = False ############################################################################### @@ -337,7 +333,7 @@ print_victim_info = True print_victim_cmdline = False -max_ancestry_depth = 4 +max_ancestry_depth = 3 debug_gui_notifications = False