From 431d01772c0e5276dc1a89fa3639252e7fdbdfa4 Mon Sep 17 00:00:00 2001 From: Alexey Avramov Date: Sat, 3 Aug 2019 15:40:01 +0900 Subject: [PATCH] test --- .travis.yml | 6 + nohang | 2 +- test.conf | 357 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 364 insertions(+), 1 deletion(-) create mode 100644 test.conf diff --git a/.travis.yml b/.travis.yml index e04c383..2cd6420 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,19 @@ +dist: bionic + language: python +sudo: required + script: - sudo make install - nohang -h - nohang -v - nohang -cc /etc/nohang/nohang.conf + - nohang -cc test.conf - nohang -p - sudo nohang -p - oom-sort -h - oom-sort - sudo bash -c "nohang & tail /dev/zero & sleep 30 && pkill python3" + - sudo bash -c "nohang -c test.conf & tail /dev/zero & sleep 30 && pkill python3" - sudo make uninstall diff --git a/nohang b/nohang index c37fb6c..58433b5 100755 --- a/nohang +++ b/nohang @@ -3399,5 +3399,5 @@ while True: send_notify_warn() warn_timer = 0 - x = 0 / 0 + sleep_after_check_mem() diff --git a/test.conf b/test.conf new file mode 100644 index 0000000..37ed1db --- /dev/null +++ b/test.conf @@ -0,0 +1,357 @@ + This is nohang config file. + Lines starting with #, tabs and spaces are comments. + Lines starting with @ contain optional parameters. + All values are case sensitive. + Be careful: nohang doesn't forbid you to shoot yourself in the foot. + + The configuration includes the following sections: + + 0. Common zram settings + 1. Memory levels to respond to as an OOM threat + 2. Response on PSI memory metrics + 3. The frequency of checking the level of available memory + (and CPU usage) + 4. The prevention of killing innocent victims + 5. Impact on the badness of processes via matching their names, cgroups and + cmdlines with specified regular expressions + 6. Customize corrective actions: the execution of a specific command + instead of sending the SIGTERM signal + 7. GUI notifications: + - low memory warnings + - OOM prevention results + 8. Output verbosity + 9. Misc + + Just read the description of the parameters and edit the values. + Please restart the program after editing the config. + +############################################################################### + + 0. Common zram settings + + See https://www.kernel.org/doc/Documentation/blockdev/zram.txt + You maybe need to set `zram_checking_enabled = True` if you has a big zram disksize. + +zram_checking_enabled = True + +############################################################################### + + 1. Thresholds below which a signal should be sent to the victim + + Sets the available memory levels at or below which SIGTERM or SIGKILL + signals are sent. The signal will be sent if MemAvailable and + SwapFree (in /proc/meminfo) at the same time will drop below the + corresponding values. Can be specified in % (percent) and M (MiB). + Valid values are floating-point numbers from the range [0; 100] %. + + MemAvailable levels. + +soft_threshold_min_mem = 10 % +hard_threshold_min_mem = 5 % + + SwapFree levels. + +soft_threshold_min_swap = 15 % +hard_threshold_min_swap = 5 % + + Specifying the total share of zram in memory, if exceeded the + corresponding signals are sent. As the share of zram in memory + increases, it may fall responsiveness of the system. 90 % is a + usual hang level, not recommended to set very high. + + Can be specified in % and M. Valid values are floating-point + numbers from the range [0; 90] %. + +soft_threshold_max_zram = 50 % +hard_threshold_max_zram = 60 % + + +############################################################################### + + 2. Response on PSI memory metrics (it needs Linux 4.20 and up) + + About PSI: + https://facebookmicrosites.github.io/psi/ + + Disabled by default (psi_checking_enabled = False). + +psi_checking_enabled = True + + Choose a path to PSI file. + By default it monitors system-wide file: /proc/pressure/memory + You also can set file to monitor one cgroup slice. + For example: + psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure + psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure + psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure + + Execute the command + find /sys/fs/cgroup -name memory.pressure + to find available memory.pressue files (except /proc/pressure/memory). + (actual for cgroup2) + +psi_path = /proc/pressure/memory + + Valid psi_metrics are: + some_avg10 + some_avg60 + some_avg300 + full_avg10 + full_avg60 + full_avg300 + + some_avg10 is most sensitive. + +psi_metrics = some_avg10 + +soft_threshold_max_psi = 60 + +hard_threshold_max_psi = 90 + + >= 0, float +psi_excess_duration = 60 + +psi_post_action_delay = 60 + + +############################################################################### + + 3. The frequency of checking the amount of available memory + (and CPU usage) + + Coefficients that affect the intensity of monitoring. Reducing + the coefficients can reduce CPU usage and increase the periods + between memory checks. + + Why three coefficients instead of one? Because the swap fill rate + is usually lower than the RAM fill rate. + + It is possible to set a lower intensity of monitoring for swap + without compromising to prevent OOM and thus reduce the CPU load. + + Default values are well for desktop. On servers without rapid + fluctuations in memory levels the values can be reduced. + + Valid values are positive floating-point numbers. + +fill_rate_mem = 4000 +fill_rate_swap = 1500 +fill_rate_zram = 6000 + + See also https://github.com/rfjakob/earlyoom/issues/61 + +max_sleep = 3 +min_sleep = 0.1 + + Sleep time if soft threshold exceeded. + +over_sleep = 0.05 + +############################################################################### + + 4. The prevention of killing innocent victims + + Valid values are integers from the range [0; 1000]. + +min_badness = 20 + + Valid values are non-negative floating-point numbers. + Min delay if a victim doesn't respond to SIGTERM in 10 ms. + +post_soft_action_delay = 3 + +post_zombie_delay = 0.1 + +victim_cache_time = 10 + + Valid values are True and False. + +ignore_positive_oom_score_adj = True + +############################################################################### + + 5. Impact on the badness of processes via matching their names, + cmdlines or UIDs with regular expressions using re.search(). + + See https://en.wikipedia.org/wiki/Regular_expression and + https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions + + Enabling this options slows down the search for the victim + because the names, cmdlines or UIDs of all processes + (except init and kthreads) are compared with the + specified regex patterns (in fact slowing down is caused by + reading all /proc/*/cmdline and /proc/*/status files). + + Use script `oom-sort` from nohang package to view + names, cmdlines and UIDs of processes. + + 5.1. Matching process names with RE patterns + + Syntax: + + @BADNESS_ADJ_RE_NAME badness_adj /// RE_pattern + + New badness value will be += badness_adj + + It is possible to compare multiple patterns + with different badness_adj values. + + Example: + @BADNESS_ADJ_RE_NAME -500 /// ^sshd$ + + 5.2. Matching CGroup_v1-line with RE patterns + + @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/ + + @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$ + + @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/ + + 5.3. Matching CGroup_v2-line with RE patterns + + @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload + + 5.4. Matching eUIDs with RE patterns + + @BADNESS_ADJ_RE_UID -100 /// ^0$ + + 5.5. Matching realpath with RE patterns + + @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo + + 5.6. Matching cmdlines with RE patterns + + A good option that allows fine adjustment. + + Prefer chromium tabs and electron-based apps + @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer + + Prefer firefox tabs (Web Content and WebExtensions) + @BADNESS_ADJ_RE_CMDLINE 100 /// -appomni + + @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox + + 5.7. Matching environ with RE patterns + + @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user + + Note that you can control badness also via systemd units via + OOMScoreAdjust, see + www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust= + +############################################################################### + + 6. Customize corrective actions. + + TODO: docs + + Syntax: + KEY REGEXP SEPARATOR COMMAND + + @SOFT_ACTION_RE_NAME ^foo$ /// kill -SEGV $PID + @SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID + + @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE + @SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE + + $PID will be replaced by process PID. + $NAME will be replaced by process name. + $SERVICE will be replaced by .service if it exists (overwise it will be + relpaced by empty line) + +############################################################################### + + 7. GUI notifications & low memory warnings + +post_action_gui_notifications = True + + Enable GUI notifications about the low level of available memory. + Valid values are True and False. + +low_memory_warnings_enabled = True + + Execute the command instead of sending GUI notifications if the value is + not empty line. For example: + warning_exe = cat /proc/meminfo & + +warning_exe = echo 0 + + Can be specified in % (percent) and M (MiB). + Valid values are floating-point numbers from the range [0; 100] %. + +warning_threshold_min_mem = 25 % + +warning_threshold_min_swap = 35 % + +warning_threshold_max_zram = 40 % + +warning_threshold_max_psi = 100 + + Valid values are floating-point numbers from the range [1; 300]. + +min_post_warning_delay = 20 + + Ampersands (&) will be replaced with asterisks (*) in process + names and in commands. + +############################################################################### + + 8. Verbosity + + Display the configuration when the program starts. + Valid values are True and False. + +print_config_at_startup = True + + Print memory check results. + Valid values are True and False. + +print_mem_check_results = True + +min_mem_report_interval = 0 + +print_proc_table = True + + Valid values: + None + cgroup_v1 + cgroup_v2 + realpath + cmdline + environ + +extra_table_info = cgroup_v1 + +print_victim_status = True + +max_victim_ancestry_depth = 99 + +print_victim_cmdline = True + +print_statistics = True + + Print sleep periods between memory checks. + Valid values are True and False. + +debug_psi = True + +debug_gui_notifications = False + +debug_sleep = True + +separate_log = True + +############################################################################### + + 9. Misc + +max_soft_exit_time = 10 + +post_kill_exe = echo 0 + +forbid_negative_badness = True + +############################################################################### + + Use cases, feature requests and any questions are welcome: + https://github.com/hakavlad/nohang/issues