diff --git a/README.md b/README.md index aad8c21..77d269e 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,9 @@ The tools listed above may work at the same time on one computer. - Sending the SIGTERM signal is default corrective action. If the victim does not respond to SIGTERM, with a further drop in the level of memory it gets SIGKILL. - Impact on the badness of processes via matching their - names, + - cgroups, - cmdlines and - - eUIDs + - euids with specified regular expressions - If the name of the victim matches a certain regex pattern, you can run any command instead of sending the SIGTERM signal (the default corrective action) to the victim. For example: - `sysmemctl restart foo` @@ -211,7 +212,7 @@ Please create [issues](https://github.com/hakavlad/nohang/issues). Use cases, fe - [x] Handled `UnicodeDecodeError` if victim name consists of many unicode characters ([rfjakob/earlyoom#110](https://github.com/rfjakob/earlyoom/issues/110)) - [x] Fixed process crash before performing corrective actions if Python 3.4 or lower are used to interpret nohang - [x] Improve output: - - [x] Display `oom_score`, `oom_score_adj`, `Ancestry`, `EUID`, `State`, `VmSize`, `RssAnon`, `RssFile`, `RssShmem`, `Realpath`, `Cmdline` and `Lifetime` of the victim in corrective action reports + - [x] Display `oom_score`, `oom_score_adj`, `Ancestry`, `EUID`, `State`, `VmSize`, `RssAnon`, `RssFile`, `RssShmem`, `CGroup`, `Realpath`, `Cmdline` and `Lifetime` of the victim in corrective action reports - [x] Added memory report interval - [x] Added delta memory info (the rate of change of available memory) - [x] Print statistics on corrective actions after each corrective action @@ -224,24 +225,24 @@ Please create [issues](https://github.com/hakavlad/nohang/issues). 
Use cases, fe - [x] Messages are sent to the helper via a temporary file in `/dev/shm` - [x] Deduplication of frequently repeated identical notifications (for example, if the victim does not respond to SIGTERM) - [x] Improved modifing badness via matching with regular expressions: - - [x] Added the ability to set many different `badness_adj` for processes depending on the matching `name`, `cmdline` and `euid` with the specified regular expressions ([issue #74](https://github.com/hakavlad/nohang/issues/11)) + - [x] Added the ability to set many different `badness_adj` for processes depending on the matching `Name`, `CGroup`, `Cmdline` and `EUID` with the specified regular expressions ([issue #74](https://github.com/hakavlad/nohang/issues/11)) - [x] Fix: replace `re.fullmatch()` by `re.search()` - [x] Reduced memory usage: - [x] Reduced memory usage and startup time (using `sys.argv` instead of `argparse`) - [x] Reduced memory usage with `mlockall()` using `MCL_ONFAULT` ([rfjakob/earlyoom#112](https://github.com/rfjakob/earlyoom/issues/112)) and lock all memory by default - [x] Improve poll rate algorithm - - [x] Fixed Makefile for installation on CentOS 7. + - [x] Fixed Makefile for installation on CentOS 7 (remove gzip `-k` option). 
- [x] Added `max_post_sigterm_victim_lifetime` option: send SIGKILL to the victim if it doesn't respond to SIGTERM for a certain time - [x] Added `post_kill_exe` option (the ability to run any command after killing the victim) - [x] Added `warning_exe` option (the ability to run any command instead of GUI low memory warnings) - - [x] Improve victim search algorithm (do it ~30% faster) ([rfjakob/earlyoom#114](https://github.com/rfjakob/earlyoom/issues/114)) - - [x] Improve limiting `oom_score_adj`: now it can works with UID != 0 + - [x] Improved victim search algorithm (do it ~30% faster) ([rfjakob/earlyoom#114](https://github.com/rfjakob/earlyoom/issues/114)) + - [x] Improved limiting `oom_score_adj`: now it can work with UID != 0 - [x] Fixed conf parsing: use of `line.partition('=')` instead of `line.split('=')` - [x] Added `oom-sort` - [x] Removed self-defense options from the config, use systemd unit scheduling instead - [x] Added the ability to send any signal instead of SIGTERM for processes with certain names - [x] Added initial support for `PSI` - - [ ] Improve user input validation - - [ ] Improve documentation + - [x] Improved user input validation + - [x] Improved documentation - [v0.1](https://github.com/hakavlad/nohang/releases/tag/v0.1), 2018-11-23: Initial release diff --git a/nohang b/nohang index 4853388..f941384 100755 --- a/nohang +++ b/nohang @@ -7,7 +7,8 @@ from time import sleep, time from operator import itemgetter from sys import stdout, stderr, argv, exit, version from signal import SIGKILL, SIGTERM - +from re import search +from sre_constants import error as invalid_re start_time = time() @@ -62,7 +63,28 @@ separate_log = False # will be overwritten after parse config # define functions +def valid_re(reg_exp): + """Validate regular expression. 
+ """ + try: + search(reg_exp, '') + except invalid_re: + log('Invalid config: invalid regexp: {}'.format(reg_exp)) + exit(1) + + +def pid_to_cgroup(pid): + """Return the path from the '1:' line of /proc/<pid>/cgroup, or None. + """ + with open('/proc/' + pid + '/cgroup') as f: + for line in f: + if line.startswith('1:'): + return '/' + line.partition('/')[2][:-1] + + def func_print_proc_table(): + """Log the process table via find_victim() and exit. + """ print_proc_table = True find_victim(print_proc_table) exit() @@ -770,6 +792,12 @@ def pid_to_badness(pid): if search(re_tup[1], name) is not None: badness += int(re_tup[0]) + if re_match_cgroup: + cgroup = pid_to_cgroup(pid) or '' + for re_tup in cgroup_re_list: + if search(re_tup[1], cgroup) is not None: + badness += int(re_tup[0]) + if re_match_cmdline: cmdline = pid_to_cmdline(pid) for re_tup in cmdline_re_list: @@ -819,7 +847,7 @@ def find_victim(_print_proc_table): if _print_proc_table: log('==============================================================' '=================') - log(' PID badness Name eUID cmdline') + log(' PID badness Name eUID CGroup') log('------- ------- --------------- ---------- -----------' '----------------------') @@ -835,8 +863,10 @@ def find_victim(_print_proc_table): str(badness).rjust(7), pid_to_name(pid).ljust(15), pid_to_uid(pid).rjust(10), - pid_to_cmdline(pid) + # pid_to_cmdline(pid) # pid_to_realpath(pid) + pid_to_cgroup(pid) + # '' ) ) @@ -1035,6 +1065,8 @@ def find_victim_info(pid, victim_badness, name): victim_lifetime = format_time(uptime() - pid_to_starttime(pid)) + victim_cgroup = pid_to_cgroup(pid) + victim_info = 'Victim information (found in {} ms):' \ '\n Name: {}' \ '\n State: {}' \ @@ -1047,6 +1079,7 @@ def find_victim_info(pid, victim_badness, name): '\n VmSize: {} MiB' \ '\n VmRSS: {} MiB {}' \ '\n VmSwap: {} MiB' \ + '\n CGroup: {}' \ '\n Realpath: {}' \ '\n Cmdline: {}' \ '\n Lifetime: {}'.format( @@ -1063,6 +1096,7 @@ def find_victim_info(pid, victim_badness, name): str(vm_rss).rjust(len_vm), detailed_rss_info, str(vm_swap).rjust(len_vm), + victim_cgroup, realpath, cmdline, 
victim_lifetime) @@ -1473,6 +1507,9 @@ config_dict = dict() processname_re_list = [] cmdline_re_list = [] uid_re_list = [] +cgroup_re_list = [] + + # dictionary with names and commands for the parameter # execute_the_command @@ -1480,6 +1517,11 @@ uid_re_list = [] etc_dict = dict() + + + + + try: with open(config) as f: @@ -1494,7 +1536,16 @@ try: if not a and not b and not c and not d and not etc: a = line.partition('=') - config_dict[a[0].strip()] = a[2].strip() + + + key = a[0].strip() + value = a[2].strip() + + if key not in config_dict: + config_dict[key] = value + else: + print('ERROR: config key duplication: {}'.format(key)) + exit(1) if etc: a = line[4:].split('///') @@ -1506,20 +1557,37 @@ try: exit(1) etc_dict[etc_name] = etc_command - # NEED VALIDATION! if line.startswith('@PROCESSNAME_RE'): - a = line.partition('@PROCESSNAME_RE')[ - 2].strip(' \n').partition('///') - processname_re_list.append((a[0].strip(' '), a[2].strip(' '))) + a = line.partition( + '@PROCESSNAME_RE')[2].strip(' \n').partition('///') + badness_adj = a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + processname_re_list.append((badness_adj, reg_exp)) if line.startswith('@CMDLINE_RE'): - a = line.partition('@CMDLINE_RE')[2].strip( - ' \n').partition('///') - cmdline_re_list.append((a[0].strip(' '), a[2].strip(' '))) + a = line.partition( + '@CMDLINE_RE')[2].strip(' \n').partition('///') + badness_adj = a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + cmdline_re_list.append((badness_adj, reg_exp)) if line.startswith('@UID_RE'): - a = line.partition('@UID_RE')[2].strip(' \n').partition('///') - uid_re_list.append((a[0].strip(' '), a[2].strip(' '))) + a = line.partition( + '@UID_RE')[2].strip(' \n').partition('///') + badness_adj = a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + uid_re_list.append((badness_adj, reg_exp)) + + if line.startswith('@CGROUP_RE'): + a = line.partition( + '@CGROUP_RE')[2].strip(' \n').partition('///') + badness_adj 
= a[0].strip(' ') + reg_exp = a[2].strip(' ') + valid_re(reg_exp) + cgroup_re_list.append((badness_adj, reg_exp)) except PermissionError: errprint('PermissionError', conf_err_mess) @@ -1564,11 +1632,12 @@ ignore_psi = conf_parse_bool('ignore_psi') regex_matching = conf_parse_bool('regex_matching') re_match_cmdline = conf_parse_bool('re_match_cmdline') re_match_uid = conf_parse_bool('re_match_uid') +re_match_cgroup = conf_parse_bool('re_match_cgroup') -if regex_matching or re_match_cmdline or re_match_uid: +if regex_matching or re_match_cmdline or re_match_uid or re_match_cgroup: from re import search - import sre_constants + from sre_constants import error as invalid_re (mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent ) = calculate_percent('mem_min_sigterm') diff --git a/nohang.conf b/nohang.conf index b752c8a..c6b1872 100644 --- a/nohang.conf +++ b/nohang.conf @@ -101,7 +101,6 @@ sigkill_psi_threshold = 90 psi_post_action_delay = 60 - ##################################################################### 3. The frequency of checking the amount of available memory @@ -139,7 +138,6 @@ max_sleep_time = 3 min_sleep_time = 0.1 - ##################################################################### 4. The prevention of killing innocent victims @@ -219,9 +217,6 @@ regex_matching = False @PROCESSNAME_RE -500 /// ^sshd$ -@PROCESSNAME_RE 300 /// ^(chromium|firefox)$ - - 5.2 Matching cmdlines with RE patterns A good option that allows fine adjustment. @@ -241,6 +236,14 @@ re_match_uid = False @UID_RE -100 /// ^0$ + 5.4 Matching CGroup-line with RE patterns + +re_match_cgroup = False + +@CGROUP_RE -50 /// system.slice + +@CGROUP_RE 50 /// foo.service + Note that you can control badness also via systemd units via OOMScoreAdjust, see https://www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust= @@ -322,11 +325,6 @@ gui_low_memory_warnings = False warning_exe = - Минимальное время между отправками уведомлений в секундах. 
- Valid values are floating-point numbers from the range [1; 300]. - -min_time_between_warnings = 15 - Если значения MemAvailable и SwapFree одновременно будут ниже соотвестствующих значений, то будут отправлены уведомления. @@ -343,10 +341,14 @@ swap_min_warnings = 25 % zram_max_warnings = 40 % + Минимальное время между отправками уведомлений в секундах. + Valid values are floating-point numbers from the range [1; 300]. + +min_time_between_warnings = 15 + Ampersands (&) will be replaced with asterisks (*) in process names and in commands. - ##################################################################### 8. Verbosity