From 62dd52b7fd7a4a973cec2673c92cf0c9c70b8d29 Mon Sep 17 00:00:00 2001 From: Alexey Avramov Date: Sun, 2 Dec 2018 18:56:29 +0900 Subject: [PATCH] improve output and regex matching, add oom-sort --- README.md | 17 ++++--- nohang | 133 +++++++++++++++++++++++++++++++++++++++------------- nohang.conf | 11 +++-- oom-sort | 87 ++++++++++++++++++++++++++++++++++ oom-top | 118 ---------------------------------------------- 5 files changed, 206 insertions(+), 160 deletions(-) create mode 100755 oom-sort delete mode 100755 oom-top diff --git a/README.md b/README.md index 6817694..04f8c17 100644 --- a/README.md +++ b/README.md @@ -114,7 +114,7 @@ The program can be configured by editing the [config file](https://github.com/ha 1. Memory levels to respond to as an OOM threat 2. The frequency of checking the level of available memory (and CPU usage) 3. The prevention of killing innocent victims -4. Impact on the badness of processes via matching their names with regular expressions +4. Impact on the badness of processes via matching their names and cmdlines with regular expressions 5. The execution of a specific command instead of sending the SIGTERM signal 6. GUI notifications: - results of preventing OOM @@ -125,17 +125,17 @@ The program can be configured by editing the [config file](https://github.com/ha Just read the description of the parameters and edit the values. Please restart nohang to apply changes. Default path to the config after installing is `/etc/nohang/nohang.conf`. -## oom-top +## oom-sort -`oom-top` is an additional diagnostic tool from the nohang package. It sorts the processes in descending order of their oom_score and also displays oom_score_adj, Pid, Name, VmRSS, VmSwap. It will be installed together with nohang. +`oom-sort` is an additional diagnostic tool that will be installed with `nohang` package. It sorts the processes in descending order of their oom_score and also displays oom_score_adj, Uid, Pid, Name, VmRSS, VmSwap and optionally cmdline. 
Run `oom-sort --help` for more info. Usage: ``` -$ oom-top +$ oom-sort ``` -Output like this (monitors top 20 processes with period = 1 sec): +Output like this: ``` oom_score oom_adj oom_score_adj Pid Name RSS Swap @@ -170,11 +170,14 @@ See also `man journalctl`. Please create [issues](https://github.com/hakavlad/nohang/issues). Use cases, feature requests and any questions are welcome. - ## Changelog - In progress - - add oom-top + - add oom-sort + - add oom-trigger + - fix regex matching: replace re.fullmatch() with re.search() + - add support for cmdline regex matching + - improve output: display cmdline and Uid in corrective action reports - [v0.1](https://github.com/hakavlad/nohang/releases/tag/v0.1), 2018-11-23 - 1st release diff --git a/nohang b/nohang index 65f5629..9b110cb 100755 --- a/nohang +++ b/nohang @@ -8,6 +8,10 @@ from argparse import ArgumentParser from sys import stdout +import re + +vm_rss = 9 + sig_dict = {signal.SIGKILL: 'SIGKILL', signal.SIGTERM: 'SIGTERM'} @@ -222,6 +226,28 @@ def pid_to_name(pid): return '' + +def pid_to_cmdline(pid): + """ + Get process cmdline by pid. + + pid: str pid of required process + returns string cmdline + """ + with open('/proc/' + pid + '/cmdline') as file: + try: + return file.readlines()[0].replace('\x00', ' ').strip() + except IndexError: + return '' + + + + + + + + + def send_notify_warn(): """ Look for process with maximum 'badness' and warn user with notification. 
@@ -266,7 +292,9 @@ def send_notify(signal, name, pid): """ title = 'Preventing OOM' body = '{} process {}, {}'.format( - notify_sig_dict[signal], pid, name.replace('&', '*')) + notify_sig_dict[signal], pid, name.replace( + # & может ломать уведомления в некоторых темах оформления notify-send, поэтому заменяется на * + '&', '*')) if root: # send notification to all active users with notify-send b = root_notify_env() @@ -324,37 +352,77 @@ def sleep_after_send_signal(signal): sleep(min_delay_after_sigterm) + + + + + + + def find_victim_and_send_signal(signal): """ Find victim with highest badness and send SIGTERM/SIGKILL """ if decrease_oom_score_adj and root: + # это не оптимальное решение func_decrease_oom_score_adj(oom_score_adj_max) pid_badness_list = [] + # not implemented, in progress + prefer_re_cmdline = '' + prefer_cmd_factor = 1 + + if regex_matching: + for pid in os.listdir('/proc'): - if pid[0].isdecimal() is not True: + # только директории, имена которых состоят только из цифр, за исключением /proc/1/ + if pid[0].isdecimal() is not True or pid == '1': continue try: badness = int(rline1('/proc/' + pid + '/oom_score')) - name = pid_to_name(pid) - if fullmatch(avoid_regex, name) is not None: + + name = pid_to_name(pid) + cmdline = pid_to_cmdline(pid) + # отсеять потоки ядра + if cmdline == '': + continue + + #print([pid], [name], [cmdline]) + + if re.search(avoid_regex, name) is not None: badness = int(badness / avoid_factor) - if fullmatch(prefer_regex, name) is not None: + if re.search(prefer_regex, name) is not None: badness = int((badness + 1) * prefer_factor) + + + if re.search(avoid_re_cmdline, cmdline) is not None: + badness = int(badness / avoid_factor) + + + if re.search(prefer_re_cmdline, cmdline) is not None: + badness = int((badness + 1) * prefer_cmd_factor) + + except FileNotFoundError: badness = 0 except ProcessLookupError: badness = 0 pid_badness_list.append((pid, badness)) + + + + + + + else: for pid in os.listdir('/proc'): @@ -380,45 
+448,48 @@ def find_victim_and_send_signal(signal): pid = pid_tuple_list[0] name = pid_to_name(pid) - # Get VmRSS and VmSwap of victim process and try to send signal + # Get VmRSS and VmSwap and cmdline of victim process and try to send signal try: with open('/proc/' + pid + '/status') as f: for n, line in enumerate(f): + if n is uid_index: + uid = line.split('\t')[1] + continue if n is vm_rss_index: - vm_rss = kib_to_mib(int( - line.split('\t')[1][:-4])) + vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) continue if n is vm_swap_index: - vm_swap = kib_to_mib(int( - line.split('\t')[1][:-4])) + vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) break + with open('/proc/' + pid + '/cmdline') as file: + try: + cmdline = file.readlines()[0].replace('\x00', ' ') + except IndexError: + cmdline = '' + except FileNotFoundError: - vm_rss = 0 - vm_swap = 0 + pass + + + # исключение - значит вообще не надо бить этот процесс. Или искать новый, или принт: процесс хуёвый, стоп! + except ProcessLookupError: - vm_rss = 0 - vm_swap = 0 + pass except IndexError: - vm_rss = 0 - vm_swap = 0 + pass except ValueError: - vm_rss = 0 - vm_swap = 0 + pass if execute_the_command and signal is signal.SIGTERM and name in etc_dict: command = etc_dict[name] exit_status = os.system(etc_dict[name]) response_time = time() - time0 - etc_info = ''' Finding the process with the highest badness\n - Victim is {}, pid:{}, badness:{}, VmRSS: {} MiB, VmSwap: {} MiB\n - Execute the command: {}\n - Exit status: {}; response time: {} ms'''.format( - name, pid, victim_badness, vm_rss, vm_swap, command, - exit_status, round(response_time * 1000)) + + etc_info = ''' Found the victim with highest badness:\n Name: {}\n Pid: {}\n Uid: {}\n Badness: {}\n VmRSS: {} MiB\n VmSwap: {} MiB\n Execute the command: {}\n Exit status: {}; response time: {} ms'''.format(name, pid, uid, victim_badness, vm_rss, vm_swap, command, exit_status, round(response_time * 1000)) + print(mem_info) print(etc_info) - if 
gui_notifications: - send_notify_etc(pid, name, command) + if gui_notifications: send_notify_etc(pid, name, command) else: @@ -440,11 +511,8 @@ def find_victim_and_send_signal(signal): send_result = 'no such process; response time: {} ms'.format( round(response_time * 1000)) - preventing_oom_message = ' Finding the process with the highes' \ - 't badness\n Victim is {}, pid: {}, badness: {}, VmRS' \ - 'S: {} MiB, VmSwap: {} MiB\n Sending {} to the victim; {}'.format( - name, pid, victim_badness, vm_rss, vm_swap, sig_dict[signal], - send_result) + preventing_oom_message = ' Found the process with highest badness:\n Name: {}\n Pid: {}\n Uid: {}\n Badness: {}\n VmRSS: {} MiB\n VmSwap: {} MiB\n Cmdline: {}\n Sending {} to the victim; {}'.format( + name, pid, uid, victim_badness, vm_rss, vm_swap, cmdline, sig_dict[signal], send_result) print(mem_info) print(preventing_oom_message) @@ -629,7 +697,7 @@ for s in status_list: vm_rss_index = status_names.index('VmRSS') vm_swap_index = status_names.index('VmSwap') - +uid_index = status_names.index('Uid') ########################################################################## @@ -751,6 +819,7 @@ execute_the_command = conf_parse_bool('execute_the_command') prefer_regex = conf_parse_string('prefer_regex') avoid_regex = conf_parse_string('avoid_regex') + mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent = calculate_percent( 'mem_min_sigterm') mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent = calculate_percent( diff --git a/nohang.conf b/nohang.conf index 09514d9..2febe4b 100644 --- a/nohang.conf +++ b/nohang.conf @@ -132,13 +132,16 @@ oom_score_adj_max = 30 Valid values are True and False. 
-regex_matching = False +regex_matching = True Badness of processes whose names correspond to prefer_regex will be calculated by the following formula: badness = (oom_score + 1) * prefer_factor -prefer_regex = tail|python3 +prefer_regex = + + +# prefer_re_cmdline = ^/usr/lib/firefox # not implemented, in progress Valid values are floating-point numbers from the range [1; 1000]. @@ -147,8 +150,10 @@ prefer_factor = 3 Badness of processes whose names correspond to avoid_regex will be calculated by the following formula: badness = oom_score / avoid_factor + + # Need more examples -avoid_regex = Xorg|sshd +avoid_regex = ^(sshd|Xorg)$ Valid values are floating-point numbers from the range [1; 1000]. diff --git a/oom-sort b/oom-sort new file mode 100755 index 0000000..e48a2df --- /dev/null +++ b/oom-sort @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +""" +sort processes by oom_score +""" +from operator import itemgetter +from os import listdir + +def human(num): + '''KiB to MiB''' + return str(round(num / 1024.0)).rjust(6, ' ') + + +with open('/proc/self/status') as file: + status_list = file.readlines() + +# список имен из /proc/*/status для дальнейшего поиска позиций VmRSS and VmSwap +status_names = [] +for s in status_list: + status_names.append(s.split(':')[0]) + +vm_rss_index = status_names.index('VmRSS') +vm_swap_index = status_names.index('VmSwap') +uid_index = status_names.index('Uid') + +oom_list = [] +for pid in listdir('/proc'): + # пропускаем элементы, состоящие не из цифр и PID 1 + if pid.isdigit() is not True or pid == '1': + continue + try: + with open('/proc/' + pid + '/cmdline') as file: + try: + cmdline = file.readlines()[0].replace('\x00', ' ') + except IndexError: + continue + with open('/proc/' + pid + '/oom_score') as file: + oom_score = int(file.readlines()[0][:-1]) + with open('/proc/' + pid + '/oom_score_adj') as file: + oom_score_adj = int(file.readlines()[0][:-1]) + except FileNotFoundError: + continue + except ProcessLookupError: + continue + + 
oom_list.append((pid, oom_score, oom_score_adj, cmdline)) + +# list sorted by oom_score +oom_list_sorted = sorted(oom_list, key=itemgetter(1), reverse=True) + +print('oom_score oom_score_adj Uid Pid Name VmRSS VmSwap cmdline') +print('--------- ------------- ----- ----- --------------- -------- -------- -------') + +# итерируемся по сортированному списку oom_score, печатая name, pid etc +for i in oom_list_sorted: + pid = i[0] + oom_score = i[1] + oom_score_adj = i[2] + cmdline = i[3].strip() + + try: + # читать часть файла не дальше VmSwap - когда-нибудь + with open('/proc/' + pid + '/status') as file: + status_list = file.readlines() + + vm_rss = int(status_list[vm_rss_index].split(':')[1].split(' ')[-2]) + vm_swap = int(status_list[vm_swap_index].split(':')[1].split(' ')[-2]) + name = status_list[0][:-1].split('\t')[1] + uid = status_list[uid_index].split('\t')[1] + + except FileNotFoundError: + continue + + except ProcessLookupError: + continue + + print( + '{} {} {} {} {} {} M {} M {}'.format( + str(oom_score).rjust(9), + str(oom_score_adj).rjust(13), + uid.rjust(5), + str(pid).rjust(5), + name.ljust(15), + human(vm_rss), + human(vm_swap), + cmdline[:] + ) + ) diff --git a/oom-top b/oom-top deleted file mode 100755 index 0deec2b..0000000 --- a/oom-top +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 - -# top oom_score - -from time import sleep -from operator import itemgetter -from os import listdir - -period = 1 -num_lines = 20 -oom_score_min_value = 0 - -# перевод кило в мегабайты и выравнивание по правому краю -def human(num): - return str(round(num / 1024.0)).rjust(7, ' ') - -with open('/proc/self/status') as file: - status_list = file.readlines() - -# список имен из /proc/*/status для дальнейшего поиска позиций VmRSS and VmSwap -status_names = [] -for s in status_list: - status_names.append(s.split(':')[0]) - -vm_rss_index = status_names.index('VmRSS') -vm_swap_index = status_names.index('VmSwap') - - -while True: - - oom_list = [] - - # цикл для 
наполнения oom_list - for i in listdir('/proc'): - - # пропускаем элементы, состоящие не из цифр - if i.isdigit() is not True: - continue - - try: - - with open('/proc/' + i + '/oom_score') as file: - oom_score = int(file.readlines()[0][:-1]) - - with open('/proc/' + i + '/oom_adj') as file: - oom_adj = int(file.readlines()[0][:-1]) - - with open('/proc/' + i + '/oom_score_adj') as file: - oom_score_adj = int(file.readlines()[0][:-1]) - - except FileNotFoundError: - pass - except ProcessLookupError: - pass - - oom_list.append((i, oom_score, oom_adj, oom_score_adj)) - - # получаем сортированный список oom_score - oom_list_sorted = sorted(oom_list, key=itemgetter(1), reverse=True) - - # если общее число процессов меньше num_lines - приравниваем последний к числу процессов - if len(oom_list_sorted) < num_lines: - num_lines = len(oom_list_sorted) - 1 - - - print('\033coom_score oom_adj oom_score_adj Pid Name RSS Swap') - print('--------- ------- ------------- ----- --------------- --------- ---------') - - # итерируемся по сортированному списку oom_score, печатая name, pid etc - for i in oom_list_sorted[:num_lines]: - - pid = i[0] - oom_score = i[1] - oom_adj = i[2] - oom_score_adj = i[3] - - try: - - # читать часть файла не дальше VmSwap - with open('/proc/' + pid + '/status') as file: - status_list = file.readlines() - - vm_rss = int(status_list[vm_rss_index].split(':')[1].split(' ')[-2]) - vm_swap = int(status_list[vm_swap_index].split(':')[1].split(' ')[-2]) - name = status_list[0][:-1].split('\t')[1] - - # потоки ядра - except IndexError: - vm_rss = 0 - vm_swap = 0 - name = status_list[0][:-1].split('\t')[1] - - except FileNotFoundError: - vm_rss = 0 - vm_swap = 0 - name = '' - - except ProcessLookupError: - vm_rss = 0 - vm_swap = 0 - name = '' - - if oom_score >= oom_score_min_value: - - print( - '{} {} {} {} {} {} M {} M'.format( - str(oom_score).rjust(9), - str(oom_adj).rjust(7), - str(oom_score_adj).rjust(13), - str(pid).rjust(5), - name.ljust(15), - 
human(vm_rss), - human(vm_swap), - ) - ) - - sleep(period) -