improve output and regex matching, add oom-sort

This commit is contained in:
Alexey Avramov 2018-12-02 18:56:29 +09:00
parent 14ea6652d3
commit 62dd52b7fd
5 changed files with 206 additions and 160 deletions

View File

@ -114,7 +114,7 @@ The program can be configured by editing the [config file](https://github.com/ha
1. Memory levels to respond to as an OOM threat
2. The frequency of checking the level of available memory (and CPU usage)
3. The prevention of killing innocent victims
4. Impact on the badness of processes via matching their names with regular expressions
4. Impact on the badness of processes via matching their names and cmdlines with regular expressions
5. The execution of a specific command instead of sending the SIGTERM signal
6. GUI notifications:
- results of preventing OOM
@ -125,17 +125,17 @@ The program can be configured by editing the [config file](https://github.com/ha
Just read the description of the parameters and edit the values. Please restart nohang to apply changes. Default path to the config after installing is `/etc/nohang/nohang.conf`.
## oom-top
## oom-sort
`oom-top` is an additional diagnostic tool from the nohang package. It sorts the processes in descending order of their oom_score and also displays oom_score_adj, Pid, Name, VmRSS, VmSwap. It will be installed together with nohang.
`oom-sort` is an additional diagnostic tool that is installed with the `nohang` package. It sorts processes in descending order of their oom_score and also displays oom_score_adj, Uid, Pid, Name, VmRSS, VmSwap and, optionally, cmdline. Run `oom-sort --help` for more info.
Usage:
```
$ oom-top
$ oom-sort
```
Output like this (monitors top 20 processes with period = 1 sec):
The output looks like this:
```
oom_score oom_adj oom_score_adj Pid Name RSS Swap
@ -170,11 +170,14 @@ See also `man journalctl`.
Please create [issues](https://github.com/hakavlad/nohang/issues). Use cases, feature requests and any questions are welcome.
## Changelog
- In progress
- add oom-top
- add oom-sort
- add oom-trigger
- fix regex matching: replace re.fullmatch() by re.search()
- add support for cmdline regex matching
- improve output: display cmdline and Uid in corrective action reports
- [v0.1](https://github.com/hakavlad/nohang/releases/tag/v0.1), 2018-11-23
- 1st release

133
nohang
View File

@ -8,6 +8,10 @@ from argparse import ArgumentParser
from sys import stdout
import re
vm_rss = 9
sig_dict = {signal.SIGKILL: 'SIGKILL',
signal.SIGTERM: 'SIGTERM'}
@ -222,6 +226,28 @@ def pid_to_name(pid):
return '<unknown>'
def pid_to_cmdline(pid):
    """
    Get process cmdline by pid.

    pid: str pid of required process

    returns string cmdline: the NUL-separated arguments joined with single
    spaces and stripped of surrounding whitespace, or '' when the cmdline
    file is empty.

    Errors raised while opening or reading the file (for example
    FileNotFoundError when the process has already exited) are not caught
    here and propagate to the caller.
    """
    with open('/proc/' + pid + '/cmdline') as file:
        # Read the whole file instead of only its first line: an argument
        # may itself contain '\n', and taking readlines()[0] would cut the
        # cmdline short at that point. An empty file simply yields ''.
        return file.read().replace('\x00', ' ').strip()
def send_notify_warn():
"""
Look for process with maximum 'badness' and warn user with notification.
@ -266,7 +292,9 @@ def send_notify(signal, name, pid):
"""
title = 'Preventing OOM'
body = '<b>{}</b> process <b>{}</b>, <b>{}</b>'.format(
notify_sig_dict[signal], pid, name.replace('&', '*'))
notify_sig_dict[signal], pid, name.replace(
# '&' can break notifications in some notify-send themes, so it is replaced with '*'
'&', '*'))
if root:
# send notification to all active users with notify-send
b = root_notify_env()
@ -324,37 +352,77 @@ def sleep_after_send_signal(signal):
sleep(min_delay_after_sigterm)
def find_victim_and_send_signal(signal):
"""
Find victim with highest badness and send SIGTERM/SIGKILL
"""
if decrease_oom_score_adj and root:
# this is not an optimal solution
func_decrease_oom_score_adj(oom_score_adj_max)
pid_badness_list = []
# not implemented, in progress
prefer_re_cmdline = ''
prefer_cmd_factor = 1
if regex_matching:
for pid in os.listdir('/proc'):
if pid[0].isdecimal() is not True:
# only directories whose names consist solely of digits, except /proc/1/
if pid[0].isdecimal() is not True or pid == '1':
continue
try:
badness = int(rline1('/proc/' + pid + '/oom_score'))
name = pid_to_name(pid)
if fullmatch(avoid_regex, name) is not None:
name = pid_to_name(pid)
cmdline = pid_to_cmdline(pid)
# filter out kernel threads
if cmdline == '':
continue
#print([pid], [name], [cmdline])
if re.search(avoid_regex, name) is not None:
badness = int(badness / avoid_factor)
if fullmatch(prefer_regex, name) is not None:
if re.search(prefer_regex, name) is not None:
badness = int((badness + 1) * prefer_factor)
if re.search(avoid_re_cmdline, cmdline) is not None:
badness = int(badness / avoid_factor)
if re.search(prefer_re_cmdline, cmdline) is not None:
badness = int((badness + 1) * prefer_cmd_factor)
except FileNotFoundError:
badness = 0
except ProcessLookupError:
badness = 0
pid_badness_list.append((pid, badness))
else:
for pid in os.listdir('/proc'):
@ -380,45 +448,48 @@ def find_victim_and_send_signal(signal):
pid = pid_tuple_list[0]
name = pid_to_name(pid)
# Get VmRSS and VmSwap of victim process and try to send signal
# Get VmRSS and VmSwap and cmdline of victim process and try to send signal
try:
with open('/proc/' + pid + '/status') as f:
for n, line in enumerate(f):
if n is uid_index:
uid = line.split('\t')[1]
continue
if n is vm_rss_index:
vm_rss = kib_to_mib(int(
line.split('\t')[1][:-4]))
vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if n is vm_swap_index:
vm_swap = kib_to_mib(int(
line.split('\t')[1][:-4]))
vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
break
with open('/proc/' + pid + '/cmdline') as file:
try:
cmdline = file.readlines()[0].replace('\x00', ' ')
except IndexError:
cmdline = ''
except FileNotFoundError:
vm_rss = 0
vm_swap = 0
pass
# an exception here means this process should not be killed at all: either look for a new victim, or print that the process is unsuitable and stop
except ProcessLookupError:
vm_rss = 0
vm_swap = 0
pass
except IndexError:
vm_rss = 0
vm_swap = 0
pass
except ValueError:
vm_rss = 0
vm_swap = 0
pass
if execute_the_command and signal is signal.SIGTERM and name in etc_dict:
command = etc_dict[name]
exit_status = os.system(etc_dict[name])
response_time = time() - time0
etc_info = ''' Finding the process with the highest badness\n
Victim is {}, pid:{}, badness:{}, VmRSS: {} MiB, VmSwap: {} MiB\n
Execute the command: {}\n
Exit status: {}; response time: {} ms'''.format(
name, pid, victim_badness, vm_rss, vm_swap, command,
exit_status, round(response_time * 1000))
etc_info = ''' Found the victim with highest badness:\n Name: {}\n Pid: {}\n Uid: {}\n Badness: {}\n VmRSS: {} MiB\n VmSwap: {} MiB\n Execute the command: {}\n Exit status: {}; response time: {} ms'''.format(name, pid, uid, victim_badness, vm_rss, vm_swap, command, exit_status, round(response_time * 1000))
print(mem_info)
print(etc_info)
if gui_notifications:
send_notify_etc(pid, name, command)
if gui_notifications: send_notify_etc(pid, name, command)
else:
@ -440,11 +511,8 @@ def find_victim_and_send_signal(signal):
send_result = 'no such process; response time: {} ms'.format(
round(response_time * 1000))
preventing_oom_message = ' Finding the process with the highes' \
't badness\n Victim is {}, pid: {}, badness: {}, VmRS' \
'S: {} MiB, VmSwap: {} MiB\n Sending {} to the victim; {}'.format(
name, pid, victim_badness, vm_rss, vm_swap, sig_dict[signal],
send_result)
preventing_oom_message = ' Found the process with highest badness:\n Name: {}\n Pid: {}\n Uid: {}\n Badness: {}\n VmRSS: {} MiB\n VmSwap: {} MiB\n Cmdline: {}\n Sending {} to the victim; {}'.format(
name, pid, uid, victim_badness, vm_rss, vm_swap, cmdline, sig_dict[signal], send_result)
print(mem_info)
print(preventing_oom_message)
@ -629,7 +697,7 @@ for s in status_list:
vm_rss_index = status_names.index('VmRSS')
vm_swap_index = status_names.index('VmSwap')
uid_index = status_names.index('Uid')
##########################################################################
@ -751,6 +819,7 @@ execute_the_command = conf_parse_bool('execute_the_command')
prefer_regex = conf_parse_string('prefer_regex')
avoid_regex = conf_parse_string('avoid_regex')
mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent = calculate_percent(
'mem_min_sigterm')
mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent = calculate_percent(

View File

@ -132,13 +132,16 @@ oom_score_adj_max = 30
Valid values are True and False.
regex_matching = False
regex_matching = True
Badness of processes whose names correspond to prefer_regex will
be calculated by the following formula:
badness = (oom_score + 1) * prefer_factor
prefer_regex = tail|python3
prefer_regex =
# prefer_re_cmdline = ^/usr/lib/firefox # not implemented, in progress
Valid values are floating-point numbers from the range [1; 1000].
@ -147,8 +150,10 @@ prefer_factor = 3
Badness of processes whose names correspond to avoid_regex will
be calculated by the following formula:
badness = oom_score / avoid_factor
# Need more examples
avoid_regex = Xorg|sshd
avoid_regex = ^(sshd|Xorg)$
Valid values are floating-point numbers from the range [1; 1000].

87
oom-sort Executable file
View File

@ -0,0 +1,87 @@
#!/usr/bin/env python3
"""
sort processes by oom_score
"""
from operator import itemgetter
from os import listdir
def human(num):
    """Convert KiB to whole MiB, right-aligned in a 6-character field."""
    mib = round(num / 1024.0)
    return '{:>6}'.format(mib)
with open('/proc/self/status') as file:
    status_list = file.readlines()

# Field names of /proc/self/status in file order; they are used below to
# locate the VmRSS, VmSwap and Uid lines of other processes by position.
status_names = [s.split(':')[0] for s in status_list]

vm_rss_index = status_names.index('VmRSS')
vm_swap_index = status_names.index('VmSwap')
uid_index = status_names.index('Uid')

# First pass: collect (pid, oom_score, oom_score_adj, cmdline) tuples.
oom_list = []
for pid in listdir('/proc'):
    # Skip entries whose names are not made of digits, and PID 1.
    if pid.isdigit() is not True or pid == '1':
        continue
    try:
        with open('/proc/' + pid + '/cmdline') as file:
            cmdline_lines = file.readlines()
        if not cmdline_lines:
            # Empty cmdline file: the process is not listed.
            continue
        cmdline = cmdline_lines[0].replace('\x00', ' ')
        with open('/proc/' + pid + '/oom_score') as file:
            oom_score = int(file.readlines()[0][:-1])
        with open('/proc/' + pid + '/oom_score_adj') as file:
            oom_score_adj = int(file.readlines()[0][:-1])
    except (FileNotFoundError, ProcessLookupError):
        # The process disappeared while it was being read.
        continue
    oom_list.append((pid, oom_score, oom_score_adj, cmdline))

# List sorted by oom_score, highest first.
oom_list_sorted = sorted(oom_list, key=itemgetter(1), reverse=True)

print('oom_score oom_score_adj Uid Pid Name VmRSS VmSwap cmdline')
print('--------- ------------- ----- ----- --------------- -------- -------- -------')

# Second pass: walk the sorted list and print one row per process.
for pid, oom_score, oom_score_adj, cmdline in oom_list_sorted:
    try:
        # TODO: stop reading the file once the VmSwap line has been seen.
        with open('/proc/' + pid + '/status') as file:
            status_list = file.readlines()
        vm_rss = int(status_list[vm_rss_index].split(':')[1].split(' ')[-2])
        vm_swap = int(status_list[vm_swap_index].split(':')[1].split(' ')[-2])
        name = status_list[0][:-1].split('\t')[1]
        uid = status_list[uid_index].split('\t')[1]
    except (FileNotFoundError, ProcessLookupError, IndexError, ValueError):
        # FileNotFoundError / ProcessLookupError: the process exited after
        # the first pass. IndexError / ValueError: its status file no
        # longer has the expected lines at the expected positions, so the
        # positional lookup hit a missing or non-numeric line. In every
        # case the row is skipped instead of aborting the whole listing.
        continue
    print(
        '{} {} {} {} {} {} M {} M {}'.format(
            str(oom_score).rjust(9),
            str(oom_score_adj).rjust(13),
            uid.rjust(5),
            str(pid).rjust(5),
            name.ljust(15),
            human(vm_rss),
            human(vm_swap),
            cmdline.strip()
        )
    )

118
oom-top
View File

@ -1,118 +0,0 @@
#!/usr/bin/env python3
# top oom_score
from time import sleep
from operator import itemgetter
from os import listdir
# Value passed to sleep() after each refresh of the table.
period = 1
# Upper bound on the number of processes printed per refresh.
num_lines = 20
# Processes whose oom_score is below this value are not printed.
oom_score_min_value = 0
def human(num):
    """Convert KiB to whole MiB, right-aligned in a 7-character field."""
    mib = round(num / 1024.0)
    return '{:>7}'.format(mib)
with open('/proc/self/status') as file:
    status_list = file.readlines()

# Field names of /proc/self/status in file order; they are used below to
# locate the VmRSS and VmSwap lines of other processes by position.
status_names = [s.split(':')[0] for s in status_list]

vm_rss_index = status_names.index('VmRSS')
vm_swap_index = status_names.index('VmSwap')

while True:

    # Collect (pid, oom_score, oom_adj, oom_score_adj) for every process.
    oom_list = []
    for pid in listdir('/proc'):
        # Skip entries whose names are not made of digits.
        if pid.isdigit() is not True:
            continue
        try:
            with open('/proc/' + pid + '/oom_score') as file:
                oom_score = int(file.readlines()[0][:-1])
            with open('/proc/' + pid + '/oom_adj') as file:
                oom_adj = int(file.readlines()[0][:-1])
            with open('/proc/' + pid + '/oom_score_adj') as file:
                oom_score_adj = int(file.readlines()[0][:-1])
        except (FileNotFoundError, ProcessLookupError):
            # The process disappeared while being read. Skip it: falling
            # through to append() would record values left over from the
            # previous pid (or raise NameError on the very first entry).
            continue
        oom_list.append((pid, oom_score, oom_adj, oom_score_adj))

    # List sorted by oom_score, highest first.
    oom_list_sorted = sorted(oom_list, key=itemgetter(1), reverse=True)

    print('\033coom_score oom_adj oom_score_adj Pid Name RSS Swap')
    print('--------- ------- ------------- ----- --------------- --------- ---------')

    # Slicing already copes with a list shorter than num_lines, so
    # num_lines is left untouched (shrinking it to len - 1 hid one process
    # and kept the table small for all later refreshes).
    for pid, oom_score, oom_adj, oom_score_adj in oom_list_sorted[:num_lines]:
        try:
            # TODO: stop reading the file once the VmSwap line has been seen.
            with open('/proc/' + pid + '/status') as file:
                status_list = file.readlines()
            vm_rss = int(status_list[vm_rss_index].split(':')[1].split(' ')[-2])
            vm_swap = int(status_list[vm_swap_index].split(':')[1].split(' ')[-2])
            name = status_list[0][:-1].split('\t')[1]
        except (IndexError, ValueError):
            # Kernel threads: their status file has no VmRSS/VmSwap lines,
            # so the positional lookup lands on a missing or non-numeric
            # line. Report zero memory but keep the name.
            vm_rss = 0
            vm_swap = 0
            name = status_list[0][:-1].split('\t')[1]
        except (FileNotFoundError, ProcessLookupError):
            # The process exited after the scores were collected.
            vm_rss = 0
            vm_swap = 0
            name = '<unknown, dead>'

        if oom_score >= oom_score_min_value:
            print(
                '{} {} {} {} {} {} M {} M'.format(
                    str(oom_score).rjust(9),
                    str(oom_adj).rjust(7),
                    str(oom_score_adj).rjust(13),
                    str(pid).rjust(5),
                    name.ljust(15),
                    human(vm_rss),
                    human(vm_swap),
                )
            )

    sleep(period)