improve output and regex matching, add oom-sort
This commit is contained in:
parent
14ea6652d3
commit
62dd52b7fd
17
README.md
17
README.md
@ -114,7 +114,7 @@ The program can be configured by editing the [config file](https://github.com/ha
|
||||
1. Memory levels to respond to as an OOM threat
|
||||
2. The frequency of checking the level of available memory (and CPU usage)
|
||||
3. The prevention of killing innocent victims
|
||||
4. Impact on the badness of processes via matching their names with regular expressions
|
||||
4. Impact on the badness of processes via matching their names and cmdlines with regular expressions
|
||||
5. The execution of a specific command instead of sending the SIGTERM signal
|
||||
6. GUI notifications:
|
||||
- results of preventing OOM
|
||||
@ -125,17 +125,17 @@ The program can be configured by editing the [config file](https://github.com/ha
|
||||
Just read the description of the parameters and edit the values. Please restart nohang to apply changes. Default path to the config after installing is `/etc/nohang/nohang.conf`.
|
||||
|
||||
|
||||
## oom-top
|
||||
## oom-sort
|
||||
|
||||
`oom-top` is an additional diagnostic tool from the nohang package. It sorts the processes in descending order of their oom_score and also displays oom_score_adj, Pid, Name, VmRSS, VmSwap. It will be installed together with nohang.
|
||||
`oom-sort` is an additional diagnostic tool that is installed with the `nohang` package. It sorts the processes in descending order of their oom_score and also displays oom_score_adj, Uid, Pid, Name, VmRSS, VmSwap and optionally cmdline. Run `oom-sort --help` for more info.
|
||||
|
||||
Usage:
|
||||
|
||||
```
|
||||
$ oom-top
|
||||
$ oom-sort
|
||||
```
|
||||
|
||||
Output like this (monitors top 20 processes with period = 1 sec):
|
||||
Output like the following:
|
||||
|
||||
```
|
||||
oom_score oom_adj oom_score_adj Pid Name RSS Swap
|
||||
@ -170,11 +170,14 @@ See also `man journalctl`.
|
||||
|
||||
Please create [issues](https://github.com/hakavlad/nohang/issues). Use cases, feature requests and any questions are welcome.
|
||||
|
||||
|
||||
## Changelog
|
||||
|
||||
- In progress
|
||||
- add oom-top
|
||||
- add oom-sort
|
||||
- add oom-trigger
|
||||
- fix regex matching: replace re.fullmatch() by re.search()
|
||||
- add support for cmdline regex matching
|
||||
- improve output: display cmdline and Uid in corrective action reports
|
||||
|
||||
- [v0.1](https://github.com/hakavlad/nohang/releases/tag/v0.1), 2018-11-23
|
||||
- 1st release
|
||||
|
133
nohang
133
nohang
@ -8,6 +8,10 @@ from argparse import ArgumentParser
|
||||
from sys import stdout
|
||||
|
||||
|
||||
import re
|
||||
|
||||
vm_rss = 9
|
||||
|
||||
sig_dict = {signal.SIGKILL: 'SIGKILL',
|
||||
signal.SIGTERM: 'SIGTERM'}
|
||||
|
||||
@ -222,6 +226,28 @@ def pid_to_name(pid):
|
||||
return '<unknown>'
|
||||
|
||||
|
||||
|
||||
def pid_to_cmdline(pid):
    """
    Get process cmdline by pid.

    pid: str pid of required process

    returns the whole command line as one string: the NUL separators
    found in /proc/<pid>/cmdline are replaced with spaces and surrounding
    whitespace is stripped. An empty string is returned when the file is
    empty.

    Errors raised while opening or reading the file (for example
    FileNotFoundError when the process has already exited) are not
    handled here and propagate to the caller.
    """
    with open('/proc/' + pid + '/cmdline') as file:
        # Read the entire file rather than only its first line: an
        # argument may itself contain newlines, and everything after the
        # first one must still be available for regex matching.
        return file.read().replace('\x00', ' ').strip()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def send_notify_warn():
|
||||
"""
|
||||
Look for process with maximum 'badness' and warn user with notification.
|
||||
@ -266,7 +292,9 @@ def send_notify(signal, name, pid):
|
||||
"""
|
||||
title = 'Preventing OOM'
|
||||
body = '<b>{}</b> process <b>{}</b>, <b>{}</b>'.format(
|
||||
notify_sig_dict[signal], pid, name.replace('&', '*'))
|
||||
notify_sig_dict[signal], pid, name.replace(
|
||||
# & может ломать уведомления в некоторых темах оформления notify-send, поэтому заменяется на *
|
||||
'&', '*'))
|
||||
if root:
|
||||
# send notification to all active users with notify-send
|
||||
b = root_notify_env()
|
||||
@ -324,37 +352,77 @@ def sleep_after_send_signal(signal):
|
||||
sleep(min_delay_after_sigterm)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def find_victim_and_send_signal(signal):
|
||||
"""
|
||||
Find victim with highest badness and send SIGTERM/SIGKILL
|
||||
"""
|
||||
if decrease_oom_score_adj and root:
|
||||
# это не оптимальное решение
|
||||
func_decrease_oom_score_adj(oom_score_adj_max)
|
||||
|
||||
pid_badness_list = []
|
||||
|
||||
# not implemented, in progress
|
||||
prefer_re_cmdline = ''
|
||||
prefer_cmd_factor = 1
|
||||
|
||||
|
||||
if regex_matching:
|
||||
|
||||
|
||||
for pid in os.listdir('/proc'):
|
||||
if pid[0].isdecimal() is not True:
|
||||
# только директории, имена которых состоят только из цифр, за исключением /proc/1/
|
||||
if pid[0].isdecimal() is not True or pid == '1':
|
||||
continue
|
||||
|
||||
try:
|
||||
badness = int(rline1('/proc/' + pid + '/oom_score'))
|
||||
name = pid_to_name(pid)
|
||||
|
||||
if fullmatch(avoid_regex, name) is not None:
|
||||
|
||||
name = pid_to_name(pid)
|
||||
cmdline = pid_to_cmdline(pid)
|
||||
# отсеять потоки ядра
|
||||
if cmdline == '':
|
||||
continue
|
||||
|
||||
#print([pid], [name], [cmdline])
|
||||
|
||||
if re.search(avoid_regex, name) is not None:
|
||||
badness = int(badness / avoid_factor)
|
||||
|
||||
if fullmatch(prefer_regex, name) is not None:
|
||||
if re.search(prefer_regex, name) is not None:
|
||||
badness = int((badness + 1) * prefer_factor)
|
||||
|
||||
|
||||
|
||||
if re.search(avoid_re_cmdline, cmdline) is not None:
|
||||
badness = int(badness / avoid_factor)
|
||||
|
||||
|
||||
if re.search(prefer_re_cmdline, cmdline) is not None:
|
||||
badness = int((badness + 1) * prefer_cmd_factor)
|
||||
|
||||
|
||||
except FileNotFoundError:
|
||||
badness = 0
|
||||
except ProcessLookupError:
|
||||
badness = 0
|
||||
pid_badness_list.append((pid, badness))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
else:
|
||||
|
||||
for pid in os.listdir('/proc'):
|
||||
@ -380,45 +448,48 @@ def find_victim_and_send_signal(signal):
|
||||
pid = pid_tuple_list[0]
|
||||
name = pid_to_name(pid)
|
||||
|
||||
# Get VmRSS and VmSwap of victim process and try to send signal
|
||||
# Get VmRSS and VmSwap and cmdline of victim process and try to send signal
|
||||
try:
|
||||
with open('/proc/' + pid + '/status') as f:
|
||||
for n, line in enumerate(f):
|
||||
if n is uid_index:
|
||||
uid = line.split('\t')[1]
|
||||
continue
|
||||
if n is vm_rss_index:
|
||||
vm_rss = kib_to_mib(int(
|
||||
line.split('\t')[1][:-4]))
|
||||
vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
|
||||
continue
|
||||
if n is vm_swap_index:
|
||||
vm_swap = kib_to_mib(int(
|
||||
line.split('\t')[1][:-4]))
|
||||
vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
|
||||
break
|
||||
with open('/proc/' + pid + '/cmdline') as file:
|
||||
try:
|
||||
cmdline = file.readlines()[0].replace('\x00', ' ')
|
||||
except IndexError:
|
||||
cmdline = ''
|
||||
|
||||
except FileNotFoundError:
|
||||
vm_rss = 0
|
||||
vm_swap = 0
|
||||
pass
|
||||
|
||||
|
||||
# исключение - значит вообще не надо бить этот процесс. Или искать новый, или принт: процесс хуёвый, стоп!
|
||||
|
||||
except ProcessLookupError:
|
||||
vm_rss = 0
|
||||
vm_swap = 0
|
||||
pass
|
||||
except IndexError:
|
||||
vm_rss = 0
|
||||
vm_swap = 0
|
||||
pass
|
||||
except ValueError:
|
||||
vm_rss = 0
|
||||
vm_swap = 0
|
||||
pass
|
||||
|
||||
if execute_the_command and signal is signal.SIGTERM and name in etc_dict:
|
||||
command = etc_dict[name]
|
||||
exit_status = os.system(etc_dict[name])
|
||||
response_time = time() - time0
|
||||
etc_info = ''' Finding the process with the highest badness\n
|
||||
Victim is {}, pid:{}, badness:{}, VmRSS: {} MiB, VmSwap: {} MiB\n
|
||||
Execute the command: {}\n
|
||||
Exit status: {}; response time: {} ms'''.format(
|
||||
name, pid, victim_badness, vm_rss, vm_swap, command,
|
||||
exit_status, round(response_time * 1000))
|
||||
|
||||
etc_info = ''' Found the victim with highest badness:\n Name: {}\n Pid: {}\n Uid: {}\n Badness: {}\n VmRSS: {} MiB\n VmSwap: {} MiB\n Execute the command: {}\n Exit status: {}; response time: {} ms'''.format(name, pid, uid, victim_badness, vm_rss, vm_swap, command, exit_status, round(response_time * 1000))
|
||||
|
||||
print(mem_info)
|
||||
print(etc_info)
|
||||
if gui_notifications:
|
||||
send_notify_etc(pid, name, command)
|
||||
if gui_notifications: send_notify_etc(pid, name, command)
|
||||
|
||||
else:
|
||||
|
||||
@ -440,11 +511,8 @@ def find_victim_and_send_signal(signal):
|
||||
send_result = 'no such process; response time: {} ms'.format(
|
||||
round(response_time * 1000))
|
||||
|
||||
preventing_oom_message = ' Finding the process with the highes' \
|
||||
't badness\n Victim is {}, pid: {}, badness: {}, VmRS' \
|
||||
'S: {} MiB, VmSwap: {} MiB\n Sending {} to the victim; {}'.format(
|
||||
name, pid, victim_badness, vm_rss, vm_swap, sig_dict[signal],
|
||||
send_result)
|
||||
preventing_oom_message = ' Found the process with highest badness:\n Name: {}\n Pid: {}\n Uid: {}\n Badness: {}\n VmRSS: {} MiB\n VmSwap: {} MiB\n Cmdline: {}\n Sending {} to the victim; {}'.format(
|
||||
name, pid, uid, victim_badness, vm_rss, vm_swap, cmdline, sig_dict[signal], send_result)
|
||||
print(mem_info)
|
||||
print(preventing_oom_message)
|
||||
|
||||
@ -629,7 +697,7 @@ for s in status_list:
|
||||
|
||||
vm_rss_index = status_names.index('VmRSS')
|
||||
vm_swap_index = status_names.index('VmSwap')
|
||||
|
||||
uid_index = status_names.index('Uid')
|
||||
|
||||
##########################################################################
|
||||
|
||||
@ -751,6 +819,7 @@ execute_the_command = conf_parse_bool('execute_the_command')
|
||||
prefer_regex = conf_parse_string('prefer_regex')
|
||||
avoid_regex = conf_parse_string('avoid_regex')
|
||||
|
||||
|
||||
mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent = calculate_percent(
|
||||
'mem_min_sigterm')
|
||||
mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent = calculate_percent(
|
||||
|
11
nohang.conf
11
nohang.conf
@ -132,13 +132,16 @@ oom_score_adj_max = 30
|
||||
|
||||
Valid values are True and False.
|
||||
|
||||
regex_matching = False
|
||||
regex_matching = True
|
||||
|
||||
Badness of processes whose names correspond to prefer_regex will
|
||||
be calculated by the following formula:
|
||||
badness = (oom_score + 1) * prefer_factor
|
||||
|
||||
prefer_regex = tail|python3
|
||||
prefer_regex =
|
||||
|
||||
|
||||
# prefer_re_cmdline = ^/usr/lib/firefox # not implemented, in progress
|
||||
|
||||
Valid values are floating-point numbers from the range [1; 1000].
|
||||
|
||||
@ -147,8 +150,10 @@ prefer_factor = 3
|
||||
Badness of processes whose names correspond to avoid_regex will
|
||||
be calculated by the following formula:
|
||||
badness = oom_score / avoid_factor
|
||||
|
||||
# Need more examples
|
||||
|
||||
avoid_regex = Xorg|sshd
|
||||
avoid_regex = ^(sshd|Xorg)$
|
||||
|
||||
Valid values are floating-point numbers from the range [1; 1000].
|
||||
|
||||
|
87
oom-sort
Executable file
87
oom-sort
Executable file
@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
sort processes by oom_score
|
||||
"""
|
||||
from operator import itemgetter
|
||||
from os import listdir
|
||||
|
||||
def human(num):
    '''Convert KiB to whole MiB, right-aligned in a 6-character field.'''
    mebibytes = round(num / 1024.0)
    text = str(mebibytes)
    return text.rjust(6)
|
||||
|
||||
|
||||
with open('/proc/self/status') as file:
    status_list = file.readlines()

# Field names taken from /proc/self/status; they are used to find the
# line positions of VmRSS, VmSwap and Uid in other /proc/*/status files.
status_names = [s.split(':')[0] for s in status_list]

vm_rss_index = status_names.index('VmRSS')
vm_swap_index = status_names.index('VmSwap')
uid_index = status_names.index('Uid')

# First pass: collect (pid, oom_score, oom_score_adj, cmdline) tuples.
oom_list = []
for pid in listdir('/proc'):
    # Skip entries that are not purely numeric, and PID 1.
    if not pid.isdigit() or pid == '1':
        continue
    try:
        with open('/proc/' + pid + '/cmdline') as file:
            try:
                cmdline = file.readlines()[0].replace('\x00', ' ')
            except IndexError:
                # An empty cmdline file: nothing to show for this entry.
                continue
        with open('/proc/' + pid + '/oom_score') as file:
            oom_score = int(file.readlines()[0][:-1])
        with open('/proc/' + pid + '/oom_score_adj') as file:
            oom_score_adj = int(file.readlines()[0][:-1])
    except (FileNotFoundError, ProcessLookupError):
        # The process went away while it was being read.
        continue

    oom_list.append((pid, oom_score, oom_score_adj, cmdline))

# list sorted by oom_score
oom_list_sorted = sorted(oom_list, key=itemgetter(1), reverse=True)

print('oom_score oom_score_adj Uid Pid Name VmRSS VmSwap cmdline')
print('--------- ------------- ----- ----- --------------- -------- -------- -------')

# Second pass: walk the sorted list and print one row per process.
for pid, oom_score, oom_score_adj, raw_cmdline in oom_list_sorted:
    cmdline = raw_cmdline.strip()

    try:
        # TODO: read only as far as the VmSwap line instead of the whole file.
        with open('/proc/' + pid + '/status') as file:
            status_list = file.readlines()

        vm_rss = int(status_list[vm_rss_index].split(':')[1].split(' ')[-2])
        vm_swap = int(status_list[vm_swap_index].split(':')[1].split(' ')[-2])
        name = status_list[0][:-1].split('\t')[1]
        uid = status_list[uid_index].split('\t')[1]

    except (FileNotFoundError, ProcessLookupError):
        # The process exited between the two passes.
        continue

    except (IndexError, ValueError):
        # The status file no longer has the expected layout (for example
        # the Vm* lines are missing), so the indexed lines do not hold the
        # expected numbers. Skip the row instead of aborting the listing.
        continue

    print(
        '{} {} {} {} {} {} M {} M {}'.format(
            str(oom_score).rjust(9),
            str(oom_score_adj).rjust(13),
            uid.rjust(5),
            str(pid).rjust(5),
            name.ljust(15),
            human(vm_rss),
            human(vm_swap),
            cmdline
        )
    )
|
118
oom-top
118
oom-top
@ -1,118 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# top oom_score
|
||||
|
||||
from time import sleep
|
||||
from operator import itemgetter
|
||||
from os import listdir
|
||||
|
||||
period = 1
|
||||
num_lines = 20
|
||||
oom_score_min_value = 0
|
||||
|
||||
# Convert kibibytes to whole mebibytes and right-align the result
# in a 7-character field.
def human(num):
    mebibytes = round(num / 1024.0)
    text = str(mebibytes)
    return text.rjust(7)
|
||||
|
||||
with open('/proc/self/status') as file:
    status_list = file.readlines()

# Field names taken from /proc/self/status; they are used to find the
# line positions of VmRSS and VmSwap in other /proc/*/status files.
status_names = [s.split(':')[0] for s in status_list]

vm_rss_index = status_names.index('VmRSS')
vm_swap_index = status_names.index('VmSwap')

while True:

    oom_list = []

    # Fill oom_list with one (pid, oom_score, oom_adj, oom_score_adj)
    # tuple per process.
    for i in listdir('/proc'):

        # Skip entries that are not purely numeric.
        if not i.isdigit():
            continue

        try:
            with open('/proc/' + i + '/oom_score') as file:
                oom_score = int(file.readlines()[0][:-1])

            with open('/proc/' + i + '/oom_adj') as file:
                oom_adj = int(file.readlines()[0][:-1])

            with open('/proc/' + i + '/oom_score_adj') as file:
                oom_score_adj = int(file.readlines()[0][:-1])

        except (FileNotFoundError, ProcessLookupError):
            # The process exited while it was being read. Skip it, so that
            # values left over from the previously read process are not
            # recorded under this pid.
            continue

        oom_list.append((i, oom_score, oom_adj, oom_score_adj))

    # List sorted by oom_score, highest first.
    oom_list_sorted = sorted(oom_list, key=itemgetter(1), reverse=True)

    print('\033coom_score oom_adj oom_score_adj Pid Name RSS Swap')
    print('--------- ------- ------------- ----- --------------- --------- ---------')

    # Walk the sorted list and print one row per process. Slicing copes
    # with fewer than num_lines entries, so num_lines is never modified.
    for pid, oom_score, oom_adj, oom_score_adj in oom_list_sorted[:num_lines]:

        try:
            # TODO: read only as far as the VmSwap line.
            with open('/proc/' + pid + '/status') as file:
                status_list = file.readlines()

        except (FileNotFoundError, ProcessLookupError):
            # The process exited after its oom_score was read.
            vm_rss = 0
            vm_swap = 0
            name = '<unknown, dead>'

        else:
            name = status_list[0][:-1].split('\t')[1]
            try:
                vm_rss = int(
                    status_list[vm_rss_index].split(':')[1].split(' ')[-2])
                vm_swap = int(
                    status_list[vm_swap_index].split(':')[1].split(' ')[-2])
            except (IndexError, ValueError):
                # Kernel threads: the Vm* lines are absent, so the indexed
                # lines do not hold a memory value.
                vm_rss = 0
                vm_swap = 0

        if oom_score >= oom_score_min_value:

            print(
                '{} {} {} {} {} {} M {} M'.format(
                    str(oom_score).rjust(9),
                    str(oom_adj).rjust(7),
                    str(oom_score_adj).rjust(13),
                    str(pid).rjust(5),
                    name.ljust(15),
                    human(vm_rss),
                    human(vm_swap),
                )
            )

    sleep(period)
|
||||
|
Loading…
Reference in New Issue
Block a user