fix oom-sort: add --sort option; fix uid len; fix VmRSS len

This commit is contained in:
Alexey Avramov 2019-01-30 07:27:32 +09:00
parent 191f749d03
commit 103b2975fd
3 changed files with 130 additions and 31 deletions

View File

@ -23,12 +23,12 @@ How can I prevent this in the future? Can't it at least keep a responsive core o
Also look at [Why are low memory conditions handled so badly?](https://www.reddit.com/r/linux/comments/56r4xj/why_are_low_memory_conditions_handled_so_badly/) (discussion with 480+ posts on r/linux). Also look at [Why are low memory conditions handled so badly?](https://www.reddit.com/r/linux/comments/56r4xj/why_are_low_memory_conditions_handled_so_badly/) (discussion with 480+ posts on r/linux).
## Solution ## Solution
- Use of [earlyoom](https://github.com/rfjakob/earlyoom). This is a very lightweight OOM preventer written in C. - Use of [earlyoom](https://github.com/rfjakob/earlyoom). This is a simple and very lightweight OOM preventer written in C (the best choice for embedded and old systems). It has minimal dependencies and can work with the oldest kernels.
- Use of [oomd](https://github.com/facebookincubator/oomd). This is a userspace OOM killer for linux systems written in C++ and developed by Facebook. - Use of [oomd](https://github.com/facebookincubator/oomd). This is a userspace OOM killer for linux systems written in C++ and developed by Facebook. Needs Linux 4.20+.
- Use of nohang. - Use of nohang (maybe this is a good choice for modern desktops and servers if you need fine tuning).
The tools listed above may work at the same time on one computer.
## Some features ## Some features
@ -67,6 +67,14 @@ To use `PSI` (pressure stall information):
Please use the latest [release version](https://github.com/hakavlad/nohang/releases). Current version may be unstable. Please use the latest [release version](https://github.com/hakavlad/nohang/releases). Current version may be unstable.
Please download the latest stable version (v0.1):
```bash
$ wget -ct0 https://github.com/hakavlad/nohang/archive/v0.1.tar.gz
$ tar xvzf v0.1.tar.gz
$ cd nohang-0.1
```
or clone the latest unstable:
```bash ```bash
$ git clone https://github.com/hakavlad/nohang.git $ git clone https://github.com/hakavlad/nohang.git
$ cd nohang $ cd nohang
@ -157,13 +165,24 @@ $ sudo journalctl -eu nohang
``` ```
See also `man journalctl`. See also `man journalctl`.
## Known problems ## Known problems
- Awful documentation - Awful documentation
- Slowly starting, slowly looking for a victim, especially when using swapspace (although this should be enough for more than 95% of all cases, IMHO) - Slowly starting, slowly looking for a victim, especially when using swapspace (although this should be enough for more than 95% of all cases, IMHO)
- It is written in an interpreted language and is actually a prototype - It is written in an interpreted language and is actually a prototype
## Todo
- Rewrite all code in Golang with tests and good documentation.
## Nohang won't help you
if you run
```bash
$ while true; do setsid tail /dev/zero; done
```
## Contribution ## Contribution
Please create [issues](https://github.com/hakavlad/nohang/issues). Use cases, feature requests and any questions are welcome. Please create [issues](https://github.com/hakavlad/nohang/issues). Use cases, feature requests and any questions are welcome.

20
nohang
View File

@ -690,6 +690,15 @@ def find_victim_and_send_signal(signal):
else: else:
try: try:
t0 = time()
m = check_mem_and_swap()
ma = round(int(m[0]) / 1024.0)
sf = round(int(m[2]) / 1024.0)
print('\nMemory status before sending a signal:\nMemAv: {} MiB, SwFree: {} MiB'.format(ma, sf))
print(time() - t0)
os.kill(int(pid), signal) os.kill(int(pid), signal)
response_time = time() - time0 response_time = time() - time0
send_result = '\033[32mOK\033[0m; response time: {} ms'.format( send_result = '\033[32mOK\033[0m; response time: {} ms'.format(
@ -1358,6 +1367,17 @@ if psi_support and not ignore_psi:
avg_value = '' avg_value = ''
while True: while True:
if psi_support and not ignore_psi: if psi_support and not ignore_psi:

112
oom-sort
View File

@ -3,11 +3,11 @@
sort processes by oom_score sort processes by oom_score
""" """
from operator import itemgetter from operator import itemgetter
from os import listdir from os import listdir
from argparse import ArgumentParser from argparse import ArgumentParser
"""#######################################################################79""" """#######################################################################79"""
@ -63,14 +63,6 @@ def get_max_pid_len():
return len(line.strip()) return len(line.strip())
'''
def get_max_vm_rss_len():
with open('/proc/meminfo') as file:
kib = file.readline()[:-4].split(':')[1].lstrip()
return len(str(round(float(kib) / 1024)))
'''
def human(num): def human(num):
'''Convert KiB to MiB and right align''' '''Convert KiB to MiB and right align'''
return str(round(num / 1024.0)).rjust(6, ' ') return str(round(num / 1024.0)).rjust(6, ' ')
@ -79,6 +71,21 @@ def human(num):
"""#######################################################################79""" """#######################################################################79"""
sort_dict = {
'PID': 0,
'oom_score': 1,
'oom_score_adj': 2,
'cmdline': 3,
'Name': 4,
'UID': 5,
'VmRSS': 6,
'VmSwap': 7
}
"""#######################################################################79"""
# parse input # parse input
# todo: input validation # todo: input validation
@ -102,16 +109,45 @@ parser.add_argument(
type=str type=str
) )
parser.add_argument(
'--sort',
'-s',
help="""sort by unit; default: oom_score""",
default=None,
type=str
)
args = parser.parse_args() args = parser.parse_args()
display_cmdline = args.len display_cmdline = args.len
num_lines = args.num num_lines = args.num
sort_by = args.sort
if num_lines is None: if num_lines is None:
num_lines = 99999 num_lines = 99999
if display_cmdline is None: if display_cmdline is None:
display_cmdline = 99999 display_cmdline = 99999
if sort_by is None:
sort_by = 'oom_score'
if sort_by not in sort_dict:
print('Invalid -s/--sort value. Valid values are:\nPID\noom_scor'
'e [default value]\noom-sore_adj\nUID\nName\ncmdline\nVmR'
'SS\nVmSwap')
exit()
if sort_by == '1':
print('Sort by:', sort_by)
"""#######################################################################79""" """#######################################################################79"""
@ -125,9 +161,9 @@ status_names = []
for s in status_list: for s in status_list:
status_names.append(s.split(':')[0]) status_names.append(s.split(':')[0])
uid_index = status_names.index('Uid')
vm_rss_index = status_names.index('VmRSS') vm_rss_index = status_names.index('VmRSS')
vm_swap_index = status_names.index('VmSwap') vm_swap_index = status_names.index('VmSwap')
uid_index = status_names.index('Uid')
"""#######################################################################79""" """#######################################################################79"""
@ -163,20 +199,27 @@ for pid in listdir('/proc'):
continue continue
oom_list.append(( oom_list.append((
int(pid), int(oom_score), int(oom_score_adj), cmdline, name, int(uid), int(vm_rss), int(vm_swap))) int(pid), int(oom_score), int(oom_score_adj), cmdline,
name, int(uid), int(vm_rss), int(vm_swap)))
# list sorted by oom_score # list sorted by oom_score
oom_list_sorted = sorted(oom_list, key=itemgetter(1), reverse=True) oom_list_sorted = sorted(
oom_list, key=itemgetter(int(sort_dict[sort_by])), reverse=True)
'''
max_uid_len = len(str(sorted( max_uid_len = len(str(sorted(
oom_list, key=itemgetter(5), reverse=True)[0][5])) oom_list, key=itemgetter(5), reverse=True)[0][5]))
max_vm_rss_len = len(str(round( max_vm_rss_len = len(str(round(
sorted(oom_list, key=itemgetter(6), reverse=True)[0][6] / 1024.0))) sorted(oom_list, key=itemgetter(6), reverse=True)[0][6] / 1024.0)))
'''
if max_vm_rss_len < 5:
max_vm_rss_len = 5
max_pid_len = get_max_pid_len()
"""#######################################################################79""" """#######################################################################79"""
@ -185,24 +228,41 @@ max_vm_rss_len = len(str(round(
# print table # print table
max_pid_len = get_max_pid_len()
if display_cmdline == '0': if display_cmdline == '0':
print( print(
'oom_score oom_score_adj UID{}PID Name ' 'oom_score oom_score_adj{}UID{}PID Name {}VmRSS VmSwap'.format(
'VmRSS VmSwap'.format(' ' * (max_pid_len - 2))) ' ' * (max_uid_len - 2),
' ' * (max_pid_len - 2),
' ' * max_vm_rss_len
)
)
print('--------- ------------- ----- {} --------------- -' print(
'------- --------'.format('-' * max_pid_len)) '--------- ------------- {} {} --------------- {}-- --------'.format(
'-' * max_uid_len,
'-' * max_pid_len,
'-' * max_vm_rss_len
)
)
else: else:
print('oom_score oom_score_adj UID{}PID Name ' print(
' VmRSS VmSwap cmdline'.format(' ' * (max_pid_len - 2))) 'oom_score oom_score_adj{}UID{}PID Name {}VmRSS VmSwap cmdline'.format(
' ' * (max_uid_len - 2),
' ' * (max_pid_len - 2),
' ' * max_vm_rss_len
)
)
print('--------- ------------- ----- {} --------------- -' print(
'------- -------- -------'.format('-' * max_pid_len)) '--------- ------------- {} {} --------------- {}-- -------- -------'.format(
'-' * max_uid_len,
'-' * max_pid_len,
'-' * max_vm_rss_len
)
)
for i in oom_list_sorted[:int(num_lines)]: for i in oom_list_sorted[:int(num_lines)]:
@ -220,10 +280,10 @@ for i in oom_list_sorted[:int(num_lines)]:
'{} {} {} {} {} {} M {} M {}'.format( '{} {} {} {} {} {} M {} M {}'.format(
str(oom_score).rjust(9), str(oom_score).rjust(9),
str(oom_score_adj).rjust(13), str(oom_score_adj).rjust(13),
str(uid).rjust(5), str(uid).rjust(max_uid_len),
str(pid).rjust(max_pid_len), str(pid).rjust(max_pid_len),
name.ljust(15), name.ljust(15),
human(vm_rss), str(round(vm_rss / 1024.0)).rjust(max_vm_rss_len, ' '),
human(vm_swap), human(vm_swap),
cmdline[:int(display_cmdline)] cmdline[:int(display_cmdline)]
) )