diff --git a/README.md b/README.md
index 9f9ac24..5a7d6f9 100644
--- a/README.md
+++ b/README.md
@@ -193,7 +193,7 @@ Please create [issues](https://github.com/hakavlad/nohang/issues). Use cases, fe
- Fix: replace `re.fullmatch()` by `re.search()`
- Validation RE patterns at startup
- Improve output:
- - Display `oom_score`, `oom_score_adj`, `euid`, `state`, `VmSize`, `RssAnon`, `RssFile`, `RssShmem`, `realpath` and `cmdline` of the victim in corrective action reports
+ - Display `oom_score`, `oom_score_adj`, `PPID`, `EUID`, `State`, `VmSize`, `RssAnon`, `RssFile`, `RssShmem`, `realpath` and `cmdline` of the victim in corrective action reports
- Print in terminal with colors
- Print statistics on corrective actions after each corrective action
- Improve poll rate algorithm
diff --git a/nohang b/nohang
index aef85b5..dde93ea 100755
--- a/nohang
+++ b/nohang
@@ -1,11 +1,11 @@
#!/usr/bin/env python3
"""A daemon that prevents OOM in Linux systems."""
import os
-import ctypes
+from ctypes import CDLL
from time import sleep, time
from operator import itemgetter
from sys import stdout
-from signal import SIGKILL, SIGTERM, SIGSTOP, SIGCONT
+from signal import SIGKILL, SIGTERM
start_time = time()
@@ -40,11 +40,6 @@ HR = '~' * 79
# todo: make config option
print_total_stat = True
-
-stop_cont = False
-stop_cont_warn = False
-
-
##########################################################################
# define functions
@@ -56,69 +51,26 @@ def mlockall():
MCL_FUTURE = 2
MCL_ONFAULT = 4
- libc = ctypes.CDLL('libc.so.6', use_errno=True)
+ libc = CDLL('libc.so.6', use_errno=True)
- result = libc.mlockall(MCL_CURRENT|MCL_FUTURE|MCL_ONFAULT)
+ result = libc.mlockall(
+ MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
+ )
if result != 0:
- result = libc.mlockall(MCL_CURRENT|MCL_FUTURE)
+ result = libc.mlockall(
+ MCL_CURRENT | MCL_FUTURE
+ )
if result != 0:
- print('Can not lock all memory')
+ print('Cannot lock all memory')
else:
- print('All memory locked with MCL_CURRENT|MCL_FUTURE')
+ print('All memory locked with MCL_CURRENT | MCL_FUTURE')
else:
- print('All memory locked with MCL_CURRENT|MCL_FUTURE|MCL_ONFAULT')
+ print('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
def pid_to_state(pid):
return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1]
-def stop():
- #print()
- #print('Stop running processes...')
- t1 = time()
- t2 = time()
- stopped_list = []
- for pid in os.listdir('/proc')[::-1]:
- # only directories whose names consist only of numbers, except /proc/1/
- if pid[0].isdecimal() is False or pid is '1' or pid == self_pid:
- continue
- try:
- oom_score_r = int(rline1('/proc/' + pid + '/oom_score'))
- if oom_score_r > 9:
- uid_r = pid_to_uid(pid)
- #print('PID: {}, State: {}, oom_score {}'.format(pid, pid_to_state(pid), oom_score_r))
- if uid_r != '0':
- stopped_list.append(pid)
- print('Send SIGSTOP to {}, {}, {}...'.format(
- pid, pid_to_name(pid), pid_to_cmdline(pid)[:40]))
- os.kill(int(pid), SIGSTOP)
- t2 = time()
- except FileNotFoundError:
- continue
- except ProcessLookupError:
- continue
- print('Stop time:', t2 - t1)
- stdout.flush()
-
- return stopped_list
-
-def cont(stopped_list):
- print()
- print('Continue stopped processes...')
- t1 = time()
- if len(stopped_list) > 0:
- for pid in stopped_list:
- print('Send SIGCONT to', [pid], pid_to_name(pid))
- try:
- os.kill(int(pid), SIGCONT)
- except FileNotFoundError:
- continue
- except ProcessLookupError:
- continue
- t2 = time()
- print('All cont time: ', t2 - t1)
-
-
def update_stat_dict_and_print(key):
@@ -404,9 +356,6 @@ def send_notify_warn():
(implement Low memory warnings)
"""
- if stop_cont_warn:
- stopped_list = stop()
-
# find process with max badness
fat_tuple = fattest()
pid = fat_tuple[0]
@@ -428,7 +377,7 @@ def send_notify_warn():
# title = 'Low memory: {}'.format(low_mem_percent)
title = 'Low memory'
- body = 'Hog: {} [{}]'.format(
+ body = 'Hog: {}, PID: {}'.format(
name.replace(
# symbol '&' can break notifications in some themes,
# therefore it is replaced by '*'
@@ -443,9 +392,6 @@ def send_notify_warn():
# send notification to user that runs this nohang
notify_send_wait(title, body)
- if stop_cont_warn:
- cont(stopped_list)
-
def send_notify(signal, name, pid):
"""
@@ -456,7 +402,7 @@ def send_notify(signal, name, pid):
pid: str process pid
"""
title = 'Hang prevention'
- body = '{} {} [{}]'.format(
+ body = '{} {}, PID: {}'.format(
notify_sig_dict[signal],
name.replace(
# symbol '&' can break notifications in some themes,
@@ -614,11 +560,6 @@ def find_victim_and_send_signal(signal):
-> implement_corrective_action()
"""
-
- if stop_cont:
- stopped_list = stop()
-
-
pid, victim_badness = fattest()
name = pid_to_name(pid)
@@ -633,8 +574,14 @@ def find_victim_and_send_signal(signal):
with open('/proc/' + pid + '/status') as f:
for n, line in enumerate(f):
+
+                    # Second tab-separated field of the PPid line; the
+                    # value keeps its trailing newline until it is used.
+                    if n == ppid_index:
+                        ppid = line.split('\t')[1]
+                        continue
if n is uid_index:
- uid = line.split('\t')[1]
+ uid = line.split('\t')[2]
continue
if n is vm_size_index:
@@ -691,16 +638,13 @@ def find_victim_and_send_signal(signal):
for i in range(len(f_list)):
if i is ppid_index:
- ppid = f_list[i].split('\t')[2]
-
+ ppid = f_list[i].split('\t')[1]
for i in range(len(f_list)):
if i is uid_index:
uid = f_list[i].split('\t')[2]
-
-
if i is vm_size_index:
vm_size = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
@@ -748,10 +692,10 @@ def find_victim_and_send_signal(signal):
len_vm = len(str(vm_size))
-
realpath = os.path.realpath('/proc/' + pid + '/exe')
state = pid_to_state(pid)
-
+            # ppid comes from a raw split of a status line, so strip it
+            pname = pid_to_name(ppid.strip('\n '))
if detailed_rss:
@@ -760,7 +704,8 @@ def find_victim_and_send_signal(signal):
'\n Name: \033[33m{}\033[0m' \
'\n State: \033[33m{}\033[0m' \
'\n PID: \033[33m{}\033[0m' \
- '\n UID: \033[33m{}\033[0m' \
+ '\n PPID: \033[33m{}\033[0m (\033[33m{}\033[0m)' \
+ '\n EUID: \033[33m{}\033[0m' \
'\n badness: \033[33m{}\033[0m, ' \
'oom_score: \033[33m{}\033[0m, ' \
'oom_score_adj: \033[33m{}\033[0m' \
@@ -775,6 +720,8 @@ def find_victim_and_send_signal(signal):
name,
state,
pid,
+ ppid.strip('\n '),
+ pname,
uid,
victim_badness,
oom_score,
@@ -853,13 +800,8 @@ def find_victim_and_send_signal(signal):
m = check_mem_and_swap()
ma = round(int(m[0]) / 1024.0)
sf = round(int(m[2]) / 1024.0)
- print('\nMemory status before sending a signal:\nMemA'
- 'v: {} MiB, SwFree: {} MiB'.format(ma, sf))
-
-
- if stop_cont:
- os.kill(int(pid), SIGCONT)
-
+ print('\nMemory status before sending a signal:\n MemAvailable'
+ ': {} MiB, SwapFree: {} MiB'.format(ma, sf))
os.kill(int(pid), signal)
response_time = time() - time0
@@ -909,10 +851,6 @@ def find_victim_and_send_signal(signal):
key = 'victim badness < min_badness'
update_stat_dict_and_print(key)
-
- if stop_cont:
- cont(stopped_list)
-
sleep_after_send_signal(signal)