stop running processes in finding victim process
This commit is contained in:
parent
50f08bc894
commit
e2ce62114b
214
nohang
214
nohang
@ -11,7 +11,7 @@ from operator import itemgetter
|
|||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
from sys import stdout
|
from sys import stdout
|
||||||
from signal import SIGKILL, SIGTERM
|
from signal import SIGKILL, SIGTERM, SIGSTOP, SIGCONT
|
||||||
|
|
||||||
sig_dict = {SIGKILL: 'SIGKILL',
|
sig_dict = {SIGKILL: 'SIGKILL',
|
||||||
SIGTERM: 'SIGTERM'}
|
SIGTERM: 'SIGTERM'}
|
||||||
@ -26,20 +26,40 @@ else:
|
|||||||
|
|
||||||
wait_time = 14
|
wait_time = 14
|
||||||
|
|
||||||
max_sleep_time = 2
|
max_sleep_time = 1
|
||||||
min_sleep_time = 0.1
|
min_sleep_time = 0.05
|
||||||
|
|
||||||
notify_helper_path = '/usr/bin/nohang_notify_helper'
|
notify_helper_path = '/usr/bin/nohang_notify_helper'
|
||||||
|
|
||||||
psi_path = '/proc/pressure/memory'
|
psi_path = '/proc/pressure/memory'
|
||||||
psi_support = os.path.exists(psi_path)
|
psi_support = os.path.exists(psi_path)
|
||||||
|
|
||||||
|
debug = False
|
||||||
|
|
||||||
|
|
||||||
|
stop_cont = True
|
||||||
|
|
||||||
|
SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
|
|
||||||
# function definition section
|
# function definition section
|
||||||
|
|
||||||
|
|
||||||
|
def uptime():
|
||||||
|
return float(rline1('/proc/uptime').split(' ')[0])
|
||||||
|
|
||||||
|
|
||||||
|
def pid_to_starttime(pid):
|
||||||
|
return float(rline1('/proc/' + pid + '/stat').rpartition(')')[2].split(' ')[20]) / float(SC_CLK_TCK)
|
||||||
|
|
||||||
|
|
||||||
|
def pid_to_state(pid):
|
||||||
|
return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1]
|
||||||
|
|
||||||
|
|
||||||
def update_stat_dict_and_print(key):
|
def update_stat_dict_and_print(key):
|
||||||
if key not in stat_dict:
|
if key not in stat_dict:
|
||||||
stat_dict.update({key: 1})
|
stat_dict.update({key: 1})
|
||||||
@ -261,6 +281,22 @@ def pid_to_name(pid):
|
|||||||
except ProcessLookupError:
|
except ProcessLookupError:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
'''
|
||||||
|
# return process name
|
||||||
|
def pid_to_rss(pid):
|
||||||
|
"""
|
||||||
|
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open('/proc/' + pid + '/statm') as f:
|
||||||
|
for line in f:
|
||||||
|
return line.split(' ')[1]
|
||||||
|
except FileNotFoundError:
|
||||||
|
return 0
|
||||||
|
except ProcessLookupError:
|
||||||
|
return 0
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
def pid_to_cmdline(pid):
|
def pid_to_cmdline(pid):
|
||||||
"""
|
"""
|
||||||
@ -277,10 +313,11 @@ def pid_to_cmdline(pid):
|
|||||||
|
|
||||||
|
|
||||||
def pid_to_uid(pid):
|
def pid_to_uid(pid):
|
||||||
|
'''return euid'''
|
||||||
with open('/proc/' + pid + '/status') as f:
|
with open('/proc/' + pid + '/status') as f:
|
||||||
for n, line in enumerate(f):
|
for n, line in enumerate(f):
|
||||||
if n is uid_index:
|
if n is uid_index:
|
||||||
return line.split('\t')[1]
|
return line.split('\t')[2]
|
||||||
|
|
||||||
|
|
||||||
def notify_send_wait(title, body):
|
def notify_send_wait(title, body):
|
||||||
@ -390,6 +427,109 @@ def sleep_after_send_signal(signal):
|
|||||||
if print_sleep_periods:
|
if print_sleep_periods:
|
||||||
print(' sleep', min_delay_after_sigterm)
|
print(' sleep', min_delay_after_sigterm)
|
||||||
sleep(min_delay_after_sigterm)
|
sleep(min_delay_after_sigterm)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def stop():
|
||||||
|
print()
|
||||||
|
print('Stop running processes...')
|
||||||
|
t1 = time()
|
||||||
|
t2 = time()
|
||||||
|
stopped_list = []
|
||||||
|
for pid in os.listdir('/proc')[::-1]:
|
||||||
|
# only directories whose names consist only of numbers, except /proc/1/
|
||||||
|
if pid[0].isdecimal() is False or pid is '1' or pid == self_pid:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
# print(pid)
|
||||||
|
if pid_to_state(pid) == 'R':
|
||||||
|
if pid_to_cmdline(pid) != '' and pid_to_name(pid) != 'Xorg':
|
||||||
|
stopped_list.append(pid)
|
||||||
|
print('Send SIGSTOP to {}, {}, {}...'.format(
|
||||||
|
pid, pid_to_name(pid), pid_to_cmdline(pid)[:40]))
|
||||||
|
os.kill(int(pid), SIGSTOP)
|
||||||
|
t2 = time()
|
||||||
|
except FileNotFoundError:
|
||||||
|
continue
|
||||||
|
except ProcessLookupError:
|
||||||
|
continue
|
||||||
|
print('Stop time:', t2 - t1)
|
||||||
|
return stopped_list
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def cont(stopped_list):
|
||||||
|
print()
|
||||||
|
print('Continue stopped processes...')
|
||||||
|
t1 = time()
|
||||||
|
if len(stopped_list) > 0:
|
||||||
|
for pid in stopped_list:
|
||||||
|
print('Send SIGCONT to', [pid], pid_to_name(pid))
|
||||||
|
try:
|
||||||
|
os.kill(int(pid), SIGCONT)
|
||||||
|
except FileNotFoundError:
|
||||||
|
continue
|
||||||
|
except ProcessLookupError:
|
||||||
|
continue
|
||||||
|
t2 = time()
|
||||||
|
print('All cont time: ', t2 - t1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def print_states():
|
||||||
|
print()
|
||||||
|
t1 = time()
|
||||||
|
print('non-S states:')
|
||||||
|
for pid in os.listdir('/proc'):
|
||||||
|
# only directories whose names consist only of numbers, except /proc/1/
|
||||||
|
if pid[0].isdecimal() is False or pid is '1' or pid == self_pid:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
s = pid_to_state(pid)
|
||||||
|
if s == 'S':
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
print('State: {}, [{}], {}, {}...'.format(
|
||||||
|
s, pid, pid_to_name(pid), pid_to_cmdline(pid)[:40]))
|
||||||
|
except FileNotFoundError:
|
||||||
|
continue
|
||||||
|
except ProcessLookupError:
|
||||||
|
continue
|
||||||
|
t2 = time()
|
||||||
|
print('print state time:', t2 - t1)
|
||||||
|
print()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def fattest():
|
def fattest():
|
||||||
@ -402,7 +542,7 @@ def fattest():
|
|||||||
|
|
||||||
for pid in os.listdir('/proc'):
|
for pid in os.listdir('/proc'):
|
||||||
# only directories whose names consist only of numbers, except /proc/1/
|
# only directories whose names consist only of numbers, except /proc/1/
|
||||||
if pid[0].isdecimal() is False or pid is '1' or pid is self_pid:
|
if pid[0].isdecimal() is False or pid is '1' or pid == self_pid:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# find and modify badness (if it needs)
|
# find and modify badness (if it needs)
|
||||||
@ -448,8 +588,37 @@ def fattest():
|
|||||||
pid_badness_list.append((pid, badness))
|
pid_badness_list.append((pid, badness))
|
||||||
|
|
||||||
# Make list of (pid, badness) tuples, sorted by 'badness' values
|
# Make list of (pid, badness) tuples, sorted by 'badness' values
|
||||||
pid_tuple_list = sorted(
|
pid_tuple_list = sorted(pid_badness_list, key=itemgetter(1), reverse=True)[0]
|
||||||
pid_badness_list, key=itemgetter(1), reverse=True)[0]
|
|
||||||
|
|
||||||
|
# badness oom_score oom_score_adj RSS UID NAME (cmdline)
|
||||||
|
if debug:
|
||||||
|
x = sorted(pid_badness_list, key=itemgetter(1), reverse=True)
|
||||||
|
for i in x:
|
||||||
|
try:
|
||||||
|
print('PID: {} | badness: {} | name: {} | eUID: {} | cmdline: {}'.format(
|
||||||
|
i[0].rjust(5),
|
||||||
|
str(i[1]).rjust(5),
|
||||||
|
pid_to_name(i[0]).ljust(15),
|
||||||
|
pid_to_uid(i[0]).rjust(6),
|
||||||
|
pid_to_cmdline(i[0])[:50]
|
||||||
|
))
|
||||||
|
print(pid_to_state(i[0]))
|
||||||
|
|
||||||
|
k = 0.5
|
||||||
|
uptime_ratio = 1 - pid_to_starttime(i[0]) / uptime()
|
||||||
|
uptime_ratio2 = uptime_ratio ** k
|
||||||
|
print(uptime_ratio, uptime_ratio2, i[1], i[1] * uptime_ratio2)
|
||||||
|
|
||||||
|
#print(pid_to_starttime('1'))
|
||||||
|
#print(uptime())
|
||||||
|
|
||||||
|
except FileNotFoundError:
|
||||||
|
print('(FileNotFoundError)')
|
||||||
|
continue
|
||||||
|
except ProcessLookupError:
|
||||||
|
print('(ProcessLookupError)')
|
||||||
|
continue
|
||||||
|
|
||||||
pid = pid_tuple_list[0]
|
pid = pid_tuple_list[0]
|
||||||
|
|
||||||
@ -464,6 +633,9 @@ def find_victim_and_send_signal(signal):
|
|||||||
Find victim with highest badness and send SIGTERM/SIGKILL
|
Find victim with highest badness and send SIGTERM/SIGKILL
|
||||||
"""
|
"""
|
||||||
# print()
|
# print()
|
||||||
|
if stop_cont:
|
||||||
|
print_states()
|
||||||
|
stopped_list = stop()
|
||||||
|
|
||||||
pid, victim_badness = fattest()
|
pid, victim_badness = fattest()
|
||||||
name = pid_to_name(pid)
|
name = pid_to_name(pid)
|
||||||
@ -594,6 +766,8 @@ def find_victim_and_send_signal(signal):
|
|||||||
|
|
||||||
if execute_the_command and signal is SIGTERM and name in etc_dict:
|
if execute_the_command and signal is SIGTERM and name in etc_dict:
|
||||||
command = etc_dict[name]
|
command = etc_dict[name]
|
||||||
|
if stop_cont:
|
||||||
|
os.kill(int(pid), SIGCONT)
|
||||||
exit_status = os.system(etc_dict[name].replace('$PID', pid))
|
exit_status = os.system(etc_dict[name].replace('$PID', pid))
|
||||||
if exit_status == 0:
|
if exit_status == 0:
|
||||||
exit_status = '\033[32m0\033[0m'
|
exit_status = '\033[32m0\033[0m'
|
||||||
@ -622,6 +796,8 @@ def find_victim_and_send_signal(signal):
|
|||||||
else:
|
else:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if stop_cont:
|
||||||
|
os.kill(int(pid), SIGCONT)
|
||||||
os.kill(int(pid), signal)
|
os.kill(int(pid), signal)
|
||||||
response_time = time() - time0
|
response_time = time() - time0
|
||||||
send_result = '\033[32mOK\033[0m; response time: {} ms'.format(
|
send_result = '\033[32mOK\033[0m; response time: {} ms'.format(
|
||||||
@ -696,12 +872,19 @@ def find_victim_and_send_signal(signal):
|
|||||||
|
|
||||||
print(stats_msg)
|
print(stats_msg)
|
||||||
|
|
||||||
|
if stop_cont:
|
||||||
|
print_states()
|
||||||
|
cont(stopped_list)
|
||||||
|
print_states()
|
||||||
|
|
||||||
sleep_after_send_signal(signal)
|
sleep_after_send_signal(signal)
|
||||||
|
|
||||||
|
|
||||||
def sleep_after_check_mem():
|
def sleep_after_check_mem():
|
||||||
"""Specify sleep times depends on rates and avialable memory."""
|
"""Specify sleep times depends on rates and avialable memory."""
|
||||||
|
|
||||||
|
# It's magic!
|
||||||
|
|
||||||
if mem_min_sigkill_kb < mem_min_sigterm_kb:
|
if mem_min_sigkill_kb < mem_min_sigterm_kb:
|
||||||
mem_point = mem_available - mem_min_sigterm_kb
|
mem_point = mem_available - mem_min_sigterm_kb
|
||||||
else:
|
else:
|
||||||
@ -1361,6 +1544,23 @@ psi_min_sleep_time_after_action = psi_avg10_sleep_time
|
|||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# stopped_list = stop()
|
||||||
|
|
||||||
|
|
||||||
|
# cont(stopped_list)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if psi_support and not ignore_psi:
|
if psi_support and not ignore_psi:
|
||||||
kill_psi_t0 = time() + psi_avg10_sleep_time
|
kill_psi_t0 = time() + psi_avg10_sleep_time
|
||||||
term_psi_t0 = time() + psi_avg10_sleep_time
|
term_psi_t0 = time() + psi_avg10_sleep_time
|
||||||
|
22
nohang.conf
22
nohang.conf
@ -65,7 +65,7 @@ zram_max_sigkill = 55 %
|
|||||||
Response on PSI memory some avg10 value
|
Response on PSI memory some avg10 value
|
||||||
(/proc/pressure/memory on systems with Linux 4.20+).
|
(/proc/pressure/memory on systems with Linux 4.20+).
|
||||||
|
|
||||||
ignore_psi = False
|
ignore_psi = True
|
||||||
|
|
||||||
sigterm_psi_avg10 = 60
|
sigterm_psi_avg10 = 60
|
||||||
sigkill_psi_avg10 = 90
|
sigkill_psi_avg10 = 90
|
||||||
@ -93,8 +93,8 @@ psi_avg10_sleep_time = 60
|
|||||||
|
|
||||||
Valid values are positive floating-point numbers.
|
Valid values are positive floating-point numbers.
|
||||||
|
|
||||||
rate_mem = 4
|
rate_mem = 6
|
||||||
rate_swap = 2
|
rate_swap = 3
|
||||||
rate_zram = 1
|
rate_zram = 1
|
||||||
|
|
||||||
See also https://github.com/rfjakob/earlyoom/issues/61
|
See also https://github.com/rfjakob/earlyoom/issues/61
|
||||||
@ -132,7 +132,7 @@ min_delay_after_sigkill = 0.8
|
|||||||
Valid values are True and False.
|
Valid values are True and False.
|
||||||
Values are case sensitive.
|
Values are case sensitive.
|
||||||
|
|
||||||
decrease_oom_score_adj = True
|
decrease_oom_score_adj = False
|
||||||
|
|
||||||
Valid values are integers from the range [0; 1000].
|
Valid values are integers from the range [0; 1000].
|
||||||
|
|
||||||
@ -160,7 +160,7 @@ oom_score_adj_max = 30
|
|||||||
|
|
||||||
Valid values are True and False.
|
Valid values are True and False.
|
||||||
|
|
||||||
regex_matching = True
|
regex_matching = False
|
||||||
|
|
||||||
Syntax:
|
Syntax:
|
||||||
|
|
||||||
@ -184,7 +184,7 @@ regex_matching = True
|
|||||||
|
|
||||||
A good option that allows fine adjustment.
|
A good option that allows fine adjustment.
|
||||||
|
|
||||||
re_match_cmdline = True
|
re_match_cmdline = False
|
||||||
|
|
||||||
@CMDLINE_RE 300 /// -childID|--type=renderer
|
@CMDLINE_RE 300 /// -childID|--type=renderer
|
||||||
|
|
||||||
@ -195,7 +195,7 @@ re_match_cmdline = True
|
|||||||
|
|
||||||
The most slow option
|
The most slow option
|
||||||
|
|
||||||
re_match_uid = True
|
re_match_uid = False
|
||||||
|
|
||||||
@UID_RE -100 /// ^0$
|
@UID_RE -100 /// ^0$
|
||||||
|
|
||||||
@ -215,7 +215,7 @@ re_match_uid = True
|
|||||||
|
|
||||||
Valid values are True and False.
|
Valid values are True and False.
|
||||||
|
|
||||||
execute_the_command = True
|
execute_the_command = False
|
||||||
|
|
||||||
The length of the process name can't exceed 15 characters.
|
The length of the process name can't exceed 15 characters.
|
||||||
The syntax is as follows: lines starting with keyword $ETC are
|
The syntax is as follows: lines starting with keyword $ETC are
|
||||||
@ -256,7 +256,7 @@ $ETC firefox-esr /// kill -SEGV $PID
|
|||||||
See also wiki.archlinux.org/index.php/Desktop_notifications
|
See also wiki.archlinux.org/index.php/Desktop_notifications
|
||||||
Valid values are True and False.
|
Valid values are True and False.
|
||||||
|
|
||||||
gui_notifications = True
|
gui_notifications = False
|
||||||
|
|
||||||
Enable GUI notifications about the low level of available memory.
|
Enable GUI notifications about the low level of available memory.
|
||||||
Valid values are True and False.
|
Valid values are True and False.
|
||||||
@ -294,7 +294,7 @@ zram_max_warnings = 40 %
|
|||||||
Display the configuration when the program starts.
|
Display the configuration when the program starts.
|
||||||
Valid values are True and False.
|
Valid values are True and False.
|
||||||
|
|
||||||
print_config = True
|
print_config = False
|
||||||
|
|
||||||
Print memory check results.
|
Print memory check results.
|
||||||
Valid values are True and False.
|
Valid values are True and False.
|
||||||
@ -304,5 +304,5 @@ print_mem_check_results = True
|
|||||||
Print sleep periods between memory checks.
|
Print sleep periods between memory checks.
|
||||||
Valid values are True and False.
|
Valid values are True and False.
|
||||||
|
|
||||||
print_sleep_periods = True
|
print_sleep_periods = False
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user