stop running processes in finding victim process

This commit is contained in:
Alexey Avramov 2019-01-13 20:16:06 +09:00
parent 50f08bc894
commit e2ce62114b
2 changed files with 222 additions and 22 deletions

214
nohang
View File

@ -11,7 +11,7 @@ from operator import itemgetter
from argparse import ArgumentParser from argparse import ArgumentParser
from sys import stdout from sys import stdout
from signal import SIGKILL, SIGTERM from signal import SIGKILL, SIGTERM, SIGSTOP, SIGCONT
sig_dict = {SIGKILL: 'SIGKILL', sig_dict = {SIGKILL: 'SIGKILL',
SIGTERM: 'SIGTERM'} SIGTERM: 'SIGTERM'}
@ -26,20 +26,40 @@ else:
wait_time = 14 wait_time = 14
max_sleep_time = 2 max_sleep_time = 1
min_sleep_time = 0.1 min_sleep_time = 0.05
notify_helper_path = '/usr/bin/nohang_notify_helper' notify_helper_path = '/usr/bin/nohang_notify_helper'
psi_path = '/proc/pressure/memory' psi_path = '/proc/pressure/memory'
psi_support = os.path.exists(psi_path) psi_support = os.path.exists(psi_path)
debug = False
stop_cont = True
SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
########################################################################## ##########################################################################
# function definition section # function definition section
def uptime():
    """Return the first field of /proc/uptime as a float.

    Per proc(5) that field is the system uptime in seconds. The line is
    obtained through rline1(), which is defined elsewhere in this file
    (presumably it returns the first line of the given path).
    """
    first_field = rline1('/proc/uptime').split(' ')[0]
    return float(first_field)
def pid_to_starttime(pid):
    """Return the start time of process `pid`, in seconds.

    pid -- process ID as a string (it is concatenated into the /proc path).

    The value is the `starttime` field of /proc/<pid>/stat (clock ticks,
    see proc(5)) divided by SC_CLK_TCK.
    """
    stat_line = rline1('/proc/' + pid + '/stat')
    # Cut at the LAST ')' so that a process name containing spaces or
    # parentheses cannot shift the field positions.
    after_comm = stat_line.rpartition(')')[2]
    # after_comm starts with a space, so index 0 is '' and index 1 is the
    # state field; index 20 is therefore field 22 of the stat line.
    start_ticks = after_comm.split(' ')[20]
    return float(start_ticks) / float(SC_CLK_TCK)
def pid_to_state(pid):
    """Return the one-character state of process `pid` (e.g. 'R', 'S').

    pid -- process ID as a string (it is concatenated into the /proc path).

    The state is read from /proc/<pid>/stat.
    """
    stat_line = rline1('/proc/' + pid + '/stat')
    # Everything after the LAST ')' is ' <state> <ppid> ...'; character 0 is
    # the separating space, character 1 is the state letter.
    after_comm = stat_line.rpartition(')')[2]
    return after_comm[1]
def update_stat_dict_and_print(key): def update_stat_dict_and_print(key):
if key not in stat_dict: if key not in stat_dict:
stat_dict.update({key: 1}) stat_dict.update({key: 1})
@ -261,6 +281,22 @@ def pid_to_name(pid):
except ProcessLookupError: except ProcessLookupError:
return '' return ''
'''
# return process name
def pid_to_rss(pid):
"""
"""
try:
with open('/proc/' + pid + '/statm') as f:
for line in f:
return line.split(' ')[1]
except FileNotFoundError:
return 0
except ProcessLookupError:
return 0
'''
def pid_to_cmdline(pid): def pid_to_cmdline(pid):
""" """
@ -277,10 +313,11 @@ def pid_to_cmdline(pid):
def pid_to_uid(pid): def pid_to_uid(pid):
'''return euid'''
with open('/proc/' + pid + '/status') as f: with open('/proc/' + pid + '/status') as f:
for n, line in enumerate(f): for n, line in enumerate(f):
if n is uid_index: if n is uid_index:
return line.split('\t')[1] return line.split('\t')[2]
def notify_send_wait(title, body): def notify_send_wait(title, body):
@ -390,6 +427,109 @@ def sleep_after_send_signal(signal):
if print_sleep_periods: if print_sleep_periods:
print(' sleep', min_delay_after_sigterm) print(' sleep', min_delay_after_sigterm)
sleep(min_delay_after_sigterm) sleep(min_delay_after_sigterm)
def stop():
    """Send SIGSTOP to running processes and return the PIDs that were chosen.

    Walks the numeric entries of /proc in reverse listing order and signals
    every process whose state is 'R', except PID 1, this process itself
    (self_pid), processes with an empty cmdline, and processes named 'Xorg'.
    Progress and the elapsed time are printed to stdout.

    Returns a list of PID strings, intended to be passed to cont().
    """
    print()
    print('Stop running processes...')
    t1 = time()
    # Pre-set so that the elapsed time is ~0 when nothing gets stopped.
    t2 = time()
    stopped_list = []

    for pid in reversed(os.listdir('/proc')):
        # only directories whose names consist only of numbers, except /proc/1/
        # NOTE: strings must be compared with ==, not `is`; identity of equal
        # strings is an interpreter implementation detail.
        if not pid[0].isdecimal() or pid == '1' or pid == self_pid:
            continue

        try:
            if pid_to_state(pid) != 'R':
                continue

            # An empty cmdline is skipped (presumably kernel threads).
            cmdline = pid_to_cmdline(pid)
            if cmdline == '':
                continue

            # Never freeze the X server.
            name = pid_to_name(pid)
            if name == 'Xorg':
                continue

            # Recorded before the signal is sent, as in the original; cont()
            # tolerates PIDs that no longer exist.
            stopped_list.append(pid)
            print('Send SIGSTOP to {}, {}, {}...'.format(
                pid, name, cmdline[:40]))
            os.kill(int(pid), SIGSTOP)
            t2 = time()
        except (FileNotFoundError, ProcessLookupError):
            # The process exited between listing /proc and inspecting it.
            continue

    print('Stop time:', t2 - t1)
    return stopped_list
def cont(stopped_list):
    """Send SIGCONT to every PID in `stopped_list`.

    stopped_list -- iterable of PID strings, as returned by stop().

    Processes that have disappeared in the meantime are skipped. Progress and
    the total elapsed time are printed to stdout.
    """
    print()
    print('Continue stopped processes...')
    t1 = time()

    for pid in stopped_list:
        # The name lookup for the log line is inside the guard as well: a
        # failure on one vanished process must not abort the loop and leave
        # the remaining processes stopped.
        try:
            print('Send SIGCONT to', [pid], pid_to_name(pid))
            os.kill(int(pid), SIGCONT)
        except (FileNotFoundError, ProcessLookupError):
            continue

    t2 = time()
    print('All cont time: ', t2 - t1)
def print_states():
    """Print state, PID, name and cmdline of every process not in state 'S'.

    Walks the numeric entries of /proc, skipping PID 1 and this process
    itself (self_pid). Also prints how long the scan took.
    """
    print()
    t1 = time()
    print('non-S states:')

    for pid in os.listdir('/proc'):
        # only directories whose names consist only of numbers, except /proc/1/
        # NOTE: strings must be compared with ==, not `is`; identity of equal
        # strings is an interpreter implementation detail.
        if not pid[0].isdecimal() or pid == '1' or pid == self_pid:
            continue

        try:
            s = pid_to_state(pid)
            if s != 'S':
                print('State: {}, [{}], {}, {}...'.format(
                    s, pid, pid_to_name(pid), pid_to_cmdline(pid)[:40]))
        except (FileNotFoundError, ProcessLookupError):
            # The process exited while it was being inspected.
            continue

    t2 = time()
    print('print state time:', t2 - t1)
    print()
def fattest(): def fattest():
@ -402,7 +542,7 @@ def fattest():
for pid in os.listdir('/proc'): for pid in os.listdir('/proc'):
# only directories whose names consist only of numbers, except /proc/1/ # only directories whose names consist only of numbers, except /proc/1/
if pid[0].isdecimal() is False or pid is '1' or pid is self_pid: if pid[0].isdecimal() is False or pid is '1' or pid == self_pid:
continue continue
# find and modify badness (if it needs) # find and modify badness (if it needs)
@ -448,8 +588,37 @@ def fattest():
pid_badness_list.append((pid, badness)) pid_badness_list.append((pid, badness))
# Make list of (pid, badness) tuples, sorted by 'badness' values # Make list of (pid, badness) tuples, sorted by 'badness' values
pid_tuple_list = sorted( pid_tuple_list = sorted(pid_badness_list, key=itemgetter(1), reverse=True)[0]
pid_badness_list, key=itemgetter(1), reverse=True)[0]
# badness oom_score oom_score_adj RSS UID NAME (cmdline)
if debug:
x = sorted(pid_badness_list, key=itemgetter(1), reverse=True)
for i in x:
try:
print('PID: {} | badness: {} | name: {} | eUID: {} | cmdline: {}'.format(
i[0].rjust(5),
str(i[1]).rjust(5),
pid_to_name(i[0]).ljust(15),
pid_to_uid(i[0]).rjust(6),
pid_to_cmdline(i[0])[:50]
))
print(pid_to_state(i[0]))
k = 0.5
uptime_ratio = 1 - pid_to_starttime(i[0]) / uptime()
uptime_ratio2 = uptime_ratio ** k
print(uptime_ratio, uptime_ratio2, i[1], i[1] * uptime_ratio2)
#print(pid_to_starttime('1'))
#print(uptime())
except FileNotFoundError:
print('(FileNotFoundError)')
continue
except ProcessLookupError:
print('(ProcessLookupError)')
continue
pid = pid_tuple_list[0] pid = pid_tuple_list[0]
@ -464,6 +633,9 @@ def find_victim_and_send_signal(signal):
Find victim with highest badness and send SIGTERM/SIGKILL Find victim with highest badness and send SIGTERM/SIGKILL
""" """
# print() # print()
if stop_cont:
print_states()
stopped_list = stop()
pid, victim_badness = fattest() pid, victim_badness = fattest()
name = pid_to_name(pid) name = pid_to_name(pid)
@ -594,6 +766,8 @@ def find_victim_and_send_signal(signal):
if execute_the_command and signal is SIGTERM and name in etc_dict: if execute_the_command and signal is SIGTERM and name in etc_dict:
command = etc_dict[name] command = etc_dict[name]
if stop_cont:
os.kill(int(pid), SIGCONT)
exit_status = os.system(etc_dict[name].replace('$PID', pid)) exit_status = os.system(etc_dict[name].replace('$PID', pid))
if exit_status == 0: if exit_status == 0:
exit_status = '\033[32m0\033[0m' exit_status = '\033[32m0\033[0m'
@ -622,6 +796,8 @@ def find_victim_and_send_signal(signal):
else: else:
try: try:
if stop_cont:
os.kill(int(pid), SIGCONT)
os.kill(int(pid), signal) os.kill(int(pid), signal)
response_time = time() - time0 response_time = time() - time0
send_result = '\033[32mOK\033[0m; response time: {} ms'.format( send_result = '\033[32mOK\033[0m; response time: {} ms'.format(
@ -696,12 +872,19 @@ def find_victim_and_send_signal(signal):
print(stats_msg) print(stats_msg)
if stop_cont:
print_states()
cont(stopped_list)
print_states()
sleep_after_send_signal(signal) sleep_after_send_signal(signal)
def sleep_after_check_mem(): def sleep_after_check_mem():
"""Specify sleep times depends on rates and avialable memory.""" """Specify sleep times depends on rates and avialable memory."""
# It's magic!
if mem_min_sigkill_kb < mem_min_sigterm_kb: if mem_min_sigkill_kb < mem_min_sigterm_kb:
mem_point = mem_available - mem_min_sigterm_kb mem_point = mem_available - mem_min_sigterm_kb
else: else:
@ -1361,6 +1544,23 @@ psi_min_sleep_time_after_action = psi_avg10_sleep_time
########################################################################## ##########################################################################
# stopped_list = stop()
# cont(stopped_list)
if psi_support and not ignore_psi: if psi_support and not ignore_psi:
kill_psi_t0 = time() + psi_avg10_sleep_time kill_psi_t0 = time() + psi_avg10_sleep_time
term_psi_t0 = time() + psi_avg10_sleep_time term_psi_t0 = time() + psi_avg10_sleep_time

View File

@ -65,7 +65,7 @@ zram_max_sigkill = 55 %
Response on PSI memory some avg10 value Response on PSI memory some avg10 value
(/proc/pressure/memory on systems with Linux 4.20+). (/proc/pressure/memory on systems with Linux 4.20+).
ignore_psi = False ignore_psi = True
sigterm_psi_avg10 = 60 sigterm_psi_avg10 = 60
sigkill_psi_avg10 = 90 sigkill_psi_avg10 = 90
@ -93,8 +93,8 @@ psi_avg10_sleep_time = 60
Valid values are positive floating-point numbers. Valid values are positive floating-point numbers.
rate_mem = 4 rate_mem = 6
rate_swap = 2 rate_swap = 3
rate_zram = 1 rate_zram = 1
See also https://github.com/rfjakob/earlyoom/issues/61 See also https://github.com/rfjakob/earlyoom/issues/61
@ -132,7 +132,7 @@ min_delay_after_sigkill = 0.8
Valid values are True and False. Valid values are True and False.
Values are case sensitive. Values are case sensitive.
decrease_oom_score_adj = True decrease_oom_score_adj = False
Valid values are integers from the range [0; 1000]. Valid values are integers from the range [0; 1000].
@ -160,7 +160,7 @@ oom_score_adj_max = 30
Valid values are True and False. Valid values are True and False.
regex_matching = True regex_matching = False
Syntax: Syntax:
@ -184,7 +184,7 @@ regex_matching = True
A good option that allows fine adjustment. A good option that allows fine adjustment.
re_match_cmdline = True re_match_cmdline = False
@CMDLINE_RE 300 /// -childID|--type=renderer @CMDLINE_RE 300 /// -childID|--type=renderer
@ -195,7 +195,7 @@ re_match_cmdline = True
The slowest option The slowest option
re_match_uid = True re_match_uid = False
@UID_RE -100 /// ^0$ @UID_RE -100 /// ^0$
@ -215,7 +215,7 @@ re_match_uid = True
Valid values are True and False. Valid values are True and False.
execute_the_command = True execute_the_command = False
The length of the process name can't exceed 15 characters. The length of the process name can't exceed 15 characters.
The syntax is as follows: lines starting with keyword $ETC are The syntax is as follows: lines starting with keyword $ETC are
@ -256,7 +256,7 @@ $ETC firefox-esr /// kill -SEGV $PID
See also wiki.archlinux.org/index.php/Desktop_notifications See also wiki.archlinux.org/index.php/Desktop_notifications
Valid values are True and False. Valid values are True and False.
gui_notifications = True gui_notifications = False
Enable GUI notifications about the low level of available memory. Enable GUI notifications about the low level of available memory.
Valid values are True and False. Valid values are True and False.
@ -294,7 +294,7 @@ zram_max_warnings = 40 %
Display the configuration when the program starts. Display the configuration when the program starts.
Valid values are True and False. Valid values are True and False.
print_config = True print_config = False
Print memory check results. Print memory check results.
Valid values are True and False. Valid values are True and False.
@ -304,5 +304,5 @@ print_mem_check_results = True
Print sleep periods between memory checks. Print sleep periods between memory checks.
Valid values are True and False. Valid values are True and False.
print_sleep_periods = True print_sleep_periods = False