speeding up the search for the victim

This commit is contained in:
Alexey Avramov 2020-03-29 05:21:19 +09:00
parent 72b1197aff
commit 0b9be5a41c

View File

@ -290,23 +290,27 @@ def pop(cmd, username):
)) ))
if swap_total == 0: if swap_total == 0:
wait_time = 5 wait_time = 10
else: else:
wait_time = 25 wait_time = 30
t3 = monotonic() t3 = monotonic()
with Popen(cmd) as proc: try:
try: with Popen(cmd) as proc:
proc.wait(timeout=wait_time) try:
err = proc.poll() proc.wait(timeout=wait_time)
t4 = monotonic() err = proc.poll()
except TimeoutExpired: t4 = monotonic()
proc.kill() except TimeoutExpired:
t4 = monotonic() proc.kill()
t4 = monotonic()
if debug_gui_notifications: if debug_gui_notifications:
log('TimeoutExpired: notify user: {}'.format(username)) log('TimeoutExpired: notify user: {}'.format(username))
except Exception as e:
th_name = threading.current_thread().getName()
log('Exception in {}: {}'.format(th_name, e))
if debug_gui_notifications: if debug_gui_notifications:
log('Popen time: {} sec; exit status: {}; cmd: {}'.format( log('Popen time: {} sec; exit status: {}; cmd: {}'.format(
@ -472,7 +476,6 @@ def check_config():
log(' fill_rate_zram: {}'.format(fill_rate_zram)) log(' fill_rate_zram: {}'.format(fill_rate_zram))
log(' max_sleep: {} sec'.format(max_sleep)) log(' max_sleep: {} sec'.format(max_sleep))
log(' min_sleep: {} sec'.format(min_sleep)) log(' min_sleep: {} sec'.format(min_sleep))
log(' over_sleep: {} sec'.format(over_sleep))
log('\n4. Warnings and notifications') log('\n4. Warnings and notifications')
@ -732,7 +735,7 @@ def signal_handler(signum, frame):
def signal_handler_inner(signum, frame): def signal_handler_inner(signum, frame):
""" """
""" """
log('Signal handler called with the {} signal (ignored) '.format( log('Got the {} signal (ignored) '.format(
sig_dict[signum])) sig_dict[signum]))
@ -887,7 +890,7 @@ def pid_to_name(pid):
""" """
""" """
try: try:
with open('/proc/' + pid + '/comm', 'rb') as f: with open('/proc/{}/comm'.format(pid), 'rb', buffering=0) as f:
return f.read().decode('utf-8', 'ignore')[:-1] return f.read().decode('utf-8', 'ignore')[:-1]
except FileNotFoundError: except FileNotFoundError:
return '' return ''
@ -1001,23 +1004,25 @@ def pid_to_cwd(pid):
def pid_to_uid(pid): def pid_to_uid(pid):
"""return euid""" """return euid"""
try: try:
with open('/proc/' + pid + '/status') as f: with open('/proc/{}/status'.format(pid), 'rb', buffering=0) as f:
for n, line in enumerate(f):
if n is uid_index:
return line.split('\t')[2]
except UnicodeDecodeError:
with open('/proc/' + pid + '/status', 'rb') as f:
f_list = f.read().decode('utf-8', 'ignore').split('\n') f_list = f.read().decode('utf-8', 'ignore').split('\n')
return f_list[uid_index].split('\t')[2] return f_list[uid_index].split('\t')[2]
except FileNotFoundError: except FileNotFoundError:
return '' return ''
except ProcessLookupError:
return ''
def pid_to_badness(pid): def pid_to_badness(pid, oom_score):
"""Find and modify badness (if it needs).""" """Find and modify badness (if it needs)."""
oom_score_adj = None oom_score_adj = None
try: try:
oom_score = int(rline1('/proc/' + pid + '/oom_score'))
if oom_score is None:
oom_score = pid_to_oom_score(pid)
if oom_score == 0: if oom_score == 0:
return oom_score, oom_score return oom_score, oom_score
@ -1025,7 +1030,7 @@ def pid_to_badness(pid):
badness = oom_score badness = oom_score
if ignore_positive_oom_score_adj: if ignore_positive_oom_score_adj:
oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj')) oom_score_adj = pid_to_oom_score_adj(pid)
if oom_score_adj > 0: if oom_score_adj > 0:
badness = badness - oom_score_adj badness = badness - oom_score_adj
@ -1038,8 +1043,7 @@ def pid_to_badness(pid):
badness += badness_adj badness += badness_adj
else: else:
if oom_score_adj is None: if oom_score_adj is None:
oom_score_adj = int(rline1( oom_score_adj = pid_to_oom_score_adj(pid)
'/proc/' + pid + '/oom_score_adj'))
if oom_score_adj >= 0: if oom_score_adj >= 0:
badness += badness_adj badness += badness_adj
@ -1052,8 +1056,7 @@ def pid_to_badness(pid):
badness += badness_adj badness += badness_adj
else: else:
if oom_score_adj is None: if oom_score_adj is None:
oom_score_adj = int(rline1( oom_score_adj = pid_to_oom_score_adj(pid)
'/proc/' + pid + '/oom_score_adj'))
if oom_score_adj >= 0: if oom_score_adj >= 0:
badness += badness_adj badness += badness_adj
@ -1066,8 +1069,7 @@ def pid_to_badness(pid):
badness += badness_adj badness += badness_adj
else: else:
if oom_score_adj is None: if oom_score_adj is None:
oom_score_adj = int(rline1( oom_score_adj = pid_to_oom_score_adj(pid)
'/proc/' + pid + '/oom_score_adj'))
if oom_score_adj >= 0: if oom_score_adj >= 0:
badness += badness_adj badness += badness_adj
@ -1080,8 +1082,7 @@ def pid_to_badness(pid):
badness += badness_adj badness += badness_adj
else: else:
if oom_score_adj is None: if oom_score_adj is None:
oom_score_adj = int(rline1( oom_score_adj = pid_to_oom_score_adj(pid)
'/proc/' + pid + '/oom_score_adj'))
if oom_score_adj >= 0: if oom_score_adj >= 0:
badness += badness_adj badness += badness_adj
@ -1094,8 +1095,7 @@ def pid_to_badness(pid):
badness += badness_adj badness += badness_adj
else: else:
if oom_score_adj is None: if oom_score_adj is None:
oom_score_adj = int(rline1( oom_score_adj = pid_to_oom_score_adj(pid)
'/proc/' + pid + '/oom_score_adj'))
if oom_score_adj >= 0: if oom_score_adj >= 0:
badness += badness_adj badness += badness_adj
@ -1108,8 +1108,7 @@ def pid_to_badness(pid):
badness += badness_adj badness += badness_adj
else: else:
if oom_score_adj is None: if oom_score_adj is None:
oom_score_adj = int(rline1( oom_score_adj = pid_to_oom_score_adj(pid)
'/proc/' + pid + '/oom_score_adj'))
if oom_score_adj >= 0: if oom_score_adj >= 0:
badness += badness_adj badness += badness_adj
@ -1122,8 +1121,7 @@ def pid_to_badness(pid):
badness += badness_adj badness += badness_adj
else: else:
if oom_score_adj is None: if oom_score_adj is None:
oom_score_adj = int(rline1( oom_score_adj = pid_to_oom_score_adj(pid)
'/proc/' + pid + '/oom_score_adj'))
if oom_score_adj >= 0: if oom_score_adj >= 0:
badness += badness_adj badness += badness_adj
@ -1136,8 +1134,7 @@ def pid_to_badness(pid):
badness += badness_adj badness += badness_adj
else: else:
if oom_score_adj is None: if oom_score_adj is None:
oom_score_adj = int(rline1( oom_score_adj = pid_to_oom_score_adj(pid)
'/proc/' + pid + '/oom_score_adj'))
if oom_score_adj >= 0: if oom_score_adj >= 0:
badness += badness_adj badness += badness_adj
@ -1157,58 +1154,7 @@ def pid_to_status(pid):
""" """
try: try:
with open('/proc/' + pid + '/status') as f: with open('/proc/{}/status'.format(pid), 'rb', buffering=0) as f:
for n, line in enumerate(f):
if n == 0:
name = line.split('\t')[1][:-1]
if n is state_index:
state = line.split('\t')[1][0]
continue
if n is ppid_index:
ppid = line.split('\t')[1][:-1]
continue
if n is uid_index:
uid = line.split('\t')[2]
continue
if n is vm_size_index:
vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if n is vm_rss_index:
vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if n is vm_swap_index:
vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
break
return name, state, ppid, uid, vm_size, vm_rss, vm_swap
except UnicodeDecodeError:
return pid_to_status_unicode(pid)
except FileNotFoundError:
return None
except ProcessLookupError:
return None
except ValueError:
return None
def pid_to_status_unicode(pid):
"""
"""
try:
with open('/proc/' + pid + '/status', 'rb') as f:
f_list = f.read().decode('utf-8', 'ignore').split('\n') f_list = f.read().decode('utf-8', 'ignore').split('\n')
for i in range(len(f_list)): for i in range(len(f_list)):
@ -1512,12 +1458,101 @@ def alive_pid_list():
return pid_list return pid_list
def pid_to_oom_score(pid):
    """
    Return the oom_score of a process as an int.

    pid is the name of an entry in /proc (it is formatted into the path
    as is). Returns 0 when /proc/<pid>/oom_score cannot be opened or read
    because the entry does not exist, is not a directory, or the process
    has gone away.

    A ValueError from int() is not handled here and propagates to the
    caller.
    """
    path = '/proc/{}/oom_score'.format(pid)

    try:
        # Unbuffered binary read: one small read, no text decoding layer.
        with open(path, 'rb', buffering=0) as f:
            raw = f.read()
    except (FileNotFoundError, ProcessLookupError, NotADirectoryError):
        return 0

    return int(raw)
def pid_to_oom_score_adj(pid):
    """
    Return the oom_score_adj of a process as an int.

    pid is the name of an entry in /proc (it is formatted into the path
    as is). Returns 0 when /proc/<pid>/oom_score_adj cannot be opened or
    read because the entry does not exist, is not a directory, or the
    process has gone away.

    A ValueError from int() is not handled here and propagates to the
    caller.
    """
    path = '/proc/{}/oom_score_adj'.format(pid)

    try:
        # Unbuffered binary read: one small read, no text decoding layer.
        with open(path, 'rb', buffering=0) as f:
            raw = f.read()
    except (FileNotFoundError, ProcessLookupError, NotADirectoryError):
        return 0

    return int(raw)
def badness_pid_list():
    """
    Build a list of (pid, badness) tuples for candidate processes.

    Walks the entries of /proc and keeps an entry only when all of the
    following hold:
      - its name starts with a decimal digit (it looks like a PID);
      - it is neither self_pid nor '1';
      - pid_to_oom_score() reports a value of at least 1;
      - pid_to_badness() yields a badness that is not None.

    Returns a list of (pid, badness) tuples, where pid is the /proc entry
    name (a string). The list is empty when nothing qualifies.
    """
    pid_b_list = []

    for pid in os.listdir('/proc'):

        # Cheap string checks first, so that non-PID entries, init and
        # this process are rejected without opening any file in /proc.
        if not pid[0].isdecimal():
            continue

        if pid == self_pid or pid == '1':
            continue

        o = pid_to_oom_score(pid)

        if o < 1:
            continue

        b = pid_to_badness(pid, o)[0]

        # find_victim() skips entries whose badness is None; do the same
        # here, otherwise comparing/sorting the collected badness values
        # would fail on a None mixed in with the numbers.
        if b is None:
            continue

        pid_b_list.append((pid, b))

    return pid_b_list
def fast_find_victim():
    """
    Find the process with the highest badness using badness_pid_list().

    Logs how many candidate tasks were found and, when there is at least
    one, the selected process and the time the search took.

    Returns None when badness_pid_list() yields no candidates; otherwise
    returns the tuple (pid, victim_badness, victim_name, victim_id).
    """
    ft1 = monotonic()

    pid_badness_list = badness_pid_list()

    real_proc_num = len(pid_badness_list)

    if real_proc_num == 0:
        log('Found {} tasks with non-zero oom_score (except init and '
            'self)'.format(real_proc_num))
        return None

    # Only the top entry is needed, so take the maximum in a single pass
    # instead of sorting the whole list. On ties max() returns the first
    # maximal entry, which is the same entry a stable descending sort
    # would place first.
    pid, victim_badness = max(pid_badness_list, key=itemgetter(1))

    victim_id = get_victim_id(pid)

    victim_name = pid_to_name(pid)

    log('Found {} tasks with non-zero oom_score (except init and self)'.format(
        real_proc_num))

    log(
        'Process with highest badness (found in {} ms):\n  PID: {}, Na'
        'me: {}, badness: {}'.format(
            round((monotonic() - ft1) * 1000),
            pid,
            victim_name,
            victim_badness
        )
    )

    return pid, victim_badness, victim_name, victim_id
def find_victim(_print_proc_table): def find_victim(_print_proc_table):
""" """
Find the process with highest badness and its badness adjustment Find the process with highest badness and its badness adjustment
Return pid and badness Return pid and badness
""" """
if not _print_proc_table:
return fast_find_victim()
ft1 = monotonic() ft1 = monotonic()
pid_list = alive_pid_list() pid_list = alive_pid_list()
@ -1562,7 +1597,7 @@ def find_victim(_print_proc_table):
for pid in pid_list: for pid in pid_list:
badness = pid_to_badness(pid)[0] badness = pid_to_badness(pid, None)[0]
if badness is None: if badness is None:
continue continue
@ -1570,8 +1605,8 @@ def find_victim(_print_proc_table):
if _print_proc_table: if _print_proc_table:
try: try:
oom_score = rline1('/proc/' + pid + '/oom_score') oom_score = pid_to_oom_score(pid)
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') oom_score_adj = pid_to_oom_score_adj(pid)
except FileNotFoundError: except FileNotFoundError:
continue continue
@ -1609,8 +1644,8 @@ def find_victim(_print_proc_table):
pid.rjust(7), pid.rjust(7),
ppid.rjust(7), ppid.rjust(7),
str(badness).rjust(7), str(badness).rjust(7),
oom_score.rjust(9), str(oom_score).rjust(9),
oom_score_adj.rjust(13), str(oom_score_adj).rjust(13),
uid.rjust(10), uid.rjust(10),
state, state,
str(vm_size).rjust(6), str(vm_size).rjust(6),
@ -1666,67 +1701,7 @@ def find_victim_info(pid, victim_badness, name):
try: try:
with open('/proc/' + pid + '/status') as f: with open('/proc/{}/status'.format(pid), 'rb', buffering=0) as f:
for n, line in enumerate(f):
if n is state_index:
state = line.split('\t')[1].rstrip()
continue
if n is uid_index:
uid = line.split('\t')[2]
continue
if n is vm_size_index:
vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if n is vm_rss_index:
vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if detailed_rss:
if n is anon_index:
anon_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is file_index:
file_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is shmem_index:
shmem_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is vm_swap_index:
vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
break
if print_victim_cmdline:
cmdline = pid_to_cmdline(pid)
oom_score = rline1('/proc/' + pid + '/oom_score')
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
except FileNotFoundError:
x = 'The victim died in the search process: FileNotFoundError'
log(x)
update_stat_dict(x)
print_stat_dict()
return None
except ProcessLookupError:
x = 'The victim died in the search process: ProcessLookupError'
log(x)
update_stat_dict(x)
print_stat_dict()
return None
except UnicodeDecodeError:
with open('/proc/' + pid + '/status', 'rb') as f:
f_list = f.read().decode('utf-8', 'ignore').split('\n') f_list = f.read().decode('utf-8', 'ignore').split('\n')
for i in range(len(f_list)): for i in range(len(f_list)):
@ -1764,8 +1739,8 @@ def find_victim_info(pid, victim_badness, name):
if print_victim_cmdline: if print_victim_cmdline:
cmdline = pid_to_cmdline(pid) cmdline = pid_to_cmdline(pid)
oom_score = rline1('/proc/' + pid + '/oom_score') oom_score = pid_to_oom_score(pid)
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') oom_score_adj = pid_to_oom_score_adj(pid)
except IndexError: except IndexError:
x = 'The victim died in the search process: IndexError' x = 'The victim died in the search process: IndexError'
@ -1780,19 +1755,6 @@ def find_victim_info(pid, victim_badness, name):
print_stat_dict() print_stat_dict()
return None return None
except FileNotFoundError:
x = 'The victim died in the search process: FileNotFoundError'
log(x)
update_stat_dict(x)
print_stat_dict()
return None
except ProcessLookupError:
x = 'The victim died in the search process: ProcessLookupError'
log(x)
update_stat_dict(x)
print_stat_dict()
return None
len_vm = len(str(vm_size)) len_vm = len(str(vm_size))
try: try:
@ -2217,11 +2179,25 @@ def implement_corrective_action(
if x: if x:
victim_id = cached_victim_id victim_id = cached_victim_id
pid = victim_id.partition('_pid')[2] pid = victim_id.partition('_pid')[2]
victim_badness = pid_to_badness(pid)[0] victim_badness = pid_to_badness(pid, None)[0]
name = v_dict[victim_id]['name'] name = v_dict[victim_id]['name']
log('New victim is cached victim {} ({})'.format(pid, name)) log('New victim is cached victim {} ({})'.format(pid, name))
else: else:
pid, victim_badness, name, victim_id = find_victim(print_proc_table)
fff = find_victim(print_proc_table)
if fff is None:
if debug_sleep:
log('Sleep {} sec'.format(over_sleep))
sleep(over_sleep)
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
'<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
return psi_t0
pid, victim_badness, name, victim_id = fff
log('Recheck memory levels...') log('Recheck memory levels...')
@ -2269,6 +2245,8 @@ def implement_corrective_action(
else: else:
log('Thresholds is not exceeded now') log('Thresholds is not exceeded now')
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
'<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
return psi_t0 return psi_t0
for i in mem_info_list: for i in mem_info_list:
@ -2276,6 +2254,8 @@ def implement_corrective_action(
if new_threshold is None or new_threshold == 'WARN': if new_threshold is None or new_threshold == 'WARN':
log('Thresholds is not exceeded now') log('Thresholds is not exceeded now')
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
'<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
return psi_t0 return psi_t0
threshold = new_threshold threshold = new_threshold
@ -2295,7 +2275,7 @@ def implement_corrective_action(
dt, 1), max_soft_exit_time)) dt, 1), max_soft_exit_time))
if debug_sleep: if debug_sleep:
log('Sleep {} sec (over_sleep)'.format(over_sleep)) log('Sleep {} sec'.format(over_sleep))
sleep(over_sleep) sleep(over_sleep)
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<' log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
@ -2510,7 +2490,7 @@ def implement_corrective_action(
if vwd is None: if vwd is None:
if debug_sleep: if debug_sleep:
log('Sleep {} sec (over_sleep)'.format(over_sleep)) log('Sleep {} sec'.format(over_sleep))
sleep(over_sleep) sleep(over_sleep)
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<' log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'