Print top-15 tasks by badness before corrective action

This commit is contained in:
Alexey Avramov 2020-04-01 03:31:57 +09:00
parent a7cd7f2bba
commit 24173cbc47

View File

@ -1503,42 +1503,51 @@ def badness_pid_list():
def fast_find_victim(): def fast_find_victim():
""" """
""" """
ft1 = monotonic() ft1 = monotonic()
pid_badness_list = badness_pid_list() pid_badness_list = badness_pid_list()
real_proc_num = len(pid_badness_list) real_proc_num = len(pid_badness_list)
if real_proc_num == 0: if real_proc_num == 0:
log('Found {} tasks with non-zero oom_score (except init and ' log('Found {} tasks with non-zero oom_score (except init and self) '
'self)'.format(real_proc_num)) 'in {}ms'.format(real_proc_num, round((monotonic() - ft1) * 1000)))
return None return None
# Make list of (pid, badness) tuples, sorted by 'badness' values log('Found {} tasks with non-zero oom_score (except init and self) '
# print(pid_badness_list) 'in {}ms'.format(real_proc_num, round((monotonic() - ft1) * 1000)))
pid_tuple_list = sorted(
pid_badness_list, key=itemgetter(1), reverse=True)[0]
pid = pid_tuple_list[0] # Make list of (pid, badness) tuples, sorted by 'badness' values
pid_badness_list_sorted = sorted(
pid_badness_list,
key=itemgetter(1),
reverse=True)
m0 = monotonic()
top_n = 15
if real_proc_num < top_n:
top_n = real_proc_num
log('TOP-{} tasks by badness:'.format(top_n))
log(' Name PID badness')
log(' --------------- ------- -------')
for pid_badness in pid_badness_list_sorted[0:top_n]:
p = pid_badness[0]
b = str(pid_badness[1])
n = pid_to_name(p)
log(' {} {} {}'.format(n.ljust(15), p.rjust(7), b.rjust(7)))
pid = pid_badness_list_sorted[0][0]
victim_id = get_victim_id(pid) victim_id = get_victim_id(pid)
# Get maximum 'badness' value # Get maximum 'badness' value
victim_badness = pid_tuple_list[1] victim_badness = pid_badness_list_sorted[0][1]
victim_name = pid_to_name(pid) victim_name = pid_to_name(pid)
log('Found {} tasks with non-zero oom_score (except init and self)'.format( log('TOP printed in {}ms; process with highest badness:\n PID: {}, na'
real_proc_num))
log(
'Process with highest badness (found in {} ms):\n PID: {}, Na'
'me: {}, badness: {}'.format( 'me: {}, badness: {}'.format(
round((monotonic() - ft1) * 1000), round((monotonic() - m0) * 1000),
pid, pid,
victim_name, victim_name,
victim_badness victim_badness
) ))
)
return pid, victim_badness, victim_name, victim_id return pid, victim_badness, victim_name, victim_id
@ -2183,12 +2192,27 @@ def implement_corrective_action(
log('New victim is cached victim {} ({})'.format(pid, name)) log('New victim is cached victim {} ({})'.format(pid, name))
else: else:
s1 = set(os.listdir('/proc'))
fff = find_victim(print_proc_table) fff = find_victim(print_proc_table)
# sleep(0.1)
s2 = set(os.listdir('/proc'))
dset = s1 - s2
if len(dset) > 0:
log('During the search for the victim, the processes were '
'completed: {}'.format(dset))
sleep(over_sleep)
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
'<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
return psi_t0
if fff is None: if fff is None:
if debug_sleep: if debug_sleep:
log('Sleep {} sec'.format(over_sleep)) log('Sleep {}s'.format(over_sleep))
sleep(over_sleep) sleep(over_sleep)
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<' log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
@ -2274,7 +2298,7 @@ def implement_corrective_action(
dt, 1), max_soft_exit_time)) dt, 1), max_soft_exit_time))
if debug_sleep: if debug_sleep:
log('Sleep {} sec'.format(over_sleep)) log('Sleep {}s'.format(over_sleep))
sleep(over_sleep) sleep(over_sleep)
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<' log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
@ -2286,7 +2310,15 @@ def implement_corrective_action(
if print_victim_status: if print_victim_status:
victim_info = find_victim_info(pid, victim_badness, name) victim_info = find_victim_info(pid, victim_badness, name)
if victim_info is not None:
log(victim_info) log(victim_info)
else:
sleep(over_sleep)
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
'<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
return psi_t0
soft_match = False soft_match = False
if soft_actions and threshold is SIGTERM: if soft_actions and threshold is SIGTERM:
@ -2441,12 +2473,12 @@ def implement_corrective_action(
if vwd and d > sensitivity_test_time + 10: if vwd and d > sensitivity_test_time + 10:
log('The victim doesn\'t respond on corrective action' log('The victim doesn\'t respond on corrective action'
' in {} sec'.format(round(d, 3))) ' in {}s'.format(round(d, 3)))
break break
if not vwd and d > sensitivity_test_time: if not vwd and d > sensitivity_test_time:
log('The victim doesn\'t respond on corrective action' log('The victim doesn\'t respond on corrective action'
' in {} sec'.format(round(d, 3))) ' in {}s'.format(round(d, 3)))
break break
elif iva == 2: elif iva == 2:
@ -2454,7 +2486,7 @@ def implement_corrective_action(
else: else:
log('The victim became a zombie in {} sec'.format(round(d, 3))) log('The victim became a zombie in {}s'.format(round(d, 3)))
if victim_id in v_dict: if victim_id in v_dict:
v_dict.pop(victim_id) v_dict.pop(victim_id)
@ -2506,7 +2538,7 @@ def implement_corrective_action(
if vwd is None: if vwd is None:
if debug_sleep: if debug_sleep:
log('Sleep {} sec'.format(over_sleep)) log('Sleep {}s'.format(over_sleep))
sleep(over_sleep) sleep(over_sleep)
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<' log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
@ -2520,7 +2552,7 @@ def sleep_after_check_mem():
if stable_sleep: if stable_sleep:
if debug_sleep: if debug_sleep:
log('Sleep {} sec'.format(min_sleep)) log('Sleep {}s'.format(min_sleep))
stdout.flush() stdout.flush()
sleep(min_sleep) sleep(min_sleep)
return None return None
@ -2572,7 +2604,7 @@ def sleep_after_check_mem():
pass pass
if debug_sleep: if debug_sleep:
log('Sleep {} sec (t_mem={}, t_swap={}{})'.format(round(t, 2), round( log('Sleep {}s (t_mem={}, t_swap={}{})'.format(round(t, 2), round(
t_mem, 2), round(t_swap, 2), z)) t_mem, 2), round(t_swap, 2), z))
stdout.flush() stdout.flush()