Various improvements and fixes
Fix Something went wrong #56 Improve implement_corrective_action(): - Check VmRSS instead of /proc/[pid]/exe realpath to check process existence - Improve displaying total stat Improve check_permissions() Other improvements
This commit is contained in:
parent
0225fc3bff
commit
8e931f8f00
354
nohang/nohang
354
nohang/nohang
@ -17,8 +17,15 @@ def check_permissions():
|
|||||||
try:
|
try:
|
||||||
os.path.realpath('/proc/1/exe')
|
os.path.realpath('/proc/1/exe')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print('ERROR: missing CAP_SYS_PTRACE: {}'.format(e))
|
print('WARNING: missing CAP_SYS_PTRACE: {}'.format(e))
|
||||||
exit(1)
|
try:
|
||||||
|
os.kill(2, SIGTERM)
|
||||||
|
except Exception as e:
|
||||||
|
print('WARNING: cannot send a signal: {}'.format(e))
|
||||||
|
try:
|
||||||
|
rline1('/proc/1/oom_score')
|
||||||
|
except Exception as e:
|
||||||
|
print('WARNING: {}'.format(e))
|
||||||
|
|
||||||
|
|
||||||
def memload():
|
def memload():
|
||||||
@ -55,8 +62,11 @@ def memload():
|
|||||||
' '.format(
|
' '.format(
|
||||||
round(mem_available / 1024),
|
round(mem_available / 1024),
|
||||||
round(swap_free / 1024)))
|
round(swap_free / 1024)))
|
||||||
|
try:
|
||||||
luid_init = rline1('/proc/1/loginuid')
|
luid_init = rline1('/proc/1/loginuid')
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
exit(1)
|
||||||
luid_self = rline1('/proc/self/loginuid')
|
luid_self = rline1('/proc/self/loginuid')
|
||||||
if luid_init == luid_self:
|
if luid_init == luid_self:
|
||||||
print('The option is available only for logged in users.')
|
print('The option is available only for logged in users.')
|
||||||
@ -119,24 +129,28 @@ def exe(cmd):
|
|||||||
|
|
||||||
cmd_num_dict['cmd_num'] += 1
|
cmd_num_dict['cmd_num'] += 1
|
||||||
cmd_num = cmd_num_dict['cmd_num']
|
cmd_num = cmd_num_dict['cmd_num']
|
||||||
|
th_name = threading.current_thread().getName()
|
||||||
|
|
||||||
log('Execute the command ({}) in {}: {}'.format(
|
log('Executing the command ({}) in {}: {}'.format(
|
||||||
cmd_num,
|
cmd_num,
|
||||||
threading.current_thread().getName(),
|
th_name,
|
||||||
cmd_list))
|
cmd_list))
|
||||||
t3 = monotonic()
|
t3 = monotonic()
|
||||||
with Popen(cmd_list) as proc:
|
try:
|
||||||
try:
|
with Popen(cmd_list) as proc:
|
||||||
proc.wait(timeout=exe_timeout)
|
try:
|
||||||
exit_status = proc.poll()
|
proc.wait(timeout=exe_timeout)
|
||||||
t4 = monotonic()
|
exit_status = proc.poll()
|
||||||
log('Command ({}) execution completed in {} sec; exit status'
|
t4 = monotonic()
|
||||||
': {}'.format(cmd_num, round(t4 - t3, 3), exit_status))
|
log('Command ({}) execution completed in {} sec; exit status'
|
||||||
except TimeoutExpired:
|
': {}'.format(cmd_num, round(t4 - t3, 3), exit_status))
|
||||||
proc.kill()
|
except TimeoutExpired:
|
||||||
t4 = monotonic()
|
proc.kill()
|
||||||
log('TimeoutExpired for the command ({}) in {} sec'.format(
|
t4 = monotonic()
|
||||||
cmd_num, round(t4 - t3, 3)))
|
log('TimeoutExpired for the command ({}) in {} sec'.format(
|
||||||
|
cmd_num, round(t4 - t3, 3)))
|
||||||
|
except Exception as e:
|
||||||
|
log('Exception in {}: {}'.format(th_name, e))
|
||||||
|
|
||||||
|
|
||||||
def start_thread(func, *a, **k):
|
def start_thread(func, *a, **k):
|
||||||
@ -146,7 +160,6 @@ def start_thread(func, *a, **k):
|
|||||||
th_name = th.getName()
|
th_name = th.getName()
|
||||||
|
|
||||||
if debug_threading:
|
if debug_threading:
|
||||||
|
|
||||||
log('Starting {} from {}'.format(
|
log('Starting {} from {}'.format(
|
||||||
th_name, threading.current_thread().getName()
|
th_name, threading.current_thread().getName()
|
||||||
))
|
))
|
||||||
@ -221,7 +234,7 @@ def root_notify_env():
|
|||||||
# iterates over processes, find processes with suitable env
|
# iterates over processes, find processes with suitable env
|
||||||
for pid in os.listdir('/proc'):
|
for pid in os.listdir('/proc'):
|
||||||
|
|
||||||
if os.path.exists('/proc/' + pid + '/exe') is True:
|
if is_alive(pid):
|
||||||
one_env = re_pid_environ(pid)
|
one_env = re_pid_environ(pid)
|
||||||
unsorted_envs_list.append(one_env)
|
unsorted_envs_list.append(one_env)
|
||||||
|
|
||||||
@ -707,7 +720,7 @@ def signal_handler(signum, frame):
|
|||||||
log('Got the {} signal '.format(
|
log('Got the {} signal '.format(
|
||||||
sig_dict[signum]))
|
sig_dict[signum]))
|
||||||
|
|
||||||
update_stat_dict_and_print(None)
|
print_stat_dict()
|
||||||
|
|
||||||
m1 = monotonic()
|
m1 = monotonic()
|
||||||
pt1 = process_time()
|
pt1 = process_time()
|
||||||
@ -965,22 +978,26 @@ def pid_to_realpath(pid):
|
|||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
return os.path.realpath('/proc/' + pid + '/exe')
|
return os.path.realpath('/proc/{}/exe'.format(pid))
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
return ''
|
return ''
|
||||||
except ProcessLookupError:
|
except ProcessLookupError:
|
||||||
return ''
|
return ''
|
||||||
|
except PermissionError:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def pid_to_cwd(pid):
|
def pid_to_cwd(pid):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
return os.path.realpath('/proc/' + pid + '/cwd')
|
return os.path.realpath('/proc/{}/cwd'.format(pid))
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
return ''
|
return ''
|
||||||
except ProcessLookupError:
|
except ProcessLookupError:
|
||||||
return ''
|
return ''
|
||||||
|
except PermissionError:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def pid_to_uid(pid):
|
def pid_to_uid(pid):
|
||||||
@ -1275,38 +1292,30 @@ def mlockall():
|
|||||||
# log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
|
# log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
|
||||||
|
|
||||||
|
|
||||||
def update_stat_dict_and_print(key):
|
def update_stat_dict(key):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
if key is not None:
|
if key is not None:
|
||||||
|
|
||||||
if key not in stat_dict:
|
if key not in stat_dict:
|
||||||
|
|
||||||
stat_dict.update({key: 1})
|
stat_dict.update({key: 1})
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
new_value = stat_dict[key] + 1
|
new_value = stat_dict[key] + 1
|
||||||
stat_dict.update({key: new_value})
|
stat_dict.update({key: new_value})
|
||||||
|
|
||||||
|
|
||||||
|
def print_stat_dict():
|
||||||
|
"""
|
||||||
|
"""
|
||||||
if print_statistics:
|
if print_statistics:
|
||||||
|
|
||||||
lsd = len(stat_dict)
|
lsd = len(stat_dict)
|
||||||
|
|
||||||
if lsd == 0:
|
if lsd == 0:
|
||||||
|
|
||||||
log('No corrective actions applied in the last {}'.format(
|
log('No corrective actions applied in the last {}'.format(
|
||||||
format_time(monotonic() - start_time)))
|
format_time(monotonic() - start_time)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
stats_msg = 'What happened in the last {}:'.format(
|
stats_msg = 'What happened in the last {}:'.format(
|
||||||
format_time(monotonic() - start_time))
|
format_time(monotonic() - start_time))
|
||||||
|
|
||||||
for i in stat_dict:
|
for i in stat_dict:
|
||||||
|
|
||||||
stats_msg += '\n {}: {}'.format(i, stat_dict[i])
|
stats_msg += '\n {}: {}'.format(i, stat_dict[i])
|
||||||
|
|
||||||
log(stats_msg)
|
log(stats_msg)
|
||||||
|
|
||||||
|
|
||||||
@ -1339,7 +1348,7 @@ def find_psi_metrics_value(psi_path, psi_metrics):
|
|||||||
def check_mem_and_swap():
|
def check_mem_and_swap():
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
with open('/proc/meminfo', 'rb', buffering=0) as f:
|
with open('/proc/meminfo', 'rb') as f:
|
||||||
m_list = f.read().decode().split(' kB\n')
|
m_list = f.read().decode().split(' kB\n')
|
||||||
return (int(m_list[2].split(':')[1]),
|
return (int(m_list[2].split(':')[1]),
|
||||||
int(m_list[swap_total_index].split(':')[1]),
|
int(m_list[swap_total_index].split(':')[1]),
|
||||||
@ -1512,25 +1521,40 @@ def zram_stat(zram_id):
|
|||||||
return disksize, mem_used_total # BYTES, str
|
return disksize, mem_used_total # BYTES, str
|
||||||
|
|
||||||
|
|
||||||
def get_pid_list():
|
def is_alive(pid):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
with open('/proc/{}/statm'.format(pid), 'rb', buffering=0) as f:
|
||||||
|
rss = f.read().decode().split(' ')[1]
|
||||||
|
if rss != '0':
|
||||||
|
return True
|
||||||
|
except FileNotFoundError:
|
||||||
|
return False
|
||||||
|
except ProcessLookupError:
|
||||||
|
return False
|
||||||
|
except NotADirectoryError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def alive_pid_list():
|
||||||
"""
|
"""
|
||||||
Find pid list expect kthreads and zombies
|
|
||||||
"""
|
"""
|
||||||
pid_list = []
|
pid_list = []
|
||||||
for pid in os.listdir('/proc'):
|
for pid in os.listdir('/proc'):
|
||||||
if os.path.exists('/proc/' + pid + '/exe'):
|
|
||||||
pid_list.append(pid)
|
|
||||||
return pid_list
|
|
||||||
|
|
||||||
|
|
||||||
def get_non_decimal_pids():
|
|
||||||
"""
|
|
||||||
"""
|
|
||||||
non_decimal_list = []
|
|
||||||
for pid in pid_list:
|
|
||||||
if pid[0].isdecimal() is False:
|
if pid[0].isdecimal() is False:
|
||||||
non_decimal_list.append(pid)
|
continue
|
||||||
return non_decimal_list
|
|
||||||
|
if is_alive(pid):
|
||||||
|
pid_list.append(pid)
|
||||||
|
|
||||||
|
pid_list.remove(self_pid)
|
||||||
|
|
||||||
|
if '1' in pid_list:
|
||||||
|
pid_list.remove('1')
|
||||||
|
|
||||||
|
return pid_list
|
||||||
|
|
||||||
|
|
||||||
def find_victim(_print_proc_table):
|
def find_victim(_print_proc_table):
|
||||||
@ -1541,18 +1565,7 @@ def find_victim(_print_proc_table):
|
|||||||
|
|
||||||
ft1 = monotonic()
|
ft1 = monotonic()
|
||||||
|
|
||||||
pid_list = get_pid_list()
|
pid_list = alive_pid_list()
|
||||||
|
|
||||||
pid_list.remove(self_pid)
|
|
||||||
|
|
||||||
if '1' in pid_list:
|
|
||||||
pid_list.remove('1')
|
|
||||||
|
|
||||||
non_decimal_list = get_non_decimal_pids()
|
|
||||||
|
|
||||||
for i in non_decimal_list:
|
|
||||||
if i in pid_list:
|
|
||||||
pid_list.remove(i)
|
|
||||||
|
|
||||||
pid_badness_list = []
|
pid_badness_list = []
|
||||||
|
|
||||||
@ -1584,6 +1597,7 @@ def find_victim(_print_proc_table):
|
|||||||
|
|
||||||
hr = '#' * 107
|
hr = '#' * 107
|
||||||
|
|
||||||
|
log('Tasks state (memory values in mebibytes):')
|
||||||
log(hr)
|
log(hr)
|
||||||
log('# PID PPID badness oom_score oom_score_adj e'
|
log('# PID PPID badness oom_score oom_score_adj e'
|
||||||
'UID S VmSize VmRSS VmSwap Name {}'.format(
|
'UID S VmSize VmRSS VmSwap Name {}'.format(
|
||||||
@ -1674,7 +1688,7 @@ def find_victim(_print_proc_table):
|
|||||||
if _print_proc_table:
|
if _print_proc_table:
|
||||||
log(hr)
|
log(hr)
|
||||||
|
|
||||||
log('Found {} processes with existing /proc/[pid]/exe realpath'.format(
|
log('Found {} tasks with non-zero VmRSS (except init and self)'.format(
|
||||||
real_proc_num))
|
real_proc_num))
|
||||||
|
|
||||||
log(
|
log(
|
||||||
@ -1705,12 +1719,6 @@ def find_victim_info(pid, victim_badness, name):
|
|||||||
state = line.split('\t')[1].rstrip()
|
state = line.split('\t')[1].rstrip()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
"""
|
|
||||||
if n is ppid_index:
|
|
||||||
# ppid = line.split('\t')[1]
|
|
||||||
continue
|
|
||||||
"""
|
|
||||||
|
|
||||||
if n is uid_index:
|
if n is uid_index:
|
||||||
uid = line.split('\t')[2]
|
uid = line.split('\t')[2]
|
||||||
continue
|
continue
|
||||||
@ -1750,14 +1758,16 @@ def find_victim_info(pid, victim_badness, name):
|
|||||||
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
|
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
|
||||||
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
log('The victim died in the search process: FileNotFoundError')
|
x = 'The victim died in the search process: FileNotFoundError'
|
||||||
update_stat_dict_and_print(
|
log(x)
|
||||||
'The victim died in the search process: FileNotFoundError')
|
update_stat_dict(x)
|
||||||
|
print_stat_dict()
|
||||||
return None
|
return None
|
||||||
except ProcessLookupError:
|
except ProcessLookupError:
|
||||||
log('The victim died in the search process: ProcessLookupError')
|
x = 'The victim died in the search process: ProcessLookupError'
|
||||||
update_stat_dict_and_print(
|
log(x)
|
||||||
'The victim died in the search process: ProcessLookupError')
|
update_stat_dict(x)
|
||||||
|
print_stat_dict()
|
||||||
return None
|
return None
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
|
|
||||||
@ -1803,43 +1813,46 @@ def find_victim_info(pid, victim_badness, name):
|
|||||||
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
|
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
|
||||||
|
|
||||||
except IndexError:
|
except IndexError:
|
||||||
log('The victim died in the search process: IndexError')
|
x = 'The victim died in the search process: IndexError'
|
||||||
update_stat_dict_and_print(
|
log(x)
|
||||||
'The victim died in the search process: IndexError')
|
update_stat_dict(x)
|
||||||
|
print_stat_dict()
|
||||||
return None
|
return None
|
||||||
except ValueError:
|
except ValueError:
|
||||||
log('The victim died in the search process: ValueError')
|
x = 'The victim died in the search process: ValueError'
|
||||||
update_stat_dict_and_print(
|
log(x)
|
||||||
'The victim died in the search process: ValueError')
|
update_stat_dict(x)
|
||||||
|
print_stat_dict()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
log('The victim died in the search process: FileNotFoundError')
|
x = 'The victim died in the search process: FileNotFoundError'
|
||||||
update_stat_dict_and_print(
|
log(x)
|
||||||
'The victim died in the search process: FileNotFoundError')
|
update_stat_dict(x)
|
||||||
|
print_stat_dict()
|
||||||
return None
|
return None
|
||||||
except ProcessLookupError:
|
except ProcessLookupError:
|
||||||
log('The victim died in the search process: ProcessLookupError')
|
x = 'The victim died in the search process: ProcessLookupError'
|
||||||
update_stat_dict_and_print(
|
log(x)
|
||||||
'The victim died in the search process: ProcessLookupError')
|
update_stat_dict(x)
|
||||||
|
print_stat_dict()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
len_vm = len(str(vm_size))
|
len_vm = len(str(vm_size))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
realpath = os.path.realpath('/proc/' + pid + '/exe')
|
realpath = pid_to_realpath(pid)
|
||||||
|
cwd = pid_to_cwd(pid)
|
||||||
cwd = os.path.realpath('/proc/' + pid + '/cwd')
|
|
||||||
|
|
||||||
nssid = pid_to_nssid(pid)
|
nssid = pid_to_nssid(pid)
|
||||||
|
|
||||||
victim_lifetime = format_time(uptime() - pid_to_starttime(pid))
|
victim_lifetime = format_time(uptime() - pid_to_starttime(pid))
|
||||||
victim_cgroup_v1 = pid_to_cgroup_v1(pid)
|
victim_cgroup_v1 = pid_to_cgroup_v1(pid)
|
||||||
victim_cgroup_v2 = pid_to_cgroup_v2(pid)
|
victim_cgroup_v2 = pid_to_cgroup_v2(pid)
|
||||||
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
log('The victim died in the search process: FileNotFoundError')
|
x = 'The victim died in the search process: FileNotFoundError'
|
||||||
update_stat_dict_and_print(
|
log(x)
|
||||||
'The victim died in the search process: FileNotFoundError')
|
update_stat_dict(x)
|
||||||
|
print_stat_dict()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
ancestry = pid_to_ancestry(pid, max_victim_ancestry_depth)
|
ancestry = pid_to_ancestry(pid, max_victim_ancestry_depth)
|
||||||
@ -2156,8 +2169,7 @@ def is_victim_alive(victim_id):
|
|||||||
if victim_id != new_victim_id:
|
if victim_id != new_victim_id:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
exe_exists = os.path.exists('/proc/{}/exe'.format(pid))
|
if is_alive(pid):
|
||||||
if exe_exists:
|
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
state = pid_to_state(pid)
|
state = pid_to_state(pid)
|
||||||
@ -2295,29 +2307,29 @@ def implement_corrective_action(
|
|||||||
|
|
||||||
vwd = None # Victim Will Die
|
vwd = None # Victim Will Die
|
||||||
|
|
||||||
|
if threshold is SIGTERM:
|
||||||
|
if victim_id in v_dict:
|
||||||
|
dt = monotonic() - v_dict[victim_id]['time']
|
||||||
|
if dt > max_soft_exit_time:
|
||||||
|
log('max_soft_exit_time is exceeded: the '
|
||||||
|
'victim will get SIGKILL')
|
||||||
|
threshold = SIGKILL
|
||||||
|
else:
|
||||||
|
log('max_soft_exit_time is not exceeded ('
|
||||||
|
'{} < {}) for the victim'.format(round(
|
||||||
|
dt, 1), max_soft_exit_time))
|
||||||
|
|
||||||
|
if debug_sleep:
|
||||||
|
log('Sleep {} sec (over_sleep)'.format(over_sleep))
|
||||||
|
sleep(over_sleep)
|
||||||
|
|
||||||
|
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
|
||||||
|
'<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
|
||||||
|
|
||||||
|
return psi_t0
|
||||||
|
|
||||||
if victim_badness >= min_badness:
|
if victim_badness >= min_badness:
|
||||||
|
|
||||||
if threshold is SIGTERM:
|
|
||||||
if victim_id in v_dict:
|
|
||||||
dt = monotonic() - v_dict[victim_id]['time']
|
|
||||||
if dt > max_soft_exit_time:
|
|
||||||
log('max_soft_exit_time is exceeded: the '
|
|
||||||
'victim will get SIGKILL')
|
|
||||||
threshold = SIGKILL
|
|
||||||
else:
|
|
||||||
log('max_soft_exit_time is not exceeded ('
|
|
||||||
'{} < {}) for the victim'.format(round(
|
|
||||||
dt, 1), max_soft_exit_time))
|
|
||||||
|
|
||||||
if debug_sleep:
|
|
||||||
log('Sleep {} sec (over_sleep)'.format(over_sleep))
|
|
||||||
sleep(over_sleep)
|
|
||||||
|
|
||||||
log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
|
|
||||||
'<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
|
|
||||||
|
|
||||||
return psi_t0
|
|
||||||
|
|
||||||
if print_victim_status:
|
if print_victim_status:
|
||||||
victim_info = find_victim_info(pid, victim_badness, name)
|
victim_info = find_victim_info(pid, victim_badness, name)
|
||||||
log(victim_info)
|
log(victim_info)
|
||||||
@ -2345,54 +2357,89 @@ def implement_corrective_action(
|
|||||||
soft_match = True
|
soft_match = True
|
||||||
break
|
break
|
||||||
|
|
||||||
|
start_action = monotonic()
|
||||||
|
|
||||||
if soft_match:
|
if soft_match:
|
||||||
|
|
||||||
cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name(
|
cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name(
|
||||||
pid)).replace('$SERVICE', service)
|
pid)).replace('$SERVICE', service)
|
||||||
start_thread(exe, cmd)
|
|
||||||
|
preventing_oom_message = 'Implementing a corrective action:\n ' \
|
||||||
|
'Executing the command: {}'.format(cmd)
|
||||||
|
|
||||||
|
log(preventing_oom_message)
|
||||||
|
|
||||||
|
err = start_thread(exe, cmd)
|
||||||
|
|
||||||
|
if err == 1:
|
||||||
|
key = 'Cannot execute the command in the new thread'
|
||||||
|
update_stat_dict(key)
|
||||||
|
log(key)
|
||||||
|
|
||||||
|
else:
|
||||||
|
|
||||||
|
update_stat_dict('Executing the command "{}"'.format(command))
|
||||||
|
|
||||||
response_time = monotonic() - time0
|
response_time = monotonic() - time0
|
||||||
|
|
||||||
exit_status = None
|
log('Total response time: {} ms'.format(round(
|
||||||
|
response_time * 1000)))
|
||||||
|
|
||||||
preventing_oom_message = 'Implement a corrective act' \
|
print_stat_dict()
|
||||||
'ion:\n Run the command: {}' \
|
|
||||||
'\n Exit status: {}; total response ' \
|
|
||||||
'time: {} ms'.format(
|
|
||||||
cmd,
|
|
||||||
exit_status,
|
|
||||||
round(response_time * 1000))
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
preventing_oom_message = 'Implementing a corrective action:\n ' \
|
||||||
|
'Sending {} to the victim'.format(
|
||||||
|
sig_dict[threshold])
|
||||||
|
|
||||||
|
log(preventing_oom_message)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
os.kill(int(pid), threshold)
|
os.kill(int(pid), threshold)
|
||||||
|
|
||||||
|
update_stat_dict(
|
||||||
|
'[ OK ] Sending {} to {}'.format(sig_dict[threshold], name)
|
||||||
|
)
|
||||||
|
|
||||||
response_time = monotonic() - time0
|
response_time = monotonic() - time0
|
||||||
|
|
||||||
send_result = 'total response time: {} ms'.format(
|
send_result = 'OK; total response time: {} ms'.format(
|
||||||
round(response_time * 1000))
|
round(response_time * 1000))
|
||||||
|
|
||||||
preventing_oom_message = 'Implementing a corrective action:' \
|
log(send_result)
|
||||||
'\n Sending {} to the victim; {}'.format(
|
|
||||||
sig_dict[threshold], send_result)
|
|
||||||
|
|
||||||
if threshold is SIGKILL:
|
if threshold is SIGKILL:
|
||||||
vwd = True
|
vwd = True
|
||||||
|
|
||||||
except FileNotFoundError:
|
print_stat_dict()
|
||||||
vwd = True
|
|
||||||
key = 'The victim died in the search process: ' \
|
|
||||||
'FileNotFoundError'
|
|
||||||
except ProcessLookupError:
|
|
||||||
vwd = True
|
|
||||||
key = 'The victim died in the search process: ' \
|
|
||||||
'ProcessLookupError'
|
|
||||||
|
|
||||||
try:
|
except FileNotFoundError:
|
||||||
log(preventing_oom_message)
|
|
||||||
except UnboundLocalError:
|
vwd = True
|
||||||
pass
|
key = 'Cannot send a signal: FileNotFoundError'
|
||||||
|
update_stat_dict(key)
|
||||||
|
print_stat_dict()
|
||||||
|
log(key)
|
||||||
|
|
||||||
|
except ProcessLookupError:
|
||||||
|
|
||||||
|
vwd = True
|
||||||
|
key = 'Cannot send a signal: ProcessLookupError'
|
||||||
|
update_stat_dict(key)
|
||||||
|
print_stat_dict()
|
||||||
|
log(key)
|
||||||
|
|
||||||
|
except PermissionError:
|
||||||
|
vwd = False
|
||||||
|
key = 'Cannot send a signal: PermissionError'
|
||||||
|
log(key)
|
||||||
|
update_stat_dict(key)
|
||||||
|
print_stat_dict()
|
||||||
|
log('Sleep {}s'.format(post_soft_action_delay))
|
||||||
|
sleep(post_soft_action_delay)
|
||||||
|
# do not send signal twice!
|
||||||
|
|
||||||
if not vwd:
|
if not vwd:
|
||||||
if victim_id not in v_dict:
|
if victim_id not in v_dict:
|
||||||
@ -2404,6 +2451,8 @@ def implement_corrective_action(
|
|||||||
|
|
||||||
last_action_dict['t'] = kill_timestamp = monotonic()
|
last_action_dict['t'] = kill_timestamp = monotonic()
|
||||||
|
|
||||||
|
kill_timestamp = start_action
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
sleep(0.01)
|
sleep(0.01)
|
||||||
d = monotonic() - kill_timestamp
|
d = monotonic() - kill_timestamp
|
||||||
@ -2449,13 +2498,6 @@ def implement_corrective_action(
|
|||||||
': {} MiB, SwapFree: {} MiB'.format(
|
': {} MiB, SwapFree: {} MiB'.format(
|
||||||
round(ma_mib, 1), round(sf_mib, 1)))
|
round(ma_mib, 1), round(sf_mib, 1)))
|
||||||
|
|
||||||
if soft_match is False:
|
|
||||||
key = 'Sending {} to {}'.format(sig_dict[threshold], name)
|
|
||||||
update_stat_dict_and_print(key)
|
|
||||||
else:
|
|
||||||
key = "Run the command '{}'".format(command)
|
|
||||||
update_stat_dict_and_print(key)
|
|
||||||
|
|
||||||
if threshold is SIGKILL and post_kill_exe != '':
|
if threshold is SIGKILL and post_kill_exe != '':
|
||||||
|
|
||||||
cmd = post_kill_exe.replace('$PID', pid).replace(
|
cmd = post_kill_exe.replace('$PID', pid).replace(
|
||||||
@ -2474,8 +2516,11 @@ def implement_corrective_action(
|
|||||||
else:
|
else:
|
||||||
|
|
||||||
response_time = monotonic() - time0
|
response_time = monotonic() - time0
|
||||||
victim_badness_is_too_small = 'victim badness ({}) < min_b' \
|
|
||||||
'adness ({}); nothing to do; response time: {} ms'.format(
|
victim_badness_is_too_small = 'victim (PID: {}, Name: {}) badness ' \
|
||||||
|
'({}) < min_badness ({}); nothing to do; response tim' \
|
||||||
|
'e: {} ms'.format(
|
||||||
|
pid, name,
|
||||||
victim_badness,
|
victim_badness,
|
||||||
min_badness,
|
min_badness,
|
||||||
round(response_time * 1000))
|
round(response_time * 1000))
|
||||||
@ -2484,7 +2529,8 @@ def implement_corrective_action(
|
|||||||
|
|
||||||
# update stat_dict
|
# update stat_dict
|
||||||
key = 'victim badness < min_badness'
|
key = 'victim badness < min_badness'
|
||||||
update_stat_dict_and_print(key)
|
update_stat_dict(key)
|
||||||
|
print_stat_dict()
|
||||||
|
|
||||||
if vwd is None:
|
if vwd is None:
|
||||||
|
|
||||||
@ -2696,7 +2742,7 @@ separate_log = False # will be overwritten after parse config
|
|||||||
cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes()
|
cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes()
|
||||||
|
|
||||||
|
|
||||||
pid_list = get_pid_list()
|
pid_list = alive_pid_list()
|
||||||
|
|
||||||
|
|
||||||
print_proc_table_flag = False
|
print_proc_table_flag = False
|
||||||
|
Loading…
Reference in New Issue
Block a user