fix CLI input; fix UnicodeDecodeError; add Lifetime to victim info; add max_post_sigterm_victim_lifetime

This commit is contained in:
Alexey Avramov 2019-02-28 02:23:41 +09:00
parent 7e34a6e03d
commit 7b154d2ae9

701
nohang
View File

@ -21,30 +21,7 @@ optional arguments:
./nohang.conf, /etc/nohang/nohang.conf""" ./nohang.conf, /etc/nohang/nohang.conf"""
if len(argv) == 1: SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
if os.path.exists('./nohang.conf'):
config = cd = os.getcwd() + '/nohang.conf'
else:
config = '/etc/nohang/nohang.conf'
elif len(argv) == 2:
if argv[1] == '--help' or argv[1] == '-h':
errprint(help_mess)
exit(1)
else:
errprint('Invalid CLI input')
exit(1)
elif len(argv) > 3:
errprint('Invalid CLI input')
exit(1)
else:
if argv[1] == '--config' or argv[1] == '-c':
config = argv[2]
else:
errprint('Invalid option: {}'.format(argv[1]))
exit(1)
conf_err_mess = 'Invalid config. Exit.' conf_err_mess = 'Invalid config. Exit.'
@ -85,20 +62,51 @@ print_proc_table = False
min_mem_report_interval = 5 min_mem_report_interval = 5
post_kill_exe = '' post_kill_exe = ''
victim_dict = dict()
max_ancestry_depth = 1
max_post_sigterm_victim_lifetime = 9
########################################################################## ##########################################################################
# define functions # define functions
def errprint(text): def uptime():
print(text, file=stderr, flush=True) return float(rline1('/proc/uptime').split(' ')[0])
def pid_to_starttime(pid):
    """Return the start time of process `pid`, in seconds.

    Reads the starttime field (22nd field, measured in clock ticks) from
    /proc/<pid>/stat and divides it by SC_CLK_TCK.

    pid: process id as a decimal string.

    Errors from reading the file (e.g. FileNotFoundError when the process
    has already exited) and IndexError on a truncated line propagate to
    the caller.
    """
    stat_path = '/proc/' + pid + '/stat'
    try:
        stat_line = rline1(stat_path)
    except UnicodeDecodeError:
        # The process name inside the parentheses may contain bytes that
        # are not valid UTF-8; re-read as bytes and drop what cannot be
        # decoded.
        with open(stat_path, 'rb') as f:
            stat_line = f.read().decode('utf-8', 'ignore')
    # The name may itself contain spaces or ')', so only the text after
    # the LAST ')' is split. That remainder begins with a space, so
    # index 0 is an empty string and index 20 is the 22nd field.
    starttime = stat_line.rpartition(')')[2].split(' ')[20]
    return float(starttime) / SC_CLK_TCK
def get_victim_id(pid):
    """Return an identifier string for process `pid`: '<pid>-<starttime>'.

    The starttime part is the raw 22nd field (clock ticks) of
    /proc/<pid>/stat. Presumably the start time is included so that a
    recycled pid is not mistaken for an earlier victim -- confirm against
    the caller that keys victim_dict by this value.

    pid: process id as a decimal string.

    Errors from reading the file (e.g. FileNotFoundError when the process
    has already exited) and IndexError on a truncated line propagate to
    the caller.
    """
    stat_path = '/proc/' + pid + '/stat'
    try:
        stat_line = rline1(stat_path)
    except UnicodeDecodeError:
        # The process name may contain bytes that are not valid UTF-8;
        # fall back to a bytes read and ignore undecodable bytes.
        with open(stat_path, 'rb') as f:
            stat_line = f.read().decode('utf-8', 'ignore')
    # Split only what follows the last ')': the name may contain spaces.
    # The remainder starts with a space, so index 20 is the 22nd field.
    return pid + '-' + stat_line.rpartition(')')[2].split(' ')[20]
def errprint(*text):
    """Print the given values to stderr and flush the stream immediately."""
    print(*text, flush=True, file=stderr)
def mlockall(): def mlockall():
"""Lock all memory to prevent swapping nohang process."""
MCL_CURRENT = 1 MCL_CURRENT = 1
MCL_FUTURE = 2 MCL_FUTURE = 2
@ -188,17 +196,17 @@ def check_zram():
disksize_sum += int(stat[0]) disksize_sum += int(stat[0])
mem_used_total_sum += int(stat[1]) mem_used_total_sum += int(stat[1])
ZRAM_DISKSIZE_FACTOR = 0.0042 # Means that when setting zram disksize = 1 GiB available memory
# Означает, что при задани zram disksize = 1 GiB доступная память # decrease by 0.0042 GiB.
# уменьшится на 0.0042 GiB. # Found experimentally, requires clarification with different kernels and architectures.
# Найден экспериментально, требует уточнения с разными ядрами и архитектурами. # With small disksize values (up to 1 GiB) it can be more, up to 0.0045.
# На небольших дисксайзах (до гигабайта) может быть больше, до 0.0045. # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should be 0.001:
# Создатель модуля zram утверждает, что ZRAM_DISKSIZE_FACTOR доожен быть 0.001:
# ("zram uses about 0.1% of the size of the disk" # ("zram uses about 0.1% of the size of the disk"
# - https://www.kernel.org/doc/Documentation/blockdev/zram.txt), # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt),
# но это утверждение противоречит опытным данным. # but this statement contradicts the experimental data.
# ZRAM_DISKSIZE_FACTOR = deltaMemAvailable / disksize # ZRAM_DISKSIZE_FACTOR = deltaMemAvailable / disksize
# found experimentally # Found experimentally.
ZRAM_DISKSIZE_FACTOR = 0.0042
return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0 return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
@ -276,9 +284,15 @@ def conf_parse_bool(param):
def rline1(path): def rline1(path):
"""read 1st line from path.""" """read 1st line from path."""
with open(path) as f: try:
for line in f: with open(path) as f:
return line[:-1] for line in f:
return line[:-1]
except UnicodeDecodeError:
# print('UDE rline1', path)
with open(path, 'rb') as f:
return f.read(999).decode(
'utf-8', 'ignore').split('\n')[0]
def kib_to_mib(num): def kib_to_mib(num):
@ -368,19 +382,28 @@ def pid_to_ppid(pid):
f_list = f.read().decode('utf-8', 'ignore').split('\n') f_list = f.read().decode('utf-8', 'ignore').split('\n')
for i in range(len(f_list)): for i in range(len(f_list)):
if i is ppid_index: if i is ppid_index:
ppid = f_list[i].split('\t')[1] return f_list[i].split('\t')[1]
def pid_to_ancestry(pid): def pid_to_ancestry(pid, max_ancestry_depth=1):
if max_ancestry_depth == 1:
ppid = pid_to_ppid(pid)
pname = pid_to_name(ppid)
return '\n PPID: {} ({})'.format(ppid, pname)
if max_ancestry_depth == 0:
return ''
anc_list = [] anc_list = []
while True: for i in range(max_ancestry_depth):
ppid = pid_to_ppid(pid) ppid = pid_to_ppid(pid)
pname = pid_to_name(ppid) pname = pid_to_name(ppid)
anc_list.append((ppid, pname)) anc_list.append((ppid, pname))
if ppid == '1': if ppid == '1':
break break
pid = ppid pid = ppid
print('Ancestry: ', anc_list) a = ''
for i in anc_list:
a = a + ' <= PID {} ({})'.format(i[0], i[1])
return '\n Ancestry: ' + a[4:]
def pid_to_cmdline(pid): def pid_to_cmdline(pid):
@ -438,7 +461,7 @@ def send_notify_warn():
''' '''
# find process with max badness # find process with max badness
fat_tuple = fattest() fat_tuple = find_victim()
pid = fat_tuple[0] pid = fat_tuple[0]
name = pid_to_name(pid) name = pid_to_name(pid)
@ -580,12 +603,49 @@ def get_non_decimal_pids():
return non_decimal_list return non_decimal_list
def fattest(): def pid_to_badness(pid):
"""Find and modify badness (if it needs)."""
try:
oom_score = int(rline1('/proc/' + pid + '/oom_score'))
badness = oom_score
if decrease_oom_score_adj:
oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
if badness > oom_score_adj_max and oom_score_adj > 0:
badness = badness - oom_score_adj + oom_score_adj_max
if regex_matching:
name = pid_to_name(pid)
for re_tup in processname_re_list:
if search(re_tup[1], name) is not None:
badness += int(re_tup[0])
if re_match_cmdline:
cmdline = pid_to_cmdline(pid)
for re_tup in cmdline_re_list:
if search(re_tup[1], cmdline) is not None:
badness += int(re_tup[0])
if re_match_uid:
uid = pid_to_uid(pid)
for re_tup in uid_re_list:
if search(re_tup[1], uid) is not None:
badness += int(re_tup[0])
return badness, oom_score
except FileNotFoundError:
return None, None
except ProcessLookupError:
return None, None
def find_victim():
""" """
Find the process with highest badness and its badness adjustment Find the process with highest badness and its badness adjustment
Return pid and badness Return pid and badness
-> find_mem_hog() or find_victim() or find_worst_process()
""" """
ft1 = time() ft1 = time()
@ -605,52 +665,25 @@ def fattest():
pid_badness_list = [] pid_badness_list = []
if print_proc_table: if print_proc_table:
print(' PID badness Name eUID') print(' PID badness Name eUID cmdline')
print('------- ------- --------------- ----------') print('------- ------- --------------- ---------- -------')
for pid in pid_list: for pid in pid_list:
# find and modify badness (if it needs) badness = pid_to_badness(pid)[0]
try: if badness is None:
badness = int(rline1('/proc/' + pid + '/oom_score'))
if decrease_oom_score_adj:
oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
if badness > oom_score_adj_max and oom_score_adj > 0:
badness = badness - oom_score_adj + oom_score_adj_max
if regex_matching:
name = pid_to_name(pid)
for re_tup in processname_re_list:
if search(re_tup[1], name) is not None:
badness += int(re_tup[0])
if re_match_cmdline:
cmdline = pid_to_cmdline(pid)
for re_tup in cmdline_re_list:
if search(re_tup[1], cmdline) is not None:
badness += int(re_tup[0])
if re_match_uid:
uid = pid_to_uid(pid)
for re_tup in uid_re_list:
if search(re_tup[1], uid) is not None:
badness += int(re_tup[0])
if print_proc_table:
print('{} {} {} {}'.format(
pid.rjust(7),
str(badness).rjust(7),
pid_to_name(pid).ljust(15),
pid_to_uid(pid).rjust(10)
)
)
except FileNotFoundError:
continue
except ProcessLookupError:
continue continue
if print_proc_table:
print('{} {} {} {} {}'.format(
pid.rjust(7),
str(badness).rjust(7),
pid_to_name(pid).ljust(15),
pid_to_uid(pid).rjust(10),
pid_to_cmdline(pid))
)
pid_badness_list.append((pid, badness)) pid_badness_list.append((pid, badness))
# Make list of (pid, badness) tuples, sorted by 'badness' values # Make list of (pid, badness) tuples, sorted by 'badness' values
@ -665,220 +698,252 @@ def fattest():
# Get maximum 'badness' value # Get maximum 'badness' value
victim_badness = pid_tuple_list[1] victim_badness = pid_tuple_list[1]
victim_name = pid_to_name(pid)
print( print(
'\nWorst process (PID: {}, Name: {}, badness: {}) found in {} ms'.format( '\nWorst process (PID: {}, Name: {}, badness: {}) found in {} ms'.format(
pid, pid,
pid_to_name(pid), victim_name,
victim_badness, victim_badness,
round((time() - ft1) * 1000) round((time() - ft1) * 1000)
) )
) )
return pid, victim_badness return pid, victim_badness, victim_name
def find_victim_and_send_signal(signal): def find_victim_info(pid, victim_badness, name):
status0 = time()
try:
with open('/proc/' + pid + '/status') as f:
for n, line in enumerate(f):
if n is state_index:
state = line.split('\t')[1].rstrip()
continue
if n is ppid_index:
ppid = line.split('\t')[1]
continue
if n is uid_index:
uid = line.split('\t')[2]
continue
if n is vm_size_index:
vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if n is vm_rss_index:
vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if detailed_rss:
if n is anon_index:
anon_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is file_index:
file_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is shmem_index:
shmem_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is vm_swap_index:
vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
break
cmdline = pid_to_cmdline(pid)
oom_score = rline1('/proc/' + pid + '/oom_score')
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
except FileNotFoundError:
print('The victim died in the search process: FileNotFoundError')
update_stat_dict_and_print(
'The victim died in the search process: FileNotFoundError')
return None
except ProcessLookupError:
print('The victim died in the search process: ProcessLookupError')
update_stat_dict_and_print(
'The victim died in the search process: ProcessLookupError')
return None
except UnicodeDecodeError:
with open('/proc/' + pid + '/status', 'rb') as f:
f_list = f.read().decode('utf-8', 'ignore').split('\n')
for i in range(len(f_list)):
if i is state_index:
state = f_list[i].split('\t')[1].rstrip()
if i is ppid_index:
ppid = f_list[i].split('\t')[1]
if i is uid_index:
uid = f_list[i].split('\t')[2]
if i is vm_size_index:
vm_size = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
if i is vm_rss_index:
vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
if detailed_rss:
if i is anon_index:
anon_rss = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
if i is file_index:
file_rss = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
if i is shmem_index:
shmem_rss = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
if i is vm_swap_index:
vm_swap = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
cmdline = pid_to_cmdline(pid)
oom_score = rline1('/proc/' + pid + '/oom_score')
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
except IndexError:
print('The victim died in the search process: IndexError')
update_stat_dict_and_print(
'The victim died in the search process: IndexError')
return None
except ValueError:
print('The victim died in the search process: ValueError')
update_stat_dict_and_print(
'The victim died in the search process: ValueError')
return None
except FileNotFoundError:
print('The victim died in the search process: FileNotFoundError')
update_stat_dict_and_print(
'The victim died in the search process: FileNotFoundError')
return None
except ProcessLookupError:
print('The victim died in the search process: ProcessLookupError')
update_stat_dict_and_print(
'The victim died in the search process: ProcessLookupError')
return None
# print((time() - status0) * 1000, 'status time')
len_vm = len(str(vm_size))
try:
realpath = os.path.realpath('/proc/' + pid + '/exe')
except FileNotFoundError:
print('The victim died in the search process: FileNotFoundError')
update_stat_dict_and_print(
'The victim died in the search process: FileNotFoundError')
return None
te1 = time()
ancestry = pid_to_ancestry(pid, max_ancestry_depth)
# print((time() - te1) * 1000, 'ms, ancestry')
# if max_ancestry_depth == 0:
# ancestry = '\n PPID: {} ({})'.format(ppid, pname)
if detailed_rss:
detailed_rss_info = ' (' \
'Anon: {} MiB, ' \
'File: {} MiB, ' \
'Shmem: {} MiB)'.format(
anon_rss,
file_rss,
shmem_rss)
else:
detailed_rss_info = ''
victim_lifetime = format_time(uptime() - pid_to_starttime(pid))
victim_info = '\nFound a process with highest badness:' \
'\n Name: {}' \
'\n State: {}' \
'\n PID: {}' \
'{}' \
'\n EUID: {}' \
'\n badness: {}, ' \
'oom_score: {}, ' \
'oom_score_adj: {}' \
'\n VmSize: {} MiB' \
'\n VmRSS: {} MiB {}' \
'\n VmSwap: {} MiB' \
'\n Realpath: {}' \
'\n Cmdline: {}' \
'\n Lifetime: {}'.format(
name,
state,
pid,
ancestry,
uid,
victim_badness,
oom_score,
oom_score_adj,
vm_size,
str(vm_rss).rjust(len_vm),
detailed_rss_info,
str(vm_swap).rjust(len_vm),
realpath,
cmdline,
victim_lifetime)
return victim_info
def implement_corrective_action(signal):
""" """
Find victim with highest badness and send SIGTERM/SIGKILL Find victim with highest badness and send SIGTERM/SIGKILL
-> implement_corrective_action()
""" """
pid, victim_badness = fattest() pid, victim_badness, name = find_victim()
name = pid_to_name(pid)
if victim_badness >= min_badness: if victim_badness >= min_badness:
# Try to send signal to found victim print(find_victim_info(pid, victim_badness, name))
# Get VmRSS and VmSwap and cmdline of victim process # kill the victim if it doesn't respond to SIGTERM
# and try to send a signal if signal is SIGTERM:
try: victim_id = get_victim_id(pid)
if victim_id not in victim_dict:
with open('/proc/' + pid + '/status') as f: victim_dict.update({victim_id: time()})
for n, line in enumerate(f): else:
if time() - victim_dict[
if n is state_index: victim_id] > max_post_sigterm_victim_lifetime:
state = line.split('\t')[1].rstrip() print(
'\nmax_post_sigterm_victim_lifetime excee'
if n is ppid_index: 'ded: the victim will get SIGKILL'
ppid = line.split('\t')[1] )
signal = SIGKILL
if n is uid_index:
uid = line.split('\t')[2]
continue
if n is vm_size_index:
vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if n is vm_rss_index:
vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if detailed_rss:
if n is anon_index:
anon_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is file_index:
file_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is shmem_index:
shmem_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is vm_swap_index:
vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
break
cmdline = pid_to_cmdline(pid)
oom_score = rline1('/proc/' + pid + '/oom_score')
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
except FileNotFoundError:
print(mem_info)
print('The victim died in the search process: FileNotFoundError')
update_stat_dict_and_print(
'The victim died in the search process: FileNotFoundError')
return None
except ProcessLookupError:
print(mem_info)
print('The victim died in the search process: ProcessLookupError')
update_stat_dict_and_print(
'The victim died in the search process: ProcessLookupError')
return None
except UnicodeDecodeError:
# тут надо снова все исключ обработать
with open('/proc/' + pid + '/status', 'rb') as f:
f_list = f.read().decode('utf-8', 'ignore').split('\n')
for i in range(len(f_list)):
if i is ppid_index:
ppid = f_list[i].split('\t')[1]
for i in range(len(f_list)):
if i is uid_index:
uid = f_list[i].split('\t')[2]
if i is vm_size_index:
vm_size = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
if i is vm_rss_index:
vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
if detailed_rss:
if i is anon_index:
anon_rss = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
if i is file_index:
file_rss = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
if i is shmem_index:
shmem_rss = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
if i is vm_swap_index:
vm_swap = kib_to_mib(
int(f_list[i].split('\t')[1][:-3]))
with open('/proc/' + pid + '/cmdline') as file:
cmdline = file.readlines()[0].replace('\x00', ' ')
oom_score = rline1('/proc/' + pid + '/oom_score')
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
except IndexError:
print(mem_info)
print('The victim died in the search process: IndexError')
update_stat_dict_and_print(
'The victim died in the search process: IndexError')
return None
except ValueError:
print(mem_info)
print('The victim died in the search process: ValueError')
update_stat_dict_and_print(
'The victim died in the search process: ValueError')
return None
len_vm = len(str(vm_size))
try:
realpath = os.path.realpath('/proc/' + pid + '/exe')
except FileNotFoundError:
print(mem_info)
print('The victim died in the search process: FileNotFoundError')
update_stat_dict_and_print(
'The victim died in the search process: FileNotFoundError')
return None
#state = pid_to_state(pid)
pname = pid_to_name(ppid.strip('\n '))
# print([ppid], [pname])
'''
te1 = time()
ancestry = pid_to_ancestry(pid)
print((time() - te1) * 1000)
'''
if detailed_rss:
detailed_rss_info = ' (' \
'Anon: {} MiB, ' \
'File: {} MiB, ' \
'Shmem: {} MiB)'.format(
anon_rss,
file_rss,
shmem_rss)
else:
detailed_rss_info = ''
victim_info = 'Found a process with highest badness:' \
'\n Name: {}' \
'\n State: {}' \
'\n PID: {}' \
'\n PPID: {} ({})' \
'\n EUID: {}' \
'\n badness: {}, ' \
'oom_score: {}, ' \
'oom_score_adj: {}' \
'\n VmSize: {} MiB' \
'\n VmRSS: {} MiB {}' \
'\n VmSwap: {} MiB' \
'\n realpath: {}' \
'\n cmdline: {}'.format(
name,
state,
pid,
ppid.strip('\n '),
pname,
uid,
victim_badness,
oom_score,
oom_score_adj,
vm_size,
str(vm_rss).rjust(len_vm),
detailed_rss_info,
str(vm_swap).rjust(len_vm),
realpath,
cmdline)
if execute_the_command and signal is SIGTERM and name in etc_dict: if execute_the_command and signal is SIGTERM and name in etc_dict:
command = etc_dict[name] command = etc_dict[name]
# todo: make new func
m = check_mem_and_swap()
ma = round(int(m[0]) / 1024.0)
sf = round(int(m[2]) / 1024.0)
print('\nMemory status before implementing a corrective action:\n MemAvailable'
': {} MiB, SwapFree: {} MiB'.format(ma, sf))
exit_status = os.system(etc_dict[name].replace( exit_status = os.system(etc_dict[name].replace(
'$PID', pid).replace('$NAME', pid_to_name(pid))) '$PID', pid).replace('$NAME', pid_to_name(pid)))
@ -896,7 +961,6 @@ def find_victim_and_send_signal(signal):
'$PID', pid).replace('$NAME', pid_to_name(pid)), exit_status, '$PID', pid).replace('$NAME', pid_to_name(pid)), exit_status,
round(response_time * 1000)) round(response_time * 1000))
print(mem_info)
print(etc_info) print(etc_info)
key = "Run the command '{}'".format(command) key = "Run the command '{}'".format(command)
@ -915,7 +979,7 @@ def find_victim_and_send_signal(signal):
m = check_mem_and_swap() m = check_mem_and_swap()
ma = round(int(m[0]) / 1024.0) ma = round(int(m[0]) / 1024.0)
sf = round(int(m[2]) / 1024.0) sf = round(int(m[2]) / 1024.0)
print('\nMemory status before sending a signal:\n MemAvailable' print('\nMemory status before implementing a corrective action:\n MemAvailable'
': {} MiB, SwapFree: {} MiB'.format(ma, sf)) ': {} MiB, SwapFree: {} MiB'.format(ma, sf))
os.kill(int(pid), signal) os.kill(int(pid), signal)
@ -923,10 +987,9 @@ def find_victim_and_send_signal(signal):
send_result = 'OK; response time: {} ms'.format( send_result = 'OK; response time: {} ms'.format(
round(response_time * 1000)) round(response_time * 1000))
preventing_oom_message = '{}' \ preventing_oom_message = '\nImplement a corrective action:' \
'\nImplement a corrective action:\n ' \ '\n Send {} to the victim; {}'.format(
'Send {} to the victim; {}'.format( sig_dict[signal], send_result)
victim_info, sig_dict[signal], send_result)
key = 'Send {} to {}'.format( key = 'Send {} to {}'.format(
sig_dict[signal], name) sig_dict[signal], name)
@ -953,7 +1016,6 @@ def find_victim_and_send_signal(signal):
round(response_time * 1000)) round(response_time * 1000))
key = 'ProcessLookupError (the victim died in the search process): ' key = 'ProcessLookupError (the victim died in the search process): '
print(mem_info)
print(preventing_oom_message) print(preventing_oom_message)
update_stat_dict_and_print(key) update_stat_dict_and_print(key)
@ -961,7 +1023,6 @@ def find_victim_and_send_signal(signal):
else: else:
response_time = time() - time0 response_time = time() - time0
print(mem_info)
victim_badness_is_too_small = 'victim badness {} < min_b' \ victim_badness_is_too_small = 'victim badness {} < min_b' \
'adness {}; nothing to do; response time: {} ms'.format( 'adness {}; nothing to do; response time: {} ms'.format(
victim_badness, victim_badness,
@ -1095,10 +1156,31 @@ def calculate_percent(arg_key):
########################################################################## ##########################################################################
# Try to lock all memory
if len(argv) == 1:
if os.path.exists('./nohang.conf'):
config = cd = os.getcwd() + '/nohang.conf'
else:
config = '/etc/nohang/nohang.conf'
mlockall() elif len(argv) == 2:
if argv[1] == '--help' or argv[1] == '-h':
errprint(help_mess)
exit(1)
else:
errprint('Invalid CLI input')
exit(1)
elif len(argv) > 3:
errprint('Invalid CLI input')
exit(1)
else:
if argv[1] == '--config' or argv[1] == '-c':
config = argv[2]
else:
errprint('Invalid option: {}'.format(argv[1]))
exit(1)
########################################################################## ##########################################################################
@ -1621,9 +1703,19 @@ warn_time_now = 0
warn_time_delta = 1000 warn_time_delta = 1000
warn_timer = 0 warn_timer = 0
##########################################################################
# Try to lock all memory
mlockall()
##########################################################################
if print_proc_table: if print_proc_table:
print() print()
fattest() find_victim()
print() print()
print('Monitoring started!') print('Monitoring started!')
@ -1664,14 +1756,14 @@ while True:
if avg10 >= sigkill_psi and time() - psi_t0 >= psi_avg10_sleep_time: if avg10 >= sigkill_psi and time() - psi_t0 >= psi_avg10_sleep_time:
time0 = time() time0 = time()
mem_info = 'avg ({}) > sigkill_psi ({})'.format(avg10, sigkill_psi) mem_info = 'avg ({}) > sigkill_psi ({})'.format(avg10, sigkill_psi)
find_victim_and_send_signal(SIGKILL) implement_corrective_action(SIGKILL)
psi_t0 = time() psi_t0 = time()
continue continue
if avg10 >= sigterm_psi and time() - psi_t0 >= psi_avg10_sleep_time: if avg10 >= sigterm_psi and time() - psi_t0 >= psi_avg10_sleep_time:
time0 = time() time0 = time()
mem_info = 'avg ({}) > sigterm_psi ({})'.format(avg10, sigterm_psi) mem_info = 'avg ({}) > sigterm_psi ({})'.format(avg10, sigterm_psi)
find_victim_and_send_signal(SIGTERM) implement_corrective_action(SIGTERM)
psi_t0 = time() psi_t0 = time()
continue continue
@ -1780,12 +1872,11 @@ while True:
# далее пошла проверка превышения порогов # далее пошла проверка превышения порогов
# MEM SWAP KILL # MEM SWAP KILL
if mem_available <= mem_min_sigkill_kb and \ if (mem_available <= mem_min_sigkill_kb and
swap_free <= swap_min_sigkill_kb: swap_free <= swap_min_sigkill_kb):
time0 = time() time0 = time()
mem_info = '{}\nMemory status that r' \ mem_info = '{}\nMemory status that requires corrective actions:' \
'equires corrective actions:' \
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
'p_min_sigkill [{} MiB, {} %]'.format( 'p_min_sigkill [{} MiB, {} %]'.format(
@ -1799,7 +1890,7 @@ while True:
kib_to_mib(swap_min_sigkill_kb), kib_to_mib(swap_min_sigkill_kb),
swap_sigkill_pc) swap_sigkill_pc)
find_victim_and_send_signal(SIGKILL) implement_corrective_action(SIGKILL)
psi_t0 = time() psi_t0 = time()
continue continue
@ -1807,8 +1898,7 @@ while True:
if mem_used_zram >= zram_max_sigkill_kb: if mem_used_zram >= zram_max_sigkill_kb:
time0 = time() time0 = time()
mem_info = '{}\nMemory statu' \ mem_info = '{}\nMemory status that requires corrective actions:' \
's that requires corrective actions:' \
'\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \ '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
'kill [{} MiB, {} %]'.format( 'kill [{} MiB, {} %]'.format(
HR, HR,
@ -1817,7 +1907,7 @@ while True:
kib_to_mib(zram_max_sigkill_kb), kib_to_mib(zram_max_sigkill_kb),
percent(zram_max_sigkill_kb / mem_total)) percent(zram_max_sigkill_kb / mem_total))
find_victim_and_send_signal(SIGKILL) implement_corrective_action(SIGKILL)
psi_t0 = time() psi_t0 = time()
continue continue
@ -1827,8 +1917,7 @@ while True:
time0 = time() time0 = time()
mem_info = '{}\nMemory status tha' \ mem_info = '{}\nMemory status that requires corrective actions:' \
't requires corrective actions:' \
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
'p_min_sigterm [{} MiB, {} %]'.format( 'p_min_sigterm [{} MiB, {} %]'.format(
@ -1844,7 +1933,9 @@ while True:
kib_to_mib(swap_min_sigterm_kb), kib_to_mib(swap_min_sigterm_kb),
swap_sigterm_pc) swap_sigterm_pc)
find_victim_and_send_signal(SIGTERM) print(mem_info)
implement_corrective_action(SIGTERM)
psi_t0 = time() psi_t0 = time()
continue continue
@ -1852,8 +1943,7 @@ while True:
if mem_used_zram >= zram_max_sigterm_kb: if mem_used_zram >= zram_max_sigterm_kb:
time0 = time() time0 = time()
mem_info = '{}\nMemory status that r' \ mem_info = '{}\nMemory status that requires corrective actions:' \
'equires corrective actions:' \
'\n MemUsedZram [{} MiB, {} %] >= ' \ '\n MemUsedZram [{} MiB, {} %] >= ' \
'zram_max_sigterm [{} M, {} %]'.format( 'zram_max_sigterm [{} M, {} %]'.format(
HR, HR,
@ -1862,9 +1952,8 @@ while True:
kib_to_mib(zram_max_sigterm_kb), kib_to_mib(zram_max_sigterm_kb),
percent(zram_max_sigterm_kb / mem_total)) percent(zram_max_sigterm_kb / mem_total))
find_victim_and_send_signal(SIGTERM) implement_corrective_action(SIGTERM)
# сделать одно время для обоих уровней.
psi_t0 = time() psi_t0 = time()
continue continue