This commit is contained in:
Alexey Avramov 2019-09-15 20:24:43 +09:00
parent 0084bee76d
commit 2f4ff48200

185
nohang
View File

@ -29,12 +29,16 @@ def check_config():
log('1. Thresholds below which a signal should be sent to the victim')
log(' soft_threshold_min_mem: {} MiB, {} %'.format(round(soft_threshold_min_mem_mb), round(soft_threshold_min_mem_percent, 1)))
log(' hard_threshold_min_mem: {} MiB, {} %'.format(round(hard_threshold_min_mem_mb), round(hard_threshold_min_mem_percent, 1)))
log(' soft_threshold_min_mem: {} MiB, {} %'.format(
round(soft_threshold_min_mem_mb), round(soft_threshold_min_mem_percent, 1)))
log(' hard_threshold_min_mem: {} MiB, {} %'.format(
round(hard_threshold_min_mem_mb), round(hard_threshold_min_mem_percent, 1)))
log(' soft_threshold_min_swap: {}'.format(soft_threshold_min_swap))
log(' hard_threshold_min_swap: {}'.format(hard_threshold_min_swap))
log(' soft_threshold_max_zram: {} MiB, {} %'.format(round(soft_threshold_max_zram_mb), round(soft_threshold_max_zram_percent, 1)))
log(' hard_threshold_max_zram: {} MiB, {} %'.format(round(hard_threshold_max_zram_mb), round(hard_threshold_max_zram_percent, 1)))
log(' soft_threshold_max_zram: {} MiB, {} %'.format(
round(soft_threshold_max_zram_mb), round(soft_threshold_max_zram_percent, 1)))
log(' hard_threshold_max_zram: {} MiB, {} %'.format(
round(hard_threshold_max_zram_mb), round(hard_threshold_max_zram_percent, 1)))
log('2. Response on PSI memory metrics')
@ -61,7 +65,8 @@ def check_config():
log(' post_soft_action_delay: {} sec'.format(post_soft_action_delay))
log(' post_zombie_delay: {} sec'.format(post_zombie_delay))
log(' victim_cache_time: {} sec'.format(victim_cache_time))
log(' ignore_positive_oom_score_adj: {}'.format(ignore_positive_oom_score_adj))
log(' ignore_positive_oom_score_adj: {}'.format(
ignore_positive_oom_score_adj))
log('5. Impact on the badness of processes')
@ -132,12 +137,16 @@ def check_config():
log('7. GUI notifications')
log(' post_action_gui_notifications: {}'.format(post_action_gui_notifications))
log(' low_memory_warnings_enabled: {}'.format(low_memory_warnings_enabled))
log(' post_action_gui_notifications: {}'.format(
post_action_gui_notifications))
log(' low_memory_warnings_enabled: {}'.format(
low_memory_warnings_enabled))
log(' warning_exe: {}'.format(warning_exe))
log(' warning_threshold_min_mem: {} MiB, {} %'.format(round(warning_threshold_min_mem_mb), round(warning_threshold_min_mem_percent, 1)))
log(' warning_threshold_min_mem: {} MiB, {} %'.format(round(
warning_threshold_min_mem_mb), round(warning_threshold_min_mem_percent, 1)))
log(' warning_threshold_min_swap: {}'.format(warning_threshold_min_swap))
log(' warning_threshold_max_zram: {} MiB, {} %'.format(round(warning_threshold_max_zram_mb), round(warning_threshold_max_zram_percent, 1)))
log(' warning_threshold_max_zram: {} MiB, {} %'.format(round(
warning_threshold_max_zram_mb), round(warning_threshold_max_zram_percent, 1)))
log(' warning_threshold_max_psi: {}'.format(warning_threshold_max_psi))
log(' min_post_warning_delay: {} sec'.format(min_post_warning_delay))
@ -172,12 +181,6 @@ def check_config():
exit()
def encoder(string):
"""
"""
@ -303,8 +306,7 @@ def exe(cmd):
return err
def go (func, *a):
def go(func, *a):
""" run func in new thread
"""
t1 = time()
@ -312,10 +314,12 @@ def go (func, *a):
Thread(target=func, args=a).start()
except RuntimeError:
print('RuntimeError: cannot spawn a new thread')
return 1
t2 = time()
log('New thread spawned in {} ms'.format(
round((t2 - t1) * 1000, 1)
))
))
return 0
def write(path, string):
@ -755,7 +759,6 @@ def mlockall():
# log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
def update_stat_dict_and_print(key):
"""
"""
@ -853,8 +856,6 @@ def check_zram():
return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
'''
def format_time(t):
t = int(t)
@ -872,8 +873,6 @@ def format_time(t):
'''
def format_time(t):
t = int(t)
@ -892,13 +891,6 @@ def format_time(t):
return '{} min {} sec'.format(m, s)
def string_to_float_convert_test(string):
"""Try to interprete string values as floats."""
try:
@ -1278,15 +1270,12 @@ def find_victim_info(pid, victim_badness, name):
state = line.split('\t')[1].rstrip()
continue
"""
if n is ppid_index:
# ppid = line.split('\t')[1]
continue
"""
if n is uid_index:
uid = line.split('\t')[2]
continue
@ -1345,14 +1334,12 @@ def find_victim_info(pid, victim_badness, name):
if i is state_index:
state = f_list[i].split('\t')[1].rstrip()
"""
if i is ppid_index:
pass
# ppid = f_list[i].split('\t')[1]
"""
if i is uid_index:
uid = f_list[i].split('\t')[2]
@ -1489,27 +1476,32 @@ def check_mem_swap_ex():
# if hard_threshold_min_swap is set in percent
if swap_kill_is_percent:
hard_threshold_min_swap_kb = swap_total * hard_threshold_min_swap_percent / 100.0
hard_threshold_min_swap_kb = swap_total * \
hard_threshold_min_swap_percent / 100.0
else:
hard_threshold_min_swap_kb = swap_kb_dict['hard_threshold_min_swap_kb']
if swap_term_is_percent:
soft_threshold_min_swap_kb = swap_total * soft_threshold_min_swap_percent / 100.0
soft_threshold_min_swap_kb = swap_total * \
soft_threshold_min_swap_percent / 100.0
else:
soft_threshold_min_swap_kb = swap_kb_dict['soft_threshold_min_swap_kb']
if swap_warn_is_percent:
warning_threshold_min_swap_kb = swap_total * warning_threshold_min_swap_percent / 100.0
warning_threshold_min_swap_kb = swap_total * \
warning_threshold_min_swap_percent / 100.0
else:
warning_threshold_min_swap_kb = swap_kb_dict['warning_threshold_min_swap_kb']
if swap_total > hard_threshold_min_swap_kb:
swap_sigkill_pc = percent(hard_threshold_min_swap_kb / (swap_total + 0.1))
swap_sigkill_pc = percent(
hard_threshold_min_swap_kb / (swap_total + 0.1))
else:
swap_sigkill_pc = '-'
if swap_total > soft_threshold_min_swap_kb:
swap_sigterm_pc = percent(soft_threshold_min_swap_kb / (swap_total + 0.1))
swap_sigterm_pc = percent(
soft_threshold_min_swap_kb / (swap_total + 0.1))
else:
swap_sigterm_pc = '-'
@ -1601,16 +1593,6 @@ def check_zram_ex():
return None, None, mem_used_zram
def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0):
"""
"""
@ -1704,15 +1686,6 @@ def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0):
psi_term_exceeded_timer, x0)
def is_victim_alive(victim_id):
"""
We do not have a reliable sign of the end of the release of memory:
@ -1959,8 +1932,6 @@ def implement_corrective_action(
success = False
"""
response_time = time() - time0
exit_status = None
@ -2021,13 +1992,8 @@ def implement_corrective_action(
else:
pass
last_action_dict['t'] = kill_timestamp = time()
# print(v_dict)
# response_time = time() - time0
@ -2036,7 +2002,6 @@ def implement_corrective_action(
# log('victim will die: ' + str(vwd))
# log('response_time: ' + str(response_time) + ' sec')
# START OF VICTIM STATE TRACKING. Could be moved into a separate function
# that takes the victim id, logs, and returns something.
@ -2323,20 +2288,11 @@ else:
notify_helper_path = 'nohang_notify_helper'
last_action_dict = dict()
last_action_dict['t'] = time()
# will store corrective actions stat
stat_dict = dict()
@ -2362,15 +2318,12 @@ print_proc_table_flag = False
check_config_flag = False
if os.path.exists('./nohang.conf'):
config = os.getcwd() + '/nohang.conf'
else:
config = '/etc/nohang/nohang.conf'
if len(argv) == 1:
pass
elif len(argv) == 2:
@ -2678,7 +2631,8 @@ print_config_at_startup = conf_parse_bool('print_config_at_startup')
print_mem_check_results = conf_parse_bool('print_mem_check_results')
debug_sleep = conf_parse_bool('debug_sleep')
low_memory_warnings_enabled = conf_parse_bool('low_memory_warnings_enabled')
post_action_gui_notifications = conf_parse_bool('post_action_gui_notifications')
post_action_gui_notifications = conf_parse_bool(
'post_action_gui_notifications')
psi_checking_enabled = conf_parse_bool('psi_checking_enabled')
@ -2688,34 +2642,28 @@ zram_checking_enabled = conf_parse_bool('zram_checking_enabled')
ignore_zram = not zram_checking_enabled
debug_gui_notifications = conf_parse_bool('debug_gui_notifications')
ignore_positive_oom_score_adj = conf_parse_bool('ignore_positive_oom_score_adj')
ignore_positive_oom_score_adj = conf_parse_bool(
'ignore_positive_oom_score_adj')
(soft_threshold_min_mem_kb, soft_threshold_min_mem_mb,
soft_threshold_min_mem_percent) = calculate_percent('soft_threshold_min_mem')
(hard_threshold_min_mem_kb, hard_threshold_min_mem_mb,
hard_threshold_min_mem_percent) = calculate_percent('hard_threshold_min_mem')
(soft_threshold_max_zram_kb, soft_threshold_max_zram_mb,
soft_threshold_max_zram_percent) = calculate_percent('soft_threshold_max_zram')
(soft_threshold_min_mem_kb, soft_threshold_min_mem_mb, soft_threshold_min_mem_percent
) = calculate_percent('soft_threshold_min_mem')
(hard_threshold_max_zram_kb, hard_threshold_max_zram_mb,
hard_threshold_max_zram_percent) = calculate_percent('hard_threshold_max_zram')
(hard_threshold_min_mem_kb, hard_threshold_min_mem_mb, hard_threshold_min_mem_percent
) = calculate_percent('hard_threshold_min_mem')
(warning_threshold_min_mem_kb, warning_threshold_min_mem_mb,
warning_threshold_min_mem_percent) = calculate_percent('warning_threshold_min_mem')
(soft_threshold_max_zram_kb, soft_threshold_max_zram_mb, soft_threshold_max_zram_percent
) = calculate_percent('soft_threshold_max_zram')
(hard_threshold_max_zram_kb, hard_threshold_max_zram_mb, hard_threshold_max_zram_percent
) = calculate_percent('hard_threshold_max_zram')
(warning_threshold_min_mem_kb, warning_threshold_min_mem_mb, warning_threshold_min_mem_percent
) = calculate_percent('warning_threshold_min_mem')
(warning_threshold_max_zram_kb, warning_threshold_max_zram_mb, warning_threshold_max_zram_percent
) = calculate_percent('warning_threshold_max_zram')
(warning_threshold_max_zram_kb, warning_threshold_max_zram_mb,
warning_threshold_max_zram_percent) = calculate_percent('warning_threshold_max_zram')
if 'post_zombie_delay' in config_dict:
@ -2760,7 +2708,8 @@ else:
if 'fill_rate_swap' in config_dict:
fill_rate_swap = string_to_float_convert_test(config_dict['fill_rate_swap'])
fill_rate_swap = string_to_float_convert_test(
config_dict['fill_rate_swap'])
if fill_rate_swap is None:
errprint('Invalid fill_rate_swap value, not float\nExit')
exit(1)
@ -2773,7 +2722,8 @@ else:
if 'fill_rate_zram' in config_dict:
fill_rate_zram = string_to_float_convert_test(config_dict['fill_rate_zram'])
fill_rate_zram = string_to_float_convert_test(
config_dict['fill_rate_zram'])
if fill_rate_zram is None:
errprint('Invalid fill_rate_zram value, not float\nExit')
exit(1)
@ -2862,7 +2812,8 @@ if 'warning_threshold_max_psi' in config_dict:
errprint('Invalid warning_threshold_max_psi value, not float\nExit')
exit(1)
if warning_threshold_max_psi < 0 or warning_threshold_max_psi > 100:
errprint('warning_threshold_max_psi must be in the range [0; 100]\nExit')
errprint(
'warning_threshold_max_psi must be in the range [0; 100]\nExit')
exit(1)
else:
errprint('warning_threshold_max_psi not in config\nExit')
@ -2883,9 +2834,6 @@ else:
exit(1)
if 'min_post_warning_delay' in config_dict:
min_post_warning_delay = string_to_float_convert_test(
config_dict['min_post_warning_delay'])
@ -3129,9 +3077,12 @@ psi_support = os.path.exists(psi_path)
# Get KiB levels if it's possible.
soft_threshold_min_swap_tuple = get_swap_threshold_tuple(soft_threshold_min_swap)
hard_threshold_min_swap_tuple = get_swap_threshold_tuple(hard_threshold_min_swap)
warning_threshold_min_swap_tuple = get_swap_threshold_tuple(warning_threshold_min_swap)
soft_threshold_min_swap_tuple = get_swap_threshold_tuple(
soft_threshold_min_swap)
hard_threshold_min_swap_tuple = get_swap_threshold_tuple(
hard_threshold_min_swap)
warning_threshold_min_swap_tuple = get_swap_threshold_tuple(
warning_threshold_min_swap)
swap_kb_dict = dict()
@ -3162,24 +3113,10 @@ else:
##########################################################################
if print_config_at_startup or check_config_flag:
check_config()
##########################################################################
@ -3264,8 +3201,6 @@ log('Monitoring has started!')
stdout.flush()
##########################################################################