fix psi
This commit is contained in:
parent
16e749b03f
commit
3427fe459a
30
misc/random-trigger
Executable file
30
misc/random-trigger
Executable file
@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import random
|
||||
|
||||
# Number of elements in the list; this determines how much memory is consumed.
n = 400 * 1000 * 1000

# Number of passes that replace every element of the list.
c = 10


def main(count=n, passes=c):
    """Fill a list with random floats, then rewrite every element repeatedly.

    The list is first populated with ``count`` values from ``random.random()``.
    After that, ``passes`` full sweeps are made over the list; each sweep reads
    every element and stores it back multiplied by 0.999.

    Progress messages are printed to stdout. Ctrl+C (KeyboardInterrupt) stops
    the work quietly, without a traceback.

    Args:
        count: number of list elements (defaults to the module-level ``n``).
        passes: number of rewrite sweeps (defaults to the module-level ``c``).
    """
    print('Наполняем список (n = {}) случайными числами...'.format(count))

    # Bound before the ``try`` so the name exists no matter where an
    # interrupt arrives.
    values = []

    try:
        # Append ``count`` random numbers to the empty list.
        values.extend(random.random() for _ in range(count))

        for iteration in range(1, passes + 1):
            print('Читение и запись новых значений, итерация {} из {}'.format(
                iteration, passes))

            # Replace each element with a new value. A dedicated index name
            # is used so the sweep counter above is never shadowed.
            for idx in range(count):
                values[idx] = values[idx] * 0.999

    except KeyboardInterrupt:
        # Deliberate: an interrupt simply ends the run.
        pass

    finally:
        # Drop the reference to the list on every exit path.
        del values


if __name__ == '__main__':
    main()
|
||||
|
116
nohang
116
nohang
@ -41,7 +41,6 @@ if self_uid == 0:
|
||||
else:
|
||||
root = False
|
||||
|
||||
# wait_time = 10
|
||||
|
||||
notify_helper_path = '/usr/sbin/nohang_notify_helper'
|
||||
|
||||
@ -1528,6 +1527,7 @@ except FileNotFoundError:
|
||||
# extracting parameters from the dictionary
|
||||
# check for all necessary parameters
|
||||
# validation of all parameters
|
||||
psi_debug = conf_parse_bool('psi_debug')
|
||||
|
||||
|
||||
forbid_negative_badness = conf_parse_bool('forbid_negative_badness')
|
||||
@ -1649,45 +1649,45 @@ else:
|
||||
exit(1)
|
||||
|
||||
|
||||
if 'psi_avg10_sleep_time' in config_dict:
|
||||
psi_avg10_sleep_time = string_to_float_convert_test(
|
||||
config_dict['psi_avg10_sleep_time'])
|
||||
if psi_avg10_sleep_time is None:
|
||||
errprint('Invalid psi_avg10_sleep_time value, not float\nExit')
|
||||
if 'psi_post_action_delay' in config_dict:
|
||||
psi_post_action_delay = string_to_float_convert_test(
|
||||
config_dict['psi_post_action_delay'])
|
||||
if psi_post_action_delay is None:
|
||||
errprint('Invalid psi_post_action_delay value, not float\nExit')
|
||||
exit(1)
|
||||
if psi_avg10_sleep_time < 0:
|
||||
errprint('psi_avg10_sleep_time must be positive\nExit')
|
||||
if psi_post_action_delay < 0:
|
||||
errprint('psi_post_action_delay must be positive\nExit')
|
||||
exit(1)
|
||||
else:
|
||||
errprint('psi_avg10_sleep_time not in config\nExit')
|
||||
errprint('psi_post_action_delay not in config\nExit')
|
||||
exit(1)
|
||||
|
||||
|
||||
if 'sigkill_psi_avg10' in config_dict:
|
||||
sigkill_psi_avg10 = string_to_float_convert_test(
|
||||
config_dict['sigkill_psi_avg10'])
|
||||
if sigkill_psi_avg10 is None:
|
||||
errprint('Invalid sigkill_psi_avg10 value, not float\nExit')
|
||||
if 'sigkill_psi_threshold' in config_dict:
|
||||
sigkill_psi_threshold = string_to_float_convert_test(
|
||||
config_dict['sigkill_psi_threshold'])
|
||||
if sigkill_psi_threshold is None:
|
||||
errprint('Invalid sigkill_psi_threshold value, not float\nExit')
|
||||
exit(1)
|
||||
if sigkill_psi_avg10 < 0 or sigkill_psi_avg10 > 100:
|
||||
errprint('sigkill_psi_avg10 must be in the range [0; 100]\nExit')
|
||||
if sigkill_psi_threshold < 0 or sigkill_psi_threshold > 100:
|
||||
errprint('sigkill_psi_threshold must be in the range [0; 100]\nExit')
|
||||
exit(1)
|
||||
else:
|
||||
errprint('sigkill_psi_avg10 not in config\nExit')
|
||||
errprint('sigkill_psi_threshold not in config\nExit')
|
||||
exit(1)
|
||||
|
||||
|
||||
if 'sigterm_psi_avg10' in config_dict:
|
||||
sigterm_psi_avg10 = string_to_float_convert_test(
|
||||
config_dict['sigterm_psi_avg10'])
|
||||
if sigterm_psi_avg10 is None:
|
||||
errprint('Invalid sigterm_psi_avg10 value, not float\nExit')
|
||||
if 'sigterm_psi_threshold' in config_dict:
|
||||
sigterm_psi_threshold = string_to_float_convert_test(
|
||||
config_dict['sigterm_psi_threshold'])
|
||||
if sigterm_psi_threshold is None:
|
||||
errprint('Invalid sigterm_psi_threshold value, not float\nExit')
|
||||
exit(1)
|
||||
if sigterm_psi_avg10 < 0 or sigterm_psi_avg10 > 100:
|
||||
errprint('sigterm_psi_avg10 must be in the range [0; 100]\nExit')
|
||||
if sigterm_psi_threshold < 0 or sigterm_psi_threshold > 100:
|
||||
errprint('sigterm_psi_threshold must be in the range [0; 100]\nExit')
|
||||
exit(1)
|
||||
else:
|
||||
errprint('sigterm_psi_avg10 not in config\nExit')
|
||||
errprint('sigterm_psi_threshold not in config\nExit')
|
||||
exit(1)
|
||||
|
||||
|
||||
@ -2077,17 +2077,12 @@ log('Monitoring started!')
|
||||
|
||||
stdout.flush()
|
||||
|
||||
# тупое присвоение. нид фикс
|
||||
sigterm_psi = sigterm_psi_avg10
|
||||
sigkill_psi = sigkill_psi_avg10
|
||||
|
||||
|
||||
##########################################################################
|
||||
|
||||
if psi_support and not ignore_psi:
|
||||
psi_t0 = time() + psi_avg10_sleep_time
|
||||
|
||||
avg_value = ''
|
||||
psi_t0 = time()
|
||||
psi_avg_string = '' # will be overwritten if PSI monitoring enabled
|
||||
|
||||
|
||||
if print_mem_check_results:
|
||||
@ -2104,26 +2099,53 @@ while True:
|
||||
|
||||
if psi_support and not ignore_psi:
|
||||
|
||||
# avg10 = psi_mem_some_avg10()
|
||||
# psi_avg_value!
|
||||
avg10 = find_psi_metrics_value(psi_path, psi_metrics)
|
||||
psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
|
||||
|
||||
if print_mem_check_results:
|
||||
avg_value = 'PSI avg value: {} | '.format(str(avg10).rjust(6))
|
||||
psi_avg_string = 'PSI avg value: {} | '.format(
|
||||
str(psi_avg_value).rjust(6))
|
||||
|
||||
if avg10 >= sigkill_psi and time() - psi_t0 >= psi_avg10_sleep_time:
|
||||
if psi_avg_value >= sigkill_psi_threshold:
|
||||
sigkill_psi_exceeded = True
|
||||
else:
|
||||
sigkill_psi_exceeded = False
|
||||
|
||||
if psi_avg_value >= sigterm_psi_threshold:
|
||||
sigterm_psi_exceeded = True
|
||||
else:
|
||||
sigterm_psi_exceeded = False
|
||||
|
||||
if time() - psi_t0 >= psi_post_action_delay:
|
||||
psi_post_action_delay_exceeded = True
|
||||
else:
|
||||
psi_post_action_delay_exceeded = False
|
||||
|
||||
if psi_debug:
|
||||
log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps'
|
||||
'i_post_action_delay_exceeded: {}'.format(
|
||||
sigterm_psi_exceeded,
|
||||
sigkill_psi_exceeded,
|
||||
psi_post_action_delay_exceeded))
|
||||
|
||||
if sigkill_psi_exceeded and psi_post_action_delay_exceeded:
|
||||
time0 = time()
|
||||
mem_info = 'PSI avg value ({}) > sigkill_psi ({})'.format(
|
||||
avg10, sigkill_psi)
|
||||
implement_corrective_action(SIGKILL)
|
||||
mem_info = 'PSI avg value ({}) > sigkill_psi_threshold ({})'.format(
|
||||
psi_avg_value, sigkill_psi_threshold)
|
||||
|
||||
# implement_corrective_action(SIGKILL)
|
||||
print('implement!')
|
||||
|
||||
psi_t0 = time()
|
||||
continue
|
||||
|
||||
if avg10 >= sigterm_psi and time() - psi_t0 >= psi_avg10_sleep_time:
|
||||
if sigterm_psi_exceeded and psi_post_action_delay_exceeded:
|
||||
time0 = time()
|
||||
mem_info = 'PSI avg value ({}) > sigterm_psi ({})'.format(
|
||||
avg10, sigterm_psi)
|
||||
implement_corrective_action(SIGTERM)
|
||||
mem_info = 'PSI avg value ({}) > sigterm_psi_threshold ({})'.format(
|
||||
psi_avg_value, sigterm_psi_threshold)
|
||||
|
||||
# implement_corrective_action(SIGTERM)
|
||||
print('implement!')
|
||||
|
||||
psi_t0 = time()
|
||||
continue
|
||||
|
||||
@ -2183,7 +2205,7 @@ while True:
|
||||
# Output available mem sizes
|
||||
if swap_total == 0 and mem_used_zram == 0:
|
||||
log('{}MemAvail: {} M, {} %{}'.format(
|
||||
avg_value,
|
||||
psi_avg_string,
|
||||
human(mem_available, mem_len),
|
||||
just_percent_mem(mem_available / mem_total),
|
||||
speed_info
|
||||
@ -2192,7 +2214,7 @@ while True:
|
||||
|
||||
elif swap_total > 0 and mem_used_zram == 0:
|
||||
log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format(
|
||||
avg_value,
|
||||
psi_avg_string,
|
||||
human(mem_available, mem_len),
|
||||
just_percent_mem(mem_available / mem_total),
|
||||
human(swap_free, swap_len),
|
||||
@ -2204,7 +2226,7 @@ while True:
|
||||
else:
|
||||
log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem'
|
||||
'UsedZram: {} M, {} %{}'.format(
|
||||
avg_value,
|
||||
psi_avg_string,
|
||||
human(mem_available, mem_len),
|
||||
just_percent_mem(mem_available / mem_total),
|
||||
human(swap_free, swap_len),
|
||||
|
@ -96,10 +96,10 @@ psi_path = /proc/pressure/memory
|
||||
|
||||
psi_metrics = some_avg10
|
||||
|
||||
sigterm_psi_avg10 = 60
|
||||
sigkill_psi_avg10 = 90
|
||||
sigterm_psi_threshold = 80
|
||||
sigkill_psi_threshold = 90
|
||||
|
||||
psi_avg10_sleep_time = 60
|
||||
psi_post_action_delay = 40
|
||||
|
||||
|
||||
#####################################################################
|
||||
@ -384,6 +384,9 @@ max_ancestry_depth = 1
|
||||
|
||||
separate_log = False
|
||||
|
||||
|
||||
psi_debug = False
|
||||
|
||||
#####################################################################
|
||||
|
||||
9. Misc
|
||||
|
Loading…
Reference in New Issue
Block a user