fix broken psi and zram
This commit is contained in:
parent
fdf2a1bebf
commit
30132b3e03
671
nohang
671
nohang
@ -815,11 +815,11 @@ def send_notify_warn():
|
|||||||
send_notification(title, body)
|
send_notification(title, body)
|
||||||
|
|
||||||
|
|
||||||
def send_notify(signal, name, pid):
|
def send_notify(threshold, name, pid):
|
||||||
"""
|
"""
|
||||||
Notificate about OOM Preventing.
|
Notificate about OOM Preventing.
|
||||||
|
|
||||||
signal: key for notify_sig_dict
|
threshold: key for notify_sig_dict
|
||||||
name: str process name
|
name: str process name
|
||||||
pid: str process pid
|
pid: str process pid
|
||||||
"""
|
"""
|
||||||
@ -831,7 +831,7 @@ def send_notify(signal, name, pid):
|
|||||||
|
|
||||||
title = 'Freeze prevention'
|
title = 'Freeze prevention'
|
||||||
body = '<b>{}</b> [{}] <b>{}</b>'.format(
|
body = '<b>{}</b> [{}] <b>{}</b>'.format(
|
||||||
notify_sig_dict[signal],
|
notify_sig_dict[threshold],
|
||||||
pid,
|
pid,
|
||||||
name.replace(
|
name.replace(
|
||||||
# symbol '&' can break notifications in some themes,
|
# symbol '&' can break notifications in some themes,
|
||||||
@ -1041,6 +1041,7 @@ def find_victim(_print_proc_table):
|
|||||||
)[0]
|
)[0]
|
||||||
|
|
||||||
pid = pid_tuple_list[0]
|
pid = pid_tuple_list[0]
|
||||||
|
victim_id = get_victim_id(pid)
|
||||||
|
|
||||||
# Get maximum 'badness' value
|
# Get maximum 'badness' value
|
||||||
victim_badness = pid_tuple_list[1]
|
victim_badness = pid_tuple_list[1]
|
||||||
@ -1062,7 +1063,7 @@ def find_victim(_print_proc_table):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return pid, victim_badness, victim_name
|
return pid, victim_badness, victim_name, victim_id
|
||||||
|
|
||||||
|
|
||||||
def find_victim_info(pid, victim_badness, name):
|
def find_victim_info(pid, victim_badness, name):
|
||||||
@ -1271,18 +1272,6 @@ def find_victim_info(pid, victim_badness, name):
|
|||||||
return victim_info
|
return victim_info
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def check_mem_swap_ex():
|
def check_mem_swap_ex():
|
||||||
"""
|
"""
|
||||||
Check: is mem and swap threshold exceeded?
|
Check: is mem and swap threshold exceeded?
|
||||||
@ -1314,8 +1303,8 @@ def check_mem_swap_ex():
|
|||||||
if (mem_available <= mem_min_sigkill_kb and
|
if (mem_available <= mem_min_sigkill_kb and
|
||||||
swap_free <= swap_min_sigkill_kb):
|
swap_free <= swap_min_sigkill_kb):
|
||||||
|
|
||||||
mem_info = 'Hard threshold exceeded\nMemory status that requ' \
|
mem_info = 'Memory status that requ' \
|
||||||
'ires corrective actions:' \
|
'ires corrective actions (hard threshold exceeded):' \
|
||||||
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
|
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
|
||||||
'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
|
'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
|
||||||
'p_min_sigkill [{} MiB, {} %]'.format(
|
'p_min_sigkill [{} MiB, {} %]'.format(
|
||||||
@ -1328,13 +1317,13 @@ def check_mem_swap_ex():
|
|||||||
kib_to_mib(swap_min_sigkill_kb),
|
kib_to_mib(swap_min_sigkill_kb),
|
||||||
swap_sigkill_pc)
|
swap_sigkill_pc)
|
||||||
|
|
||||||
return SIGKILL, mem_info
|
return SIGKILL, mem_info, mem_available, swap_min_sigkill_kb, swap_min_sigterm_kb, swap_free, swap_total
|
||||||
|
|
||||||
if (mem_available <= mem_min_sigterm_kb and
|
if (mem_available <= mem_min_sigterm_kb and
|
||||||
swap_free <= swap_min_sigterm_kb):
|
swap_free <= swap_min_sigterm_kb):
|
||||||
|
|
||||||
mem_info = 'Soft threshold exceeded\nMemory status that requi' \
|
mem_info = 'Memory status that requi' \
|
||||||
'res corrective actions:' \
|
'res corrective actions (soft threshold exceeded):' \
|
||||||
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
|
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
|
||||||
'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
|
'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
|
||||||
'p_min_sigterm [{} MiB, {} %]'.format(
|
'p_min_sigterm [{} MiB, {} %]'.format(
|
||||||
@ -1347,24 +1336,146 @@ def check_mem_swap_ex():
|
|||||||
kib_to_mib(swap_min_sigterm_kb),
|
kib_to_mib(swap_min_sigterm_kb),
|
||||||
swap_sigterm_pc)
|
swap_sigterm_pc)
|
||||||
|
|
||||||
return SIGTERM, mem_info
|
return SIGTERM, mem_info, mem_available, swap_min_sigkill_kb, swap_min_sigterm_kb, swap_free, swap_total
|
||||||
|
|
||||||
return None, None
|
if gui_low_memory_warnings:
|
||||||
|
|
||||||
|
if (mem_available <= mem_min_warnings_kb and swap_free <=
|
||||||
|
swap_min_warnings_kb + 0.1):
|
||||||
|
return 'WARN', None, mem_available, swap_min_sigkill_kb, swap_min_sigterm_kb, swap_free, swap_total
|
||||||
|
|
||||||
|
return None, None, mem_available, swap_min_sigkill_kb, swap_min_sigterm_kb, swap_free, swap_total
|
||||||
|
|
||||||
|
|
||||||
|
def check_zram_ex():
    """
    Check whether zram memory usage exceeds a configured threshold.

    The current usage is read with check_zram() and compared, in order,
    with zram_max_sigkill_kb, zram_max_sigterm_kb and, only when
    gui_low_memory_warnings is enabled, zram_max_warnings_kb.

    Returns a tuple (threshold, mem_info, mem_used_zram):
        threshold: SIGKILL, SIGTERM, 'WARN' or None
        mem_info: str describing the exceeded threshold for SIGKILL and
            SIGTERM, otherwise None
        mem_used_zram: the value returned by check_zram()
    """
    mem_used_zram = check_zram()

    # The hard threshold is tested first, so SIGKILL wins when both the
    # hard and the soft thresholds are exceeded.
    if mem_used_zram >= zram_max_sigkill_kb:

        mem_info = (
            'Memory status that requires corrective actions '
            '(hard threshold exceeded):'
            '\n MemUsedZram [{} MiB, {} %] >= zram_max_sigkill '
            '[{} MiB, {} %]'
        ).format(
            kib_to_mib(mem_used_zram),
            percent(mem_used_zram / mem_total),
            kib_to_mib(zram_max_sigkill_kb),
            percent(zram_max_sigkill_kb / mem_total))

        return SIGKILL, mem_info, mem_used_zram

    if mem_used_zram >= zram_max_sigterm_kb:

        # The unit label is 'MiB' (was 'M'): the value is produced by
        # kib_to_mib(), exactly like in the hard threshold message.
        mem_info = (
            'Memory status that requires corrective actions '
            '(soft threshold exceeded):'
            '\n MemUsedZram [{} MiB, {} %] >= zram_max_sigterm '
            '[{} MiB, {} %]'
        ).format(
            kib_to_mib(mem_used_zram),
            percent(mem_used_zram / mem_total),
            kib_to_mib(zram_max_sigterm_kb),
            percent(zram_max_sigterm_kb / mem_total))

        return SIGTERM, mem_info, mem_used_zram

    # The warning level is reported only when GUI warnings are enabled.
    if gui_low_memory_warnings and mem_used_zram >= zram_max_warnings_kb:
        return 'WARN', None, mem_used_zram

    return None, None, mem_used_zram
|
||||||
|
|
||||||
|
|
||||||
|
def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0):
    """
    Check whether the PSI average stayed above a threshold long enough.

    psi_t0: float timestamp; the time elapsed since it is compared with
        psi_post_action_delay. It is returned unchanged.
    psi_kill_exceeded_timer: accumulated seconds during which the PSI
        average was >= sigkill_psi_threshold
    psi_term_exceeded_timer: accumulated seconds during which the PSI
        average was >= sigterm_psi_threshold
    x0: float timestamp of the previous check, used to measure how much
        time to add to the timers

    Returns a tuple (threshold, mem_info, psi_t0, psi_kill_exceeded_timer,
    psi_term_exceeded_timer, x0):
        threshold: SIGKILL, SIGTERM, 'WARN' or None
        mem_info: str describing the exceeded threshold for SIGKILL and
            SIGTERM, otherwise None
        the remaining items are the updated state to pass to the next call
    """
    # Use one timestamp for both measuring the elapsed interval and
    # restarting it; two separate time() calls would silently drop the
    # time spent between them from the accumulated timers.
    now = time()
    delta0 = now - x0
    x0 = now

    psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)

    psi_post_action_delay_timer = time() - psi_t0
    psi_post_action_delay_exceeded = (
        psi_post_action_delay_timer >= psi_post_action_delay)

    # The timer grows while the hard threshold is exceeded and restarts
    # from zero as soon as the PSI average drops below it.
    sigkill_psi_exceeded = psi_avg_value >= sigkill_psi_threshold
    if sigkill_psi_exceeded:
        psi_kill_exceeded_timer += delta0
    else:
        psi_kill_exceeded_timer = 0

    if psi_debug:

        log('psi_post_action_delay_timer: {}'.format(
            round(psi_post_action_delay_timer, 3)))

        log('psi_post_action_delay_exceeded: {}\nsigkill_psi_exceeded'
            ': {}\npsi_kill_exceeded_timer: {}'.format(
                psi_post_action_delay_exceeded,
                sigkill_psi_exceeded,
                round(psi_kill_exceeded_timer, 1)
            )
            )

    if (psi_kill_exceeded_timer >= psi_excess_duration and
            psi_post_action_delay_exceeded):

        mem_info = (
            'PSI avg ({}) > sigkill_psi_threshold ({})\n'
            'PSI avg exceeded psi_excess_duration (value'
            ' = {} sec) for {} seconds'
        ).format(
            psi_avg_value,
            sigkill_psi_threshold,
            psi_excess_duration,
            round(psi_kill_exceeded_timer, 1)
        )

        # NOTE: psi_t0 is deliberately not reset here. The timer has to be
        # restarted only AFTER a corrective action has been applied
        # (either after any attempt or after a successful one): if the
        # victim died while it was being searched for, or if a signal was
        # sent, the timer is reset at that point, not in this function.

        return (SIGKILL, mem_info, psi_t0, psi_kill_exceeded_timer,
                psi_term_exceeded_timer, x0)

    # The soft threshold timer follows the same rules as the hard one.
    # It is not updated on the calls that return SIGKILL above.
    sigterm_psi_exceeded = psi_avg_value >= sigterm_psi_threshold
    if sigterm_psi_exceeded:
        psi_term_exceeded_timer += delta0
    else:
        psi_term_exceeded_timer = 0

    if psi_debug:

        log('sigterm_psi_exceeded: {}\n'
            'psi_term_exceeded_timer: {}\n'.format(
                sigterm_psi_exceeded,
                round(psi_term_exceeded_timer, 1)
            )
            )

    if (psi_term_exceeded_timer >= psi_excess_duration and
            psi_post_action_delay_exceeded):

        mem_info = (
            'PSI avg ({}) > sigterm_psi_threshold ({})\n'
            'PSI avg exceeded psi_excess_duration (value'
            ' = {} sec) for {} seconds'
        ).format(
            psi_avg_value,
            sigterm_psi_threshold,
            psi_excess_duration,
            round(psi_term_exceeded_timer, 1)
        )

        return (SIGTERM, mem_info, psi_t0, psi_kill_exceeded_timer,
                psi_term_exceeded_timer, x0)

    # The warning level is reported only when GUI warnings are enabled.
    if gui_low_memory_warnings and psi_avg_value >= psi_avg_warnings:
        return ('WARN', None, psi_t0, psi_kill_exceeded_timer,
                psi_term_exceeded_timer, x0)

    return (None, None, psi_t0, psi_kill_exceeded_timer,
            psi_term_exceeded_timer, x0)
|
||||||
|
|
||||||
|
|
||||||
def is_victim_alive(pid):
|
def is_victim_alive(pid):
|
||||||
@ -1384,31 +1495,26 @@ def is_victim_alive(pid):
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def implement_corrective_action(
|
||||||
|
threshold,
|
||||||
|
mem_info_list,
|
||||||
|
psi_t0,
|
||||||
|
psi_kill_exceeded_timer,
|
||||||
|
psi_term_exceeded_timer,
|
||||||
|
x0, psi_s, zram_s, zram_m, psi_m):
|
||||||
def implement_corrective_action(signal, mem_info):
|
|
||||||
"""
|
"""
|
||||||
Find victim with highest badness and send SIGTERM/SIGKILL
|
Find victim with highest badness and send SIGTERM/SIGKILL
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Ёбаная запутанная фция. Распутать всё нахуй. Выделить части в отдельн фции.
|
# Ёбаная запутанная фция. Распутать всё нахуй. Выделить части в отдельн фции.
|
||||||
# Разбить саму фцию на части. Сделать ее структуру простой и понятной.
|
# Разбить саму фцию на части. Сделать ее структуру простой и понятной.
|
||||||
|
|
||||||
|
time0 = time() # начало корр действия. Для вычисл времени действия.
|
||||||
time0 = time() # начало корр действия. Для вычисл времени действия.
|
|
||||||
|
|
||||||
|
|
||||||
# выходим из фции, если для SIGTERM порога не превышено время
|
# выходим из фции, если для SIGTERM порога не превышено время
|
||||||
# min_delay_after_sigterm и спим в течение over_sleep
|
# min_delay_after_sigterm и спим в течение over_sleep
|
||||||
# если хард порог превышен - идем дальше.
|
# если хард порог превышен - идем дальше.
|
||||||
if signal is SIGTERM:
|
if threshold is SIGTERM:
|
||||||
|
|
||||||
dt = time() - actions_time_dict['action_handled'][0]
|
dt = time() - actions_time_dict['action_handled'][0]
|
||||||
|
|
||||||
@ -1422,7 +1528,7 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
|
|
||||||
sleep(over_sleep)
|
sleep(over_sleep)
|
||||||
|
|
||||||
return None # время задержки между действиями не истекло
|
return psi_t0 # время задержки между действиями не истекло
|
||||||
else:
|
else:
|
||||||
log('min_delay_after_sigterm IS EXCEEDED, it is time to action')
|
log('min_delay_after_sigterm IS EXCEEDED, it is time to action')
|
||||||
|
|
||||||
@ -1442,60 +1548,104 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
log(mem_info)
|
for i in mem_info_list:
|
||||||
|
log(i)
|
||||||
|
|
||||||
# ищем жертву с ее бэднес.
|
# ищем жертву с ее бэднес.
|
||||||
pid, victim_badness, name = find_victim(print_proc_table)
|
pid, victim_badness, name, victim_id = find_victim(print_proc_table)
|
||||||
# sleep(0.1)
|
# sleep(0.1)
|
||||||
|
|
||||||
new_signal, mem_info = check_mem_swap_ex()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#log(new_signal)
|
|
||||||
#log(mem_info)
|
|
||||||
|
|
||||||
if new_signal is None:
|
|
||||||
|
|
||||||
|
|
||||||
|
log('Recheck memory levels...')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# перепроверяем пороги: они могли измениться за время поиска жертвы
|
||||||
|
(masf_s, masf_m, mem_available, swap_min_sigkill_kb, swap_min_sigterm_kb,
|
||||||
|
swap_free, swap_total) = check_mem_swap_ex()
|
||||||
|
|
||||||
|
|
||||||
|
if CHECK_ZRAM:
|
||||||
|
zram_s, zram_m, mem_used_zram = check_zram_ex()
|
||||||
|
|
||||||
|
if CHECK_PSI:
|
||||||
|
(psi_s, psi_m, psi_t0, psi_kill_exceeded_timer,
|
||||||
|
psi_term_exceeded_timer, x0) = check_psi_ex(
|
||||||
|
psi_t0,psi_kill_exceeded_timer,psi_term_exceeded_timer,x0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if masf_s is SIGKILL or zram_s is SIGKILL or psi_s is SIGKILL:
|
||||||
|
|
||||||
|
new_threshold = SIGKILL
|
||||||
|
mem_info_list = []
|
||||||
|
|
||||||
|
if masf_s is SIGKILL or masf_s is SIGTERM:
|
||||||
|
mem_info_list.append(masf_m)
|
||||||
|
|
||||||
|
if zram_s is SIGKILL or zram_s is SIGTERM:
|
||||||
|
mem_info_list.append(zram_m)
|
||||||
|
|
||||||
|
if psi_s is SIGKILL or psi_s is SIGTERM:
|
||||||
|
mem_info_list.append(psi_m)
|
||||||
|
|
||||||
|
elif masf_s is SIGTERM or zram_s is SIGTERM or psi_s is SIGTERM:
|
||||||
|
|
||||||
|
new_threshold = SIGTERM
|
||||||
|
mem_info_list = []
|
||||||
|
|
||||||
|
if masf_s is SIGKILL or masf_s is SIGTERM:
|
||||||
|
mem_info_list.append(masf_m)
|
||||||
|
|
||||||
|
if zram_s is SIGKILL or zram_s is SIGTERM:
|
||||||
|
mem_info_list.append(zram_m)
|
||||||
|
|
||||||
|
if psi_s is SIGKILL or psi_s is SIGTERM:
|
||||||
|
mem_info_list.append(psi_m)
|
||||||
|
|
||||||
|
else:
|
||||||
log('Thresholds is not exceeded now')
|
log('Thresholds is not exceeded now')
|
||||||
return None
|
return psi_t0
|
||||||
|
|
||||||
if new_signal is not signal:
|
|
||||||
log(mem_info)
|
|
||||||
signal = new_signal
|
|
||||||
|
|
||||||
#log(mem_info)
|
|
||||||
|
|
||||||
|
# печать порогов
|
||||||
|
for i in mem_info_list:
|
||||||
|
log(i)
|
||||||
|
|
||||||
|
# может это излишне
|
||||||
|
if new_threshold is None or new_threshold == 'WARN':
|
||||||
|
log('Thresholds is not exceeded now')
|
||||||
|
return psi_t0
|
||||||
|
|
||||||
|
threshold = new_threshold
|
||||||
|
|
||||||
if victim_badness >= min_badness:
|
if victim_badness >= min_badness:
|
||||||
|
|
||||||
|
psi_t0 = time()
|
||||||
|
|
||||||
if print_victim_info:
|
if print_victim_info:
|
||||||
victim_info = find_victim_info(pid, victim_badness, name)
|
victim_info = find_victim_info(pid, victim_badness, name)
|
||||||
log(victim_info)
|
log(victim_info)
|
||||||
|
|
||||||
# пороги могли превысиься за время поиска жертвы (поиск может занимать
|
|
||||||
# сотни миллисекунд)
|
|
||||||
mem_available, swap_total, swap_free = check_mem_and_swap()
|
|
||||||
|
|
||||||
ma_mib = int(mem_available) / 1024.0
|
|
||||||
sf_mib = int(swap_free) / 1024.0
|
|
||||||
log('Memory status before implementing a corrective act'
|
|
||||||
'ion:\n MemAvailable'
|
|
||||||
': {} MiB, SwapFree: {} MiB'.format(
|
|
||||||
round(ma_mib, 1), round(sf_mib, 1)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if (mem_available <= mem_min_sigkill_kb and
|
|
||||||
swap_free <= swap_min_sigkill_kb):
|
|
||||||
log('Hard threshold exceeded')
|
|
||||||
signal = SIGKILL
|
|
||||||
|
|
||||||
victim_id = get_victim_id(pid)
|
|
||||||
|
|
||||||
# kill the victim if it doesn't respond to SIGTERM В ТЕЧЕНИЕ
|
# kill the victim if it doesn't respond to SIGTERM В ТЕЧЕНИЕ
|
||||||
# ЗАДАННОГО ВРЕМЕНИ
|
# ЗАДАННОГО ВРЕМЕНИ
|
||||||
|
|
||||||
# переопределяем сигнал для старых жертв
|
# переопределяем сигнал для старых жертв
|
||||||
if signal is SIGTERM:
|
if threshold is SIGTERM:
|
||||||
|
|
||||||
if victim_id in victim_dict:
|
if victim_id in victim_dict:
|
||||||
|
|
||||||
@ -1504,12 +1654,12 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
if dt > max_post_sigterm_victim_lifetime:
|
if dt > max_post_sigterm_victim_lifetime:
|
||||||
print('max_post_sigterm_victim_lifetime exceeded: the '
|
print('max_post_sigterm_victim_lifetime exceeded: the '
|
||||||
'victim will get SIGKILL')
|
'victim will get SIGKILL')
|
||||||
signal = SIGKILL
|
threshold = SIGKILL
|
||||||
|
|
||||||
# matching with re to customize corrective actions
|
# matching with re to customize corrective actions
|
||||||
soft_match = False
|
soft_match = False
|
||||||
|
|
||||||
if soft_actions and signal is SIGTERM:
|
if soft_actions and threshold is SIGTERM:
|
||||||
name = pid_to_name(pid)
|
name = pid_to_name(pid)
|
||||||
cgroup_v1 = pid_to_cgroup_v1(pid)
|
cgroup_v1 = pid_to_cgroup_v1(pid)
|
||||||
service = ''
|
service = ''
|
||||||
@ -1530,9 +1680,7 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
soft_match = True
|
soft_match = True
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if soft_match: # ПЕРЕОПРЕДЕЛЕНИЕ МЯГКОГО КОРРЕКТИРУЮЩЕГО ДЕЙСТВИЯ
|
||||||
|
|
||||||
if soft_match: # ПЕРЕОПРЕДЕЛЕНИЕ МЯГКОГО КОРРЕКТИРУЮЩЕГО ДЕЙСТВИЯ
|
|
||||||
|
|
||||||
# todo: make new func
|
# todo: make new func
|
||||||
m = check_mem_and_swap()
|
m = check_mem_and_swap()
|
||||||
@ -1588,16 +1736,11 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
|
|
||||||
# обычное действие через сигнал
|
# обычное действие через сигнал
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# вот тут поработать. Тут ебаный цикл. Нахуй его.
|
# вот тут поработать. Тут ебаный цикл. Нахуй его.
|
||||||
|
|
||||||
|
try: # вот тут засрано, в блоке try должно быть только kill(), остальное ниже за пределами
|
||||||
|
|
||||||
|
os.kill(int(pid), threshold)
|
||||||
|
|
||||||
try:
|
|
||||||
|
|
||||||
os.kill(int(pid), signal)
|
|
||||||
kill_timestamp = time()
|
kill_timestamp = time()
|
||||||
response_time = kill_timestamp - time0
|
response_time = kill_timestamp - time0
|
||||||
|
|
||||||
@ -1625,10 +1768,7 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
log('Process exited (VmRSS = 0) in {} sec'.format(
|
log('Process exited (VmRSS = 0) in {} sec'.format(
|
||||||
round(dt, 5)))
|
round(dt, 5)))
|
||||||
|
|
||||||
|
if threshold is SIGKILL or victim_alive == 2:
|
||||||
|
|
||||||
|
|
||||||
if signal is SIGKILL or victim_alive == 2:
|
|
||||||
# жертва умирает от SIGKILL. Дожидаемся ее полной смерти.
|
# жертва умирает от SIGKILL. Дожидаемся ее полной смерти.
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
@ -1641,8 +1781,7 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
log('The victim died in {} sec'.format(
|
log('The victim died in {} sec'.format(
|
||||||
round(kill_duration, 3)))
|
round(kill_duration, 3)))
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
mem_available, swap_total, swap_free = check_mem_and_swap()
|
mem_available, swap_total, swap_free = check_mem_and_swap()
|
||||||
|
|
||||||
ma_mib = int(mem_available) / 1024.0
|
ma_mib = int(mem_available) / 1024.0
|
||||||
@ -1653,17 +1792,18 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
round(ma_mib, 1), round(sf_mib, 1)
|
round(ma_mib, 1), round(sf_mib, 1)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
send_result = 'total response time: {} ms'.format(
|
send_result = 'total response time: {} ms'.format(
|
||||||
round(response_time * 1000))
|
round(response_time * 1000))
|
||||||
|
|
||||||
preventing_oom_message = 'Implement a corrective action:' \
|
preventing_oom_message = 'Implement a corrective action:' \
|
||||||
'\n Send {} to the victim; {}'.format(
|
'\n Send {} to the victim; {}'.format(
|
||||||
sig_dict[signal], send_result)
|
sig_dict[threshold], send_result)
|
||||||
|
|
||||||
key = 'Send {} to {}'.format(sig_dict[signal], name)
|
key = 'Send {} to {}'.format(sig_dict[threshold], name)
|
||||||
|
|
||||||
if signal is SIGKILL and post_kill_exe != '':
|
if threshold is SIGKILL and post_kill_exe != '':
|
||||||
|
|
||||||
cmd = post_kill_exe.replace('$PID', pid).replace(
|
cmd = post_kill_exe.replace('$PID', pid).replace(
|
||||||
'$NAME', pid_to_name(pid))
|
'$NAME', pid_to_name(pid))
|
||||||
@ -1673,7 +1813,7 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
exe(cmd)
|
exe(cmd)
|
||||||
|
|
||||||
if gui_notifications:
|
if gui_notifications:
|
||||||
send_notify(signal, name, pid)
|
send_notify(threshold, name, pid)
|
||||||
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
response_time = time() - time0
|
response_time = time() - time0
|
||||||
@ -1696,10 +1836,11 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
|
|
||||||
update_stat_dict_and_print(key)
|
update_stat_dict_and_print(key)
|
||||||
|
|
||||||
|
|
||||||
# нехуй делать, бэднес жертвы слишком мал
|
# нехуй делать, бэднес жертвы слишком мал
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
# может эту часть наверх отправить через if
|
||||||
|
|
||||||
response_time = time() - time0
|
response_time = time() - time0
|
||||||
victim_badness_is_too_small = 'victim badness {} < min_b' \
|
victim_badness_is_too_small = 'victim badness {} < min_b' \
|
||||||
'adness {}; nothing to do; response time: {} ms'.format(
|
'adness {}; nothing to do; response time: {} ms'.format(
|
||||||
@ -1724,21 +1865,13 @@ def implement_corrective_action(signal, mem_info):
|
|||||||
|
|
||||||
print('##################################################################')
|
print('##################################################################')
|
||||||
|
|
||||||
|
sleep(over_sleep) # Спать если бэднес жертвы мал
|
||||||
|
|
||||||
|
# Что делать с psi_t0 если у мертвы мал бэднес? Ничего, потому что кор действия не было.
|
||||||
|
# демон может жрать 10% цпу при этом. Можно отдельн парам ввести. А можно
|
||||||
|
# не вводить. кек
|
||||||
|
|
||||||
|
return psi_t0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def sleep_after_check_mem():
|
def sleep_after_check_mem():
|
||||||
@ -1802,8 +1935,8 @@ def sleep_after_check_mem():
|
|||||||
|
|
||||||
log(
|
log(
|
||||||
'Sleep {} sec (t_mem={}, t_swap={}{})'.format(
|
'Sleep {} sec (t_mem={}, t_swap={}{})'.format(
|
||||||
round(t, 2),round(t_mem, 2),round(t_swap, 2), z)
|
round(t, 2), round(t_mem, 2), round(t_swap, 2), z)
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
stdout.flush()
|
stdout.flush()
|
||||||
@ -1874,27 +2007,9 @@ def calculate_percent(arg_key):
|
|||||||
return mem_min_kb, mem_min_mb, mem_min_percent
|
return mem_min_kb, mem_min_mb, mem_min_percent
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
victim_dict = dict()
|
victim_dict = dict()
|
||||||
victim_id = None
|
victim_id = None
|
||||||
actions_time_dict = dict()
|
actions_time_dict = dict()
|
||||||
@ -1902,31 +2017,6 @@ actions_time_dict['action_handled'] = [time(), victim_id]
|
|||||||
# print(actions_time_dict)
|
# print(actions_time_dict)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
start_time = time()
|
start_time = time()
|
||||||
|
|
||||||
|
|
||||||
@ -1974,17 +2064,6 @@ else:
|
|||||||
notify_helper_path = '/usr/sbin/nohang_notify_helper'
|
notify_helper_path = '/usr/sbin/nohang_notify_helper'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# will store corrective actions stat
|
# will store corrective actions stat
|
||||||
stat_dict = dict()
|
stat_dict = dict()
|
||||||
|
|
||||||
@ -2319,8 +2398,6 @@ print_victim_info = conf_parse_bool('print_victim_info')
|
|||||||
print_victim_cmdline = conf_parse_bool('print_victim_cmdline')
|
print_victim_cmdline = conf_parse_bool('print_victim_cmdline')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
print_config = conf_parse_bool('print_config')
|
print_config = conf_parse_bool('print_config')
|
||||||
print_mem_check_results = conf_parse_bool('print_mem_check_results')
|
print_mem_check_results = conf_parse_bool('print_mem_check_results')
|
||||||
print_sleep_periods = conf_parse_bool('print_sleep_periods')
|
print_sleep_periods = conf_parse_bool('print_sleep_periods')
|
||||||
@ -2459,6 +2536,20 @@ else:
|
|||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
# Validate the mandatory 'psi_avg_warnings' option: it must be present in
# the config, must be parsed by string_to_float_convert_test() and must lie
# in the range [0; 100]; otherwise an error is printed and the program exits.
if 'psi_avg_warnings' not in config_dict:
    errprint('psi_avg_warnings not in config\nExit')
    exit(1)
else:
    psi_avg_warnings = string_to_float_convert_test(
        config_dict['psi_avg_warnings'])

    # None is what the converter hands back for a value that is not a float
    if psi_avg_warnings is None:
        errprint('Invalid psi_avg_warnings value, not float\nExit')
        exit(1)

    if psi_avg_warnings < 0 or psi_avg_warnings > 100:
        errprint('psi_avg_warnings must be in the range [0; 100]\nExit')
        exit(1)
|
||||||
|
|
||||||
|
|
||||||
if 'min_badness' in config_dict:
|
if 'min_badness' in config_dict:
|
||||||
min_badness = string_to_int_convert_test(
|
min_badness = string_to_int_convert_test(
|
||||||
config_dict['min_badness'])
|
config_dict['min_badness'])
|
||||||
@ -2876,9 +2967,6 @@ psi_avg_string = '' # will be overwritten if PSI monitoring enabled
|
|||||||
|
|
||||||
mem_used_zram = 0
|
mem_used_zram = 0
|
||||||
|
|
||||||
if psi_support and not ignore_psi:
|
|
||||||
psi_t0 = time()
|
|
||||||
|
|
||||||
|
|
||||||
if print_mem_check_results:
|
if print_mem_check_results:
|
||||||
|
|
||||||
@ -2895,10 +2983,25 @@ for i in sig_list:
|
|||||||
signal(i, signal_handler)
|
signal(i, signal_handler)
|
||||||
|
|
||||||
|
|
||||||
|
x0 = time()
|
||||||
|
delta0 = 0
|
||||||
|
|
||||||
|
|
||||||
|
threshold = None
|
||||||
|
mem_info = None
|
||||||
|
|
||||||
|
|
||||||
|
#print(x0, 'x0')
|
||||||
|
|
||||||
CHECK_PSI = False
|
CHECK_PSI = False
|
||||||
if psi_support and not ignore_psi:
|
if psi_support and not ignore_psi:
|
||||||
CHECK_PSI = True
|
CHECK_PSI = True
|
||||||
|
|
||||||
|
psi_kill_exceeded_timer = 0
|
||||||
|
psi_term_exceeded_timer = 0
|
||||||
|
psi_t0 = time()
|
||||||
|
psi_s = zram_s = zram_m = psi_m = None
|
||||||
|
|
||||||
|
|
||||||
CHECK_ZRAM = not ignore_zram
|
CHECK_ZRAM = not ignore_zram
|
||||||
|
|
||||||
@ -2907,58 +3010,36 @@ log('Monitoring has started!')
|
|||||||
stdout.flush()
|
stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
psi_kill_exceeded_timer = psi_term_exceeded_timer = delta0 = 0
|
|
||||||
|
|
||||||
x0 = time()
|
|
||||||
|
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
delta0 = time() - x0
|
(masf_s, masf_m, mem_available, swap_min_sigkill_kb, swap_min_sigterm_kb,
|
||||||
x0 = time()
|
swap_free, swap_total) = check_mem_swap_ex()
|
||||||
|
|
||||||
# FIND VALUES: mem, swap, zram, psi
|
|
||||||
|
|
||||||
mem_available, swap_total, swap_free = check_mem_and_swap()
|
|
||||||
|
|
||||||
# if swap_min_sigkill is set in percent
|
|
||||||
if swap_kill_is_percent:
|
|
||||||
swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0
|
|
||||||
|
|
||||||
if swap_term_is_percent:
|
|
||||||
swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0
|
|
||||||
|
|
||||||
if swap_warn_is_percent:
|
|
||||||
swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0
|
|
||||||
|
|
||||||
if swap_total > swap_min_sigkill_kb:
|
|
||||||
swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1))
|
|
||||||
else:
|
|
||||||
swap_sigkill_pc = '-'
|
|
||||||
|
|
||||||
if swap_total > swap_min_sigterm_kb:
|
|
||||||
swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1))
|
|
||||||
else:
|
|
||||||
swap_sigterm_pc = '-'
|
|
||||||
|
|
||||||
if CHECK_ZRAM:
|
if CHECK_ZRAM:
|
||||||
mem_used_zram = check_zram()
|
zram_s, zram_m, mem_used_zram = check_zram_ex()
|
||||||
|
|
||||||
if CHECK_PSI:
|
if CHECK_PSI:
|
||||||
psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
|
(psi_s, psi_m, psi_t0, psi_kill_exceeded_timer,
|
||||||
if time() - psi_t0 >= psi_post_action_delay:
|
psi_term_exceeded_timer, x0) = check_psi_ex(
|
||||||
psi_post_action_delay_exceeded = True
|
psi_t0,psi_kill_exceeded_timer,psi_term_exceeded_timer,x0)
|
||||||
else:
|
|
||||||
psi_post_action_delay_exceeded = False
|
|
||||||
|
|
||||||
if print_mem_check_results:
|
|
||||||
psi_avg_string = 'PSI avg: {} | '.format(
|
|
||||||
str(psi_avg_value).rjust(6))
|
|
||||||
|
|
||||||
if print_mem_check_results:
|
if print_mem_check_results:
|
||||||
|
|
||||||
|
if CHECK_PSI:
|
||||||
|
psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
|
||||||
|
if time() - psi_t0 >= psi_post_action_delay:
|
||||||
|
psi_post_action_delay_exceeded = True
|
||||||
|
else:
|
||||||
|
psi_post_action_delay_exceeded = False
|
||||||
|
|
||||||
|
if print_mem_check_results:
|
||||||
|
psi_avg_string = 'PSI avg: {} | '.format(
|
||||||
|
str(psi_avg_value).rjust(6))
|
||||||
|
|
||||||
wt1 = time()
|
wt1 = time()
|
||||||
|
|
||||||
delta = (mem_available + swap_free) - new_mem
|
delta = (mem_available + swap_free) - new_mem
|
||||||
@ -3024,168 +3105,64 @@ while True:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
###########################################################################
|
|
||||||
|
|
||||||
# CHECK HARD THRESHOLDS (SIGKILL LEVEL)
|
if masf_s is SIGKILL or zram_s is SIGKILL or psi_s is SIGKILL:
|
||||||
|
|
||||||
if (mem_available <= mem_min_sigkill_kb and
|
threshold = SIGKILL
|
||||||
swap_free <= swap_min_sigkill_kb):
|
mem_info_list = []
|
||||||
|
|
||||||
mem_info = 'Hard threshold exceeded\nMemory status that requ' \
|
if masf_m is not None:
|
||||||
'ires corrective actions:' \
|
mem_info_list.append(masf_m)
|
||||||
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
|
|
||||||
'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
|
|
||||||
'p_min_sigkill [{} MiB, {} %]'.format(
|
|
||||||
kib_to_mib(mem_available),
|
|
||||||
percent(mem_available / mem_total),
|
|
||||||
kib_to_mib(mem_min_sigkill_kb),
|
|
||||||
percent(mem_min_sigkill_kb / mem_total),
|
|
||||||
kib_to_mib(swap_free),
|
|
||||||
percent(swap_free / (swap_total + 0.1)),
|
|
||||||
kib_to_mib(swap_min_sigkill_kb),
|
|
||||||
swap_sigkill_pc)
|
|
||||||
|
|
||||||
implement_corrective_action(SIGKILL, mem_info)
|
if zram_m is not None:
|
||||||
psi_t0 = time()
|
mem_info_list.append(zram_m)
|
||||||
|
|
||||||
|
if psi_m is not None:
|
||||||
|
mem_info_list.append(psi_m)
|
||||||
|
|
||||||
|
psi_t0 = implement_corrective_action(
|
||||||
|
threshold,
|
||||||
|
mem_info_list,
|
||||||
|
psi_t0,
|
||||||
|
psi_kill_exceeded_timer,
|
||||||
|
psi_term_exceeded_timer,
|
||||||
|
x0, psi_s, zram_s, zram_m, psi_m)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if CHECK_ZRAM:
|
if masf_s is SIGTERM or zram_s is SIGTERM or psi_s is SIGTERM:
|
||||||
if mem_used_zram >= zram_max_sigkill_kb:
|
|
||||||
|
|
||||||
mem_info = 'Hard threshold exceeded\nMemory status that requir' \
|
threshold = SIGTERM
|
||||||
'es corrective actions:' \
|
mem_info_list = []
|
||||||
'\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
|
|
||||||
'kill [{} MiB, {} %]'.format(
|
|
||||||
kib_to_mib(mem_used_zram),
|
|
||||||
percent(mem_used_zram / mem_total),
|
|
||||||
kib_to_mib(zram_max_sigkill_kb),
|
|
||||||
percent(zram_max_sigkill_kb / mem_total))
|
|
||||||
|
|
||||||
implement_corrective_action(SIGKILL, mem_info)
|
if masf_m is not None:
|
||||||
psi_t0 = time()
|
mem_info_list.append(masf_m)
|
||||||
continue
|
|
||||||
|
|
||||||
if CHECK_PSI:
|
if zram_m is not None:
|
||||||
|
mem_info_list.append(zram_m)
|
||||||
|
|
||||||
if psi_avg_value >= sigkill_psi_threshold:
|
if psi_m is not None:
|
||||||
sigkill_psi_exceeded = True
|
mem_info_list.append(psi_m)
|
||||||
psi_kill_exceeded_timer += delta0
|
|
||||||
else:
|
|
||||||
sigkill_psi_exceeded = False
|
|
||||||
psi_kill_exceeded_timer = 0
|
|
||||||
|
|
||||||
if psi_debug:
|
psi_t0 = implement_corrective_action(
|
||||||
|
threshold,
|
||||||
log('psi_post_action_delay_exceeded: {}\nsigkill_psi_exceeded'
|
mem_info_list,
|
||||||
': {}\npsi_kill_exceeded_timer: {}'.format(
|
psi_t0,
|
||||||
psi_post_action_delay_exceeded,
|
psi_kill_exceeded_timer,
|
||||||
sigkill_psi_exceeded,
|
psi_term_exceeded_timer,
|
||||||
round(psi_kill_exceeded_timer, 1)
|
x0, psi_s, zram_s, zram_m, psi_m)
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if (psi_kill_exceeded_timer >= psi_excess_duration and
|
|
||||||
psi_post_action_delay_exceeded):
|
|
||||||
|
|
||||||
mem_info = 'PSI avg ({}) > sigkill_psi_threshold ({})\n' \
|
|
||||||
'PSI avg exceeded psi_excess_duration (value' \
|
|
||||||
' = {} sec) for {} seconds'.format(
|
|
||||||
psi_avg_value,
|
|
||||||
sigkill_psi_threshold,
|
|
||||||
psi_excess_duration,
|
|
||||||
round(psi_kill_exceeded_timer, 1)
|
|
||||||
)
|
|
||||||
|
|
||||||
implement_corrective_action(SIGKILL, mem_info)
|
|
||||||
psi_t0 = time()
|
|
||||||
continue
|
|
||||||
|
|
||||||
###########################################################################
|
|
||||||
|
|
||||||
# CHECK SOFT THRESHOLDS (SIGTERM LEVEL)
|
|
||||||
|
|
||||||
if (mem_available <= mem_min_sigterm_kb and
|
|
||||||
swap_free <= swap_min_sigterm_kb):
|
|
||||||
|
|
||||||
mem_info = 'Soft threshold exceeded\nMemory status that requi' \
|
|
||||||
'res corrective actions:' \
|
|
||||||
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
|
|
||||||
'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
|
|
||||||
'p_min_sigterm [{} MiB, {} %]'.format(
|
|
||||||
kib_to_mib(mem_available),
|
|
||||||
percent(mem_available / mem_total),
|
|
||||||
kib_to_mib(mem_min_sigterm_kb),
|
|
||||||
round(mem_min_sigterm_percent, 1),
|
|
||||||
kib_to_mib(swap_free),
|
|
||||||
percent(swap_free / (swap_total + 0.1)),
|
|
||||||
kib_to_mib(swap_min_sigterm_kb),
|
|
||||||
swap_sigterm_pc)
|
|
||||||
|
|
||||||
implement_corrective_action(SIGTERM, mem_info)
|
|
||||||
psi_t0 = time()
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if CHECK_ZRAM:
|
|
||||||
if mem_used_zram >= zram_max_sigterm_kb:
|
|
||||||
|
|
||||||
mem_info = 'Soft threshold exceeded\nMemory status that require' \
|
|
||||||
's corrective actions:\n MemUsedZram [{} MiB, {} %] >= zra' \
|
|
||||||
'm_max_sigterm [{} M, {} %]'.format(
|
|
||||||
kib_to_mib(mem_used_zram),
|
|
||||||
percent(mem_used_zram / mem_total),
|
|
||||||
kib_to_mib(zram_max_sigterm_kb),
|
|
||||||
percent(zram_max_sigterm_kb / mem_total))
|
|
||||||
|
|
||||||
implement_corrective_action(SIGTERM, mem_info)
|
|
||||||
psi_t0 = time()
|
|
||||||
continue
|
|
||||||
|
|
||||||
if CHECK_PSI:
|
|
||||||
if psi_avg_value >= sigterm_psi_threshold:
|
|
||||||
sigterm_psi_exceeded = True
|
|
||||||
psi_term_exceeded_timer += delta0
|
|
||||||
else:
|
|
||||||
sigterm_psi_exceeded = False
|
|
||||||
psi_term_exceeded_timer = 0
|
|
||||||
|
|
||||||
if psi_debug:
|
|
||||||
|
|
||||||
log('sigterm_psi_exceeded: {}\n'
|
|
||||||
'psi_term_exceeded_timer: {}\n'.format(
|
|
||||||
sigterm_psi_exceeded,
|
|
||||||
round(psi_term_exceeded_timer, 1)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
if (psi_term_exceeded_timer >= psi_excess_duration and
|
|
||||||
psi_post_action_delay_exceeded):
|
|
||||||
|
|
||||||
mem_info = 'PSI avg ({}) > sigterm_psi_threshold ({})\n' \
|
|
||||||
'PSI avg exceeded psi_excess_duration (value' \
|
|
||||||
' = {} sec) for {} seconds'.format(
|
|
||||||
psi_avg_value,
|
|
||||||
sigterm_psi_threshold,
|
|
||||||
psi_excess_duration,
|
|
||||||
round(psi_term_exceeded_timer, 1)
|
|
||||||
)
|
|
||||||
|
|
||||||
implement_corrective_action(SIGTERM, mem_info)
|
|
||||||
psi_t0 = time()
|
|
||||||
continue
|
|
||||||
|
|
||||||
###########################################################################
|
|
||||||
|
|
||||||
if gui_low_memory_warnings:
|
if gui_low_memory_warnings:
|
||||||
|
|
||||||
if (mem_available <= mem_min_warnings_kb and
|
if masf_s == 'WARN' or zram_s == 'WARN' or psi_s == 'WARN':
|
||||||
swap_free <= swap_min_warnings_kb + 0.1 or
|
|
||||||
mem_used_zram >= zram_max_warnings_kb):
|
|
||||||
|
|
||||||
warn_time_delta = time() - warn_time_now
|
warn_time_delta = time() - warn_time_now
|
||||||
warn_time_now = time()
|
warn_time_now = time()
|
||||||
warn_timer += warn_time_delta
|
warn_timer += warn_time_delta
|
||||||
if warn_timer > min_time_between_warnings:
|
if warn_timer > min_time_between_warnings:
|
||||||
|
|
||||||
send_notify_warn()
|
send_notify_warn()
|
||||||
|
|
||||||
warn_timer = 0
|
warn_timer = 0
|
||||||
|
|
||||||
sleep_after_check_mem()
|
sleep_after_check_mem()
|
||||||
|
@ -107,7 +107,7 @@ sigterm_psi_threshold = 60
|
|||||||
sigkill_psi_threshold = 90
|
sigkill_psi_threshold = 90
|
||||||
|
|
||||||
>= 0, float
|
>= 0, float
|
||||||
psi_excess_duration = 30
|
psi_excess_duration = 40
|
||||||
|
|
||||||
psi_post_action_delay = 20
|
psi_post_action_delay = 20
|
||||||
|
|
||||||
@ -289,6 +289,8 @@ swap_min_warnings = 50 %
|
|||||||
|
|
||||||
zram_max_warnings = 40 %
|
zram_max_warnings = 40 %
|
||||||
|
|
||||||
|
psi_avg_warnings = 60
|
||||||
|
|
||||||
Valid values are floating-point numbers from the range [1; 300].
|
Valid values are floating-point numbers from the range [1; 300].
|
||||||
|
|
||||||
min_time_between_warnings = 15
|
min_time_between_warnings = 15
|
||||||
@ -336,7 +338,7 @@ print_victim_info = True
|
|||||||
|
|
||||||
print_victim_cmdline = False
|
print_victim_cmdline = False
|
||||||
|
|
||||||
max_ancestry_depth = 1
|
max_ancestry_depth = 5
|
||||||
|
|
||||||
separate_log = False
|
separate_log = False
|
||||||
|
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
some avg10=29.70 avg60=51.59 avg300=22.92 total=195239452
|
some avg10=56.70 avg60=51.59 avg300=22.92 total=195239452
|
||||||
full avg10=28.82 avg60=49.77 avg300=21.83 total=182504463
|
full avg10=28.82 avg60=49.77 avg300=21.83 total=182504463
|
||||||
|
Loading…
Reference in New Issue
Block a user