do not check zram by default
This commit is contained in:
parent
77da0efb9a
commit
944c13be7e
593
nohang
593
nohang
@ -11,92 +11,6 @@ from sre_constants import error as invalid_re
|
|||||||
from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP
|
from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP
|
||||||
|
|
||||||
|
|
||||||
start_time = time()
|
|
||||||
|
|
||||||
|
|
||||||
help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG]
|
|
||||||
|
|
||||||
optional arguments:
|
|
||||||
-h, --help show this help message and exit
|
|
||||||
-v, --version print version
|
|
||||||
-t, --test print some tests
|
|
||||||
-p, --print-proc-table
|
|
||||||
print table of processes with their badness values
|
|
||||||
-c CONFIG, --config CONFIG
|
|
||||||
path to the config file, default values:
|
|
||||||
./nohang.conf, /etc/nohang/nohang.conf"""
|
|
||||||
|
|
||||||
|
|
||||||
# System configuration values queried once at start-up via sysconf.
# SC_PAGESIZE is used elsewhere in this file to scale /proc/<pid>/statm
# fields.
SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE'])

conf_err_mess = 'Invalid config. Exit.'

# Signals for which signal_handler is installed.
sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP]

# Signal number -> printable name.
sig_dict = {
    SIGKILL: 'SIGKILL',
    SIGINT: 'SIGINT',
    SIGQUIT: 'SIGQUIT',
    SIGHUP: 'SIGHUP',
    SIGTERM: 'SIGTERM'
}

self_pid = str(os.getpid())
self_uid = os.geteuid()

# True when the effective UID is 0.
root = self_uid == 0

# Prefer a helper located in the current working directory.
if os.path.exists('./nohang_notify_helper'):
    notify_helper_path = './nohang_notify_helper'
else:
    notify_helper_path = '/usr/sbin/nohang_notify_helper'

# victim id -> timestamp recorded by implement_corrective_action().
victim_dict = dict()

victim_id = None

# 'action_handled' holds [timestamp, victim id] of the last handled action.
actions_time_dict = dict()
actions_time_dict['action_handled'] = [time(), victim_id]

# will store corrective actions stat
stat_dict = dict()

separate_log = False  # will be overwritten after parse config
|
|
||||||
|
|
||||||
|
|
||||||
def find_cgroup_indexes(path='/proc/self/cgroup'):
    """Find cgroup-line positions in a /proc/*/cgroup file.

    Scans the file line by line and remembers the zero-based index of
    the line containing ':name=' (returned as the cgroup v1 index) and
    of the line starting with '0::' (returned as the cgroup v2 index).
    If several lines match, the last matching line wins.

    Args:
        path: file to scan; defaults to this process's own cgroup file.

    Returns:
        Tuple (cgroup_v1_index, cgroup_v2_index). An element is None
        when no matching line exists.

    Raises:
        OSError: if the file cannot be opened.
    """
    cgroup_v1_index = cgroup_v2_index = None

    with open(path) as f:
        for index, line in enumerate(f):
            if ':name=' in line:
                cgroup_v1_index = index
            if line.startswith('0::'):
                cgroup_v2_index = index

    return cgroup_v1_index, cgroup_v2_index
|
|
||||||
|
|
||||||
|
|
||||||
cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes()
|
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
# define functions
|
# define functions
|
||||||
@ -115,7 +29,62 @@ def print_self_rss():
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
def get_swap_threshold_tuple(string):
    """Parse a swap threshold taken from the config.

    The value must end with '%' (a percentage) or 'M' (an absolute
    amount). The part before the suffix must be convertible to float.

    Args:
        string: raw config value, e.g. '10 %' or '200 M'.

    Returns:
        (value, True) when the suffix is '%'; value lies in [0, 100].
        (value, False) when the suffix is 'M'; value is the number
        before the suffix multiplied by 1024.

    Any invalid input is reported through errprint() and terminates
    the program with exit(1).
    """
    if string.endswith('%'):
        number = string[:-1]
        if string_to_float_convert_test(number) is None:
            errprint('somewhere swap unit is not float_%')
            exit(1)

        value = float(number.strip())
        # Negated chained comparison: NaN compares false against
        # everything, so this form rejects it too.
        if not 0 <= value <= 100:
            errprint('invalid value, must be from the range[0; 100] %')
            exit(1)

        return value, True

    if string.endswith('M'):
        number = string[:-1]
        if string_to_float_convert_test(number) is None:
            errprint('somewhere swap unit is not float_M')
            exit(1)

        value = float(number.strip()) * 1024
        # "not >=" instead of "<" so that NaN is rejected as well.
        if not value >= 0:
            errprint('invalid unit in config (negative value)')
            exit(1)

        return value, False

    errprint(
        'Invalid config file. There are invalid units somewhere\nExit')
    exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def find_cgroup_indexes(path='/proc/self/cgroup'):
    """Find cgroup-line positions in a /proc/*/cgroup file.

    Scans the file line by line and remembers the zero-based index of
    the line containing ':name=' (returned as the cgroup v1 index) and
    of the line starting with '0::' (returned as the cgroup v2 index).
    If several lines match, the last matching line wins.

    Args:
        path: file to scan; defaults to this process's own cgroup file.

    Returns:
        Tuple (cgroup_v1_index, cgroup_v2_index). An element is None
        when no matching line exists.

    Raises:
        OSError: if the file cannot be opened.
    """
    cgroup_v1_index = cgroup_v2_index = None

    with open(path) as f:
        for index, line in enumerate(f):
            if ':name=' in line:
                cgroup_v1_index = index
            if line.startswith('0::'):
                cgroup_v2_index = index

    return cgroup_v1_index, cgroup_v2_index
|
||||||
|
|
||||||
|
|
||||||
def pid_to_rss(pid):
|
def pid_to_rss(pid):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
rss = int(rline1(
|
rss = int(rline1(
|
||||||
'/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE
|
'/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE
|
||||||
@ -129,6 +98,8 @@ def pid_to_rss(pid):
|
|||||||
|
|
||||||
|
|
||||||
def pid_to_vm_size(pid):
|
def pid_to_vm_size(pid):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
vm_size = int(rline1(
|
vm_size = int(rline1(
|
||||||
'/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE
|
'/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE
|
||||||
@ -141,12 +112,6 @@ def pid_to_vm_size(pid):
|
|||||||
return vm_size
|
return vm_size
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def signal_handler(signum, frame):
|
def signal_handler(signum, frame):
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
@ -193,13 +158,6 @@ def write_self_oom_score_adj(new_value):
|
|||||||
write('/proc/self/oom_score_adj', new_value)
|
write('/proc/self/oom_score_adj', new_value)
|
||||||
|
|
||||||
|
|
||||||
self_oom_score_adj_min = '-600'
|
|
||||||
self_oom_score_adj_max = '-6'
|
|
||||||
|
|
||||||
|
|
||||||
write_self_oom_score_adj(self_oom_score_adj_min)
|
|
||||||
|
|
||||||
|
|
||||||
def valid_re(reg_exp):
|
def valid_re(reg_exp):
|
||||||
"""Validate regular expression.
|
"""Validate regular expression.
|
||||||
"""
|
"""
|
||||||
@ -431,6 +389,8 @@ def pid_to_environ(pid):
|
|||||||
|
|
||||||
|
|
||||||
def pid_to_realpath(pid):
|
def pid_to_realpath(pid):
|
||||||
|
"""
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
return os.path.realpath('/proc/' + pid + '/exe')
|
return os.path.realpath('/proc/' + pid + '/exe')
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
@ -615,9 +575,6 @@ def pid_to_status_unicode(pid):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
|
||||||
|
|
||||||
|
|
||||||
def uptime():
|
def uptime():
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
@ -993,9 +950,6 @@ def get_pid_list():
|
|||||||
return pid_list
|
return pid_list
|
||||||
|
|
||||||
|
|
||||||
pid_list = get_pid_list()
|
|
||||||
|
|
||||||
|
|
||||||
def get_non_decimal_pids():
|
def get_non_decimal_pids():
|
||||||
"""
|
"""
|
||||||
"""
|
"""
|
||||||
@ -1362,23 +1316,14 @@ def find_victim_info(pid, victim_badness, name):
|
|||||||
return victim_info
|
return victim_info
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def implement_corrective_action(signal):
|
def implement_corrective_action(signal):
|
||||||
"""
|
"""
|
||||||
Find victim with highest badness and send SIGTERM/SIGKILL
|
Find victim with highest badness and send SIGTERM/SIGKILL
|
||||||
"""
|
"""
|
||||||
|
time0 = time()
|
||||||
|
|
||||||
|
# выходим из фции, если для SIGTERM порога не превышено время
|
||||||
# выходим из фции, если для SIGTERM порога не превышено время min_delay_after_sigterm и спим в течение over_sleep
|
# min_delay_after_sigterm и спим в течение over_sleep
|
||||||
if signal is SIGTERM:
|
if signal is SIGTERM:
|
||||||
|
|
||||||
dt = time() - actions_time_dict['action_handled'][0]
|
dt = time() - actions_time_dict['action_handled'][0]
|
||||||
@ -1397,13 +1342,12 @@ def implement_corrective_action(signal):
|
|||||||
else:
|
else:
|
||||||
print('min_delay_after_sigterm IS EXCEEDED, it is time to action')
|
print('min_delay_after_sigterm IS EXCEEDED, it is time to action')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
При заходе в фцию проверяем права на сигтерм. Права на сигкилл у нас всегда есть
|
При заходе в фцию проверяем права на сигтерм. Права на сигкилл у нас
|
||||||
(потому что идем дальше только после полн освободж памяти после смерти жертвы)
|
всегда есть
|
||||||
|
(потому что идем дальше только после полн освободж памяти после
|
||||||
|
смерти жертвы)
|
||||||
|
|
||||||
actions_time_dict[action_handled] = time()
|
actions_time_dict[action_handled] = time()
|
||||||
actions_time_dict[veto] = True
|
actions_time_dict[veto] = True
|
||||||
@ -1414,7 +1358,6 @@ def implement_corrective_action(signal):
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
log(mem_info)
|
log(mem_info)
|
||||||
|
|
||||||
pid, victim_badness, name = find_victim(print_proc_table)
|
pid, victim_badness, name = find_victim(print_proc_table)
|
||||||
@ -1425,10 +1368,8 @@ def implement_corrective_action(signal):
|
|||||||
victim_info = find_victim_info(pid, victim_badness, name)
|
victim_info = find_victim_info(pid, victim_badness, name)
|
||||||
log(victim_info)
|
log(victim_info)
|
||||||
|
|
||||||
|
# пороги могли превысиься за время поиска жертвы (поиск может занимать
|
||||||
|
# сотни миллисекунд)
|
||||||
|
|
||||||
# пороги могли превысиься за время поиска жертвы (поиск может занимать сотни миллисекунд)
|
|
||||||
mem_available, swap_total, swap_free = check_mem_and_swap()
|
mem_available, swap_total, swap_free = check_mem_and_swap()
|
||||||
|
|
||||||
ma_mib = int(mem_available) / 1024.0
|
ma_mib = int(mem_available) / 1024.0
|
||||||
@ -1445,15 +1386,8 @@ def implement_corrective_action(signal):
|
|||||||
log('Hard threshold exceeded')
|
log('Hard threshold exceeded')
|
||||||
signal = SIGKILL
|
signal = SIGKILL
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
victim_id = get_victim_id(pid)
|
victim_id = get_victim_id(pid)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# kill the victim if it doesn't respond to SIGTERM В ТЕЧЕНИЕ
|
# kill the victim if it doesn't respond to SIGTERM В ТЕЧЕНИЕ
|
||||||
# ЗАДАННОГО ВРЕМЕНИ
|
# ЗАДАННОГО ВРЕМЕНИ
|
||||||
|
|
||||||
@ -1465,19 +1399,10 @@ def implement_corrective_action(signal):
|
|||||||
dt = time() - victim_dict[victim_id]
|
dt = time() - victim_dict[victim_id]
|
||||||
|
|
||||||
if dt > max_post_sigterm_victim_lifetime:
|
if dt > max_post_sigterm_victim_lifetime:
|
||||||
print('max_post_sigterm_victim_lifetime exceeded: the victim will get SIGKILL')
|
print('max_post_sigterm_victim_lifetime exceeded: the '
|
||||||
|
'victim will get SIGKILL')
|
||||||
signal = SIGKILL
|
signal = SIGKILL
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# matching with re to customize corrective actions
|
# matching with re to customize corrective actions
|
||||||
soft_match = False
|
soft_match = False
|
||||||
|
|
||||||
@ -1529,8 +1454,10 @@ def implement_corrective_action(signal):
|
|||||||
|
|
||||||
response_time = time() - time0
|
response_time = time() - time0
|
||||||
|
|
||||||
# тут надо, как и при дефолтном действии, проверять существование жертвы, ее реакцию на действие,
|
# тут надо, как и при дефолтном действии, проверять существование
|
||||||
# и время ее смерти в случае успеха, о обновление таймстемпов действия
|
# жертвы, ее реакцию на действие,
|
||||||
|
# и время ее смерти в случае успеха, о обновление таймстемпов
|
||||||
|
# действия
|
||||||
|
|
||||||
etc_info = 'Implement a corrective act' \
|
etc_info = 'Implement a corrective act' \
|
||||||
'ion:\n Run the command: {}' \
|
'ion:\n Run the command: {}' \
|
||||||
@ -1552,29 +1479,15 @@ def implement_corrective_action(signal):
|
|||||||
command.replace('$PID', pid).replace(
|
command.replace('$PID', pid).replace(
|
||||||
'$NAME', pid_to_name(pid)))
|
'$NAME', pid_to_name(pid)))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
# обычное действие через сигнал
|
# обычное действие через сигнал
|
||||||
try:
|
try:
|
||||||
|
|
||||||
|
|
||||||
os.kill(int(pid), signal)
|
os.kill(int(pid), signal)
|
||||||
kill_timestamp = time()
|
kill_timestamp = time()
|
||||||
response_time = kill_timestamp - time0
|
response_time = kill_timestamp - time0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
exe_exists = os.path.exists('/proc/{}/exe'.format(pid))
|
exe_exists = os.path.exists('/proc/{}/exe'.format(pid))
|
||||||
rss = pid_to_rss(pid)
|
rss = pid_to_rss(pid)
|
||||||
@ -1586,37 +1499,29 @@ def implement_corrective_action(signal):
|
|||||||
sleep(0.001)
|
sleep(0.001)
|
||||||
|
|
||||||
if dt > 0.01:
|
if dt > 0.01:
|
||||||
log('Timer (value = 0.01 sec) expired; seems' \
|
log('Timer (value = 0.01 sec) expired; seems'
|
||||||
' like the victim handles signal')
|
' like the victim handles signal')
|
||||||
|
|
||||||
actions_time_dict['action_handled'] = [time(), get_victim_id(pid)]
|
actions_time_dict['action_handled'] = [
|
||||||
|
time(), get_victim_id(pid)]
|
||||||
|
|
||||||
if victim_id not in victim_dict: # хз как надо.
|
if victim_id not in victim_dict: # хз как надо.
|
||||||
victim_dict.update({victim_id: time()})
|
victim_dict.update({victim_id: time()})
|
||||||
|
|
||||||
|
|
||||||
# log('actions_time_dict', actions_time_dict)
|
# log('actions_time_dict', actions_time_dict)
|
||||||
# log('victim_dict', victim_dict)
|
# log('victim_dict', victim_dict)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
log('Process exited (VmRSS = 0) in {} sec'.format(
|
log('Process exited (VmRSS = 0) in {} sec'.format(
|
||||||
round(dt, 5)))
|
round(dt, 5)))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if signal is SIGKILL or not exe_exists or rss == 0:
|
if signal is SIGKILL or not exe_exists or rss == 0:
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
sleep(0.001)
|
sleep(0.001)
|
||||||
rss = pid_to_rss(pid) # рсс не важен когда путь не существует. Проверяй просто существование пид.
|
# рсс не важен когда путь не существует. Проверяй
|
||||||
|
# просто существование пид.
|
||||||
|
rss = pid_to_rss(pid)
|
||||||
if rss is None:
|
if rss is None:
|
||||||
break
|
break
|
||||||
t1 = time()
|
t1 = time()
|
||||||
@ -1624,7 +1529,6 @@ def implement_corrective_action(signal):
|
|||||||
log('The victim died in {} sec'.format(
|
log('The victim died in {} sec'.format(
|
||||||
round(kill_duration, 3)))
|
round(kill_duration, 3)))
|
||||||
|
|
||||||
|
|
||||||
mem_available, swap_total, swap_free = check_mem_and_swap()
|
mem_available, swap_total, swap_free = check_mem_and_swap()
|
||||||
|
|
||||||
ma_mib = int(mem_available) / 1024.0
|
ma_mib = int(mem_available) / 1024.0
|
||||||
@ -1636,12 +1540,6 @@ def implement_corrective_action(signal):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
send_result = 'total response time: {} ms'.format(
|
send_result = 'total response time: {} ms'.format(
|
||||||
round(response_time * 1000))
|
round(response_time * 1000))
|
||||||
|
|
||||||
@ -1700,11 +1598,12 @@ def implement_corrective_action(signal):
|
|||||||
update_stat_dict_and_print(key)
|
update_stat_dict_and_print(key)
|
||||||
|
|
||||||
# тут надо поспать хорошенько. а может и счетчики поправить.
|
# тут надо поспать хорошенько. а может и счетчики поправить.
|
||||||
# херню несу. во-первых, внезапно может кто-то появиться c блльшим бэднес.. Далее надо минимизировать аутпут спам.
|
# херню несу. во-первых, внезапно может кто-то появиться c блльшим
|
||||||
|
# бэднес.. Далее надо минимизировать аутпут спам.
|
||||||
sleep(over_sleep)
|
sleep(over_sleep)
|
||||||
|
|
||||||
|
# обновлять время не на каждый кил, а только на килл той жертвы,
|
||||||
# обновлять время не на каждый кил, а только на килл той жертвы, которая не отвечала на софт экшн.
|
# которая не отвечала на софт экшн.
|
||||||
# Вывод: ко времени действия прилагать также виктим айди.
|
# Вывод: ко времени действия прилагать также виктим айди.
|
||||||
|
|
||||||
print('##################################################################')
|
print('##################################################################')
|
||||||
@ -1739,17 +1638,23 @@ def sleep_after_check_mem():
|
|||||||
|
|
||||||
t_mem = mem_point / rate_mem
|
t_mem = mem_point / rate_mem
|
||||||
t_swap = swap_point / rate_swap
|
t_swap = swap_point / rate_swap
|
||||||
|
|
||||||
|
if CHECK_ZRAM:
|
||||||
t_zram = (mem_total * 0.8 - mem_used_zram) / rate_zram
|
t_zram = (mem_total * 0.8 - mem_used_zram) / rate_zram
|
||||||
if t_zram < 0:
|
if t_zram < 0:
|
||||||
t_zram = 0
|
t_zram = 0
|
||||||
|
t_mem_zram = t_mem + t_zram
|
||||||
|
|
||||||
t_mem_swap = t_mem + t_swap
|
t_mem_swap = t_mem + t_swap
|
||||||
t_mem_zram = t_mem + t_zram
|
|
||||||
|
if CHECK_ZRAM:
|
||||||
|
|
||||||
if t_mem_swap <= t_mem_zram:
|
if t_mem_swap <= t_mem_zram:
|
||||||
t = t_mem_swap
|
t = t_mem_swap
|
||||||
else:
|
else:
|
||||||
t = t_mem_zram
|
t = t_mem_zram
|
||||||
|
else:
|
||||||
|
t = t_mem_swap
|
||||||
|
|
||||||
if t > max_sleep:
|
if t > max_sleep:
|
||||||
t = max_sleep
|
t = max_sleep
|
||||||
@ -1841,6 +1746,83 @@ def calculate_percent(arg_key):
|
|||||||
##########################################################################
|
##########################################################################
|
||||||
|
|
||||||
|
|
||||||
|
start_time = time()
|
||||||
|
|
||||||
|
|
||||||
|
help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG]
|
||||||
|
|
||||||
|
optional arguments:
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
-v, --version print version
|
||||||
|
-t, --test print some tests
|
||||||
|
-p, --print-proc-table
|
||||||
|
print table of processes with their badness values
|
||||||
|
-c CONFIG, --config CONFIG
|
||||||
|
path to the config file, default values:
|
||||||
|
./nohang.conf, /etc/nohang/nohang.conf"""
|
||||||
|
|
||||||
|
|
||||||
|
# System configuration values queried once at start-up via sysconf.
# SC_PAGESIZE is used elsewhere in this file to scale /proc/<pid>/statm
# fields.
SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE'])

conf_err_mess = 'Invalid config. Exit.'

# Signals for which signal_handler is installed.
sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP]

# Signal number -> printable name.
sig_dict = {
    SIGKILL: 'SIGKILL',
    SIGINT: 'SIGINT',
    SIGQUIT: 'SIGQUIT',
    SIGHUP: 'SIGHUP',
    SIGTERM: 'SIGTERM'
}

self_pid = str(os.getpid())
self_uid = os.geteuid()

# True when the effective UID is 0.
root = self_uid == 0

# Prefer a helper located in the current working directory.
if os.path.exists('./nohang_notify_helper'):
    notify_helper_path = './nohang_notify_helper'
else:
    notify_helper_path = '/usr/sbin/nohang_notify_helper'

# victim id -> timestamp recorded by implement_corrective_action().
victim_dict = dict()

victim_id = None

# 'action_handled' holds [timestamp, victim id] of the last handled action.
actions_time_dict = dict()
actions_time_dict['action_handled'] = [time(), victim_id]

# will store corrective actions stat
stat_dict = dict()

separate_log = False  # will be overwritten after parse config

cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes()

self_oom_score_adj_min = '-600'
self_oom_score_adj_max = '-6'

# Start with the lower of the two oom_score_adj values defined above.
write_self_oom_score_adj(self_oom_score_adj_min)

pid_list = get_pid_list()
|
||||||
|
|
||||||
|
|
||||||
print_proc_table_flag = False
|
print_proc_table_flag = False
|
||||||
|
|
||||||
if len(argv) == 1:
|
if len(argv) == 1:
|
||||||
@ -1879,9 +1861,6 @@ else:
|
|||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
|
||||||
##########################################################################
|
|
||||||
|
|
||||||
|
|
||||||
# find mem_total
|
# find mem_total
|
||||||
# find positions of SwapFree and SwapTotal in /proc/meminfo
|
# find positions of SwapFree and SwapTotal in /proc/meminfo
|
||||||
|
|
||||||
@ -1928,8 +1907,6 @@ except ValueError:
|
|||||||
detailed_rss = False
|
detailed_rss = False
|
||||||
# print('It is not Linux 4.5+')
|
# print('It is not Linux 4.5+')
|
||||||
|
|
||||||
##########################################################################
|
|
||||||
|
|
||||||
|
|
||||||
log('Config: ' + config)
|
log('Config: ' + config)
|
||||||
|
|
||||||
@ -2167,6 +2144,8 @@ gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
|
|||||||
gui_notifications = conf_parse_bool('gui_notifications')
|
gui_notifications = conf_parse_bool('gui_notifications')
|
||||||
decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
|
decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
|
||||||
ignore_psi = conf_parse_bool('ignore_psi')
|
ignore_psi = conf_parse_bool('ignore_psi')
|
||||||
|
ignore_zram = conf_parse_bool('ignore_zram')
|
||||||
|
|
||||||
|
|
||||||
(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent
|
(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent
|
||||||
) = calculate_percent('mem_min_sigterm')
|
) = calculate_percent('mem_min_sigterm')
|
||||||
@ -2559,43 +2538,6 @@ psi_support = os.path.exists(psi_path)
|
|||||||
# Get KiB levels if it's possible.
|
# Get KiB levels if it's possible.
|
||||||
|
|
||||||
|
|
||||||
def get_swap_threshold_tuple(string):
    """Parse a swap threshold taken from the config.

    The value must end with '%' (a percentage) or 'M' (an absolute
    amount). The part before the suffix must be convertible to float.

    Args:
        string: raw config value, e.g. '10 %' or '200 M'.

    Returns:
        (value, True) when the suffix is '%'; value lies in [0, 100].
        (value, False) when the suffix is 'M'; value is the number
        before the suffix multiplied by 1024.

    Any invalid input is reported through errprint() and terminates
    the program with exit(1).
    """
    if string.endswith('%'):
        number = string[:-1]
        if string_to_float_convert_test(number) is None:
            errprint('somewhere swap unit is not float_%')
            exit(1)

        value = float(number.strip())
        # Negated chained comparison: NaN compares false against
        # everything, so this form rejects it too.
        if not 0 <= value <= 100:
            errprint('invalid value, must be from the range[0; 100] %')
            exit(1)

        return value, True

    if string.endswith('M'):
        number = string[:-1]
        if string_to_float_convert_test(number) is None:
            errprint('somewhere swap unit is not float_M')
            exit(1)

        value = float(number.strip()) * 1024
        # "not >=" instead of "<" so that NaN is rejected as well.
        if not value >= 0:
            errprint('invalid unit in config (negative value)')
            exit(1)

        return value, False

    errprint(
        'Invalid config file. There are invalid units somewhere\nExit')
    exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm)
|
swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm)
|
||||||
swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill)
|
swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill)
|
||||||
swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings)
|
swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings)
|
||||||
@ -2732,14 +2674,9 @@ mlockall()
|
|||||||
# print_self_rss()
|
# print_self_rss()
|
||||||
|
|
||||||
|
|
||||||
log('Monitoring has started!')
|
|
||||||
|
|
||||||
stdout.flush()
|
|
||||||
|
|
||||||
##########################################################################
|
|
||||||
|
|
||||||
psi_avg_string = '' # will be overwritten if PSI monitoring enabled
|
psi_avg_string = '' # will be overwritten if PSI monitoring enabled
|
||||||
|
|
||||||
|
mem_used_zram = 0
|
||||||
|
|
||||||
if psi_support and not ignore_psi:
|
if psi_support and not ignore_psi:
|
||||||
psi_t0 = time()
|
psi_t0 = time()
|
||||||
@ -2760,58 +2697,26 @@ for i in sig_list:
|
|||||||
signal(i, signal_handler)
|
signal(i, signal_handler)
|
||||||
|
|
||||||
|
|
||||||
|
CHECK_PSI = False
|
||||||
|
if psi_support and not ignore_psi:
|
||||||
|
CHECK_PSI = True
|
||||||
|
|
||||||
|
|
||||||
|
CHECK_ZRAM = not ignore_zram
|
||||||
|
|
||||||
|
log('Monitoring has started!')
|
||||||
|
|
||||||
|
stdout.flush()
|
||||||
|
|
||||||
|
|
||||||
|
##########################################################################
|
||||||
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
if psi_support and not ignore_psi:
|
# Q = time()
|
||||||
|
|
||||||
psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
|
# FIND VALUES: mem, swap, zram, psi
|
||||||
|
|
||||||
if print_mem_check_results:
|
|
||||||
psi_avg_string = 'PSI avg value: {} | '.format(
|
|
||||||
str(psi_avg_value).rjust(6))
|
|
||||||
|
|
||||||
if psi_avg_value >= sigkill_psi_threshold:
|
|
||||||
sigkill_psi_exceeded = True
|
|
||||||
else:
|
|
||||||
sigkill_psi_exceeded = False
|
|
||||||
|
|
||||||
if psi_avg_value >= sigterm_psi_threshold:
|
|
||||||
sigterm_psi_exceeded = True
|
|
||||||
else:
|
|
||||||
sigterm_psi_exceeded = False
|
|
||||||
|
|
||||||
if time() - psi_t0 >= psi_post_action_delay:
|
|
||||||
psi_post_action_delay_exceeded = True
|
|
||||||
else:
|
|
||||||
psi_post_action_delay_exceeded = False
|
|
||||||
|
|
||||||
if psi_debug:
|
|
||||||
log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps'
|
|
||||||
'i_post_action_delay_exceeded: {}'.format(
|
|
||||||
sigterm_psi_exceeded,
|
|
||||||
sigkill_psi_exceeded,
|
|
||||||
psi_post_action_delay_exceeded))
|
|
||||||
|
|
||||||
if sigkill_psi_exceeded and psi_post_action_delay_exceeded:
|
|
||||||
time0 = time()
|
|
||||||
mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \
|
|
||||||
'old ({})'.format(
|
|
||||||
psi_avg_value, sigkill_psi_threshold)
|
|
||||||
|
|
||||||
implement_corrective_action(SIGKILL)
|
|
||||||
|
|
||||||
psi_t0 = time()
|
|
||||||
continue
|
|
||||||
|
|
||||||
if sigterm_psi_exceeded and psi_post_action_delay_exceeded:
|
|
||||||
time0 = time()
|
|
||||||
mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \
|
|
||||||
'shold ({})'.format(psi_avg_value, sigterm_psi_threshold)
|
|
||||||
|
|
||||||
implement_corrective_action(SIGTERM)
|
|
||||||
|
|
||||||
psi_t0 = time()
|
|
||||||
continue
|
|
||||||
|
|
||||||
mem_available, swap_total, swap_free = check_mem_and_swap()
|
mem_available, swap_total, swap_free = check_mem_and_swap()
|
||||||
|
|
||||||
@ -2825,8 +2730,30 @@ while True:
|
|||||||
if swap_warn_is_percent:
|
if swap_warn_is_percent:
|
||||||
swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0
|
swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0
|
||||||
|
|
||||||
|
if swap_total > swap_min_sigkill_kb:
|
||||||
|
swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1))
|
||||||
|
else:
|
||||||
|
swap_sigkill_pc = '-'
|
||||||
|
|
||||||
|
if swap_total > swap_min_sigterm_kb:
|
||||||
|
swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1))
|
||||||
|
else:
|
||||||
|
swap_sigterm_pc = '-'
|
||||||
|
|
||||||
|
if CHECK_ZRAM:
|
||||||
mem_used_zram = check_zram()
|
mem_used_zram = check_zram()
|
||||||
|
|
||||||
|
if CHECK_PSI:
|
||||||
|
psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
|
||||||
|
if time() - psi_t0 >= psi_post_action_delay:
|
||||||
|
psi_post_action_delay_exceeded = True
|
||||||
|
else:
|
||||||
|
psi_post_action_delay_exceeded = False
|
||||||
|
|
||||||
|
if print_mem_check_results:
|
||||||
|
psi_avg_string = 'PSI avg value: {} | '.format(
|
||||||
|
str(psi_avg_value).rjust(6))
|
||||||
|
|
||||||
if print_mem_check_results:
|
if print_mem_check_results:
|
||||||
|
|
||||||
wt1 = time()
|
wt1 = time()
|
||||||
@ -2894,20 +2821,12 @@ while True:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if swap_total > swap_min_sigkill_kb:
|
###########################################################################
|
||||||
swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1))
|
|
||||||
else:
|
|
||||||
swap_sigkill_pc = '-'
|
|
||||||
|
|
||||||
if swap_total > swap_min_sigterm_kb:
|
# CHECK HARD THRESHOLDS (SIGKILL LEVEL)
|
||||||
swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1))
|
|
||||||
else:
|
|
||||||
swap_sigterm_pc = '-'
|
|
||||||
|
|
||||||
# MEM SWAP KILL
|
|
||||||
if (mem_available <= mem_min_sigkill_kb and
|
if (mem_available <= mem_min_sigkill_kb and
|
||||||
swap_free <= swap_min_sigkill_kb):
|
swap_free <= swap_min_sigkill_kb):
|
||||||
time0 = time()
|
|
||||||
|
|
||||||
mem_info = 'Hard threshold exceeded\nMemory status that requ' \
|
mem_info = 'Hard threshold exceeded\nMemory status that requ' \
|
||||||
'ires corrective actions:' \
|
'ires corrective actions:' \
|
||||||
@ -2924,13 +2843,11 @@ while True:
|
|||||||
swap_sigkill_pc)
|
swap_sigkill_pc)
|
||||||
|
|
||||||
implement_corrective_action(SIGKILL)
|
implement_corrective_action(SIGKILL)
|
||||||
|
|
||||||
psi_t0 = time()
|
psi_t0 = time()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# ZRAM KILL
|
if CHECK_ZRAM:
|
||||||
if mem_used_zram >= zram_max_sigkill_kb:
|
if mem_used_zram >= zram_max_sigkill_kb:
|
||||||
time0 = time()
|
|
||||||
|
|
||||||
mem_info = 'Hard threshold exceeded\nMemory status that requir' \
|
mem_info = 'Hard threshold exceeded\nMemory status that requir' \
|
||||||
'es corrective actions:' \
|
'es corrective actions:' \
|
||||||
@ -2942,15 +2859,31 @@ while True:
|
|||||||
percent(zram_max_sigkill_kb / mem_total))
|
percent(zram_max_sigkill_kb / mem_total))
|
||||||
|
|
||||||
implement_corrective_action(SIGKILL)
|
implement_corrective_action(SIGKILL)
|
||||||
|
|
||||||
psi_t0 = time()
|
psi_t0 = time()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# MEM SWAP TERM
|
if CHECK_PSI:
|
||||||
if mem_available <= mem_min_sigterm_kb and \
|
if psi_avg_value >= sigkill_psi_threshold:
|
||||||
swap_free <= swap_min_sigterm_kb:
|
sigkill_psi_exceeded = True
|
||||||
|
else:
|
||||||
|
sigkill_psi_exceeded = False
|
||||||
|
|
||||||
time0 = time()
|
if sigkill_psi_exceeded and psi_post_action_delay_exceeded:
|
||||||
|
|
||||||
|
mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \
|
||||||
|
'old ({})'.format(
|
||||||
|
psi_avg_value, sigkill_psi_threshold)
|
||||||
|
|
||||||
|
implement_corrective_action(SIGKILL)
|
||||||
|
psi_t0 = time()
|
||||||
|
continue
|
||||||
|
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
|
# CHECK SOFT THRESHOLDS (SIGTERM LEVEL)
|
||||||
|
|
||||||
|
if (mem_available <= mem_min_sigterm_kb and
|
||||||
|
swap_free <= swap_min_sigterm_kb):
|
||||||
|
|
||||||
mem_info = 'Soft threshold exceeded\nMemory status that requi' \
|
mem_info = 'Soft threshold exceeded\nMemory status that requi' \
|
||||||
'res corrective actions:' \
|
'res corrective actions:' \
|
||||||
@ -2967,34 +2900,54 @@ while True:
|
|||||||
swap_sigterm_pc)
|
swap_sigterm_pc)
|
||||||
|
|
||||||
implement_corrective_action(SIGTERM)
|
implement_corrective_action(SIGTERM)
|
||||||
|
|
||||||
psi_t0 = time()
|
psi_t0 = time()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# ZRAM TERM
|
if CHECK_ZRAM:
|
||||||
if mem_used_zram >= zram_max_sigterm_kb:
|
if mem_used_zram >= zram_max_sigterm_kb:
|
||||||
time0 = time()
|
|
||||||
|
|
||||||
mem_info = 'Soft threshold exceeded\nMemory status that requ' \
|
mem_info = 'Soft threshold exceeded\nMemory status that require' \
|
||||||
'ires corrective actions:' \
|
's corrective actions:\n MemUsedZram [{} MiB, {} %] >= zra' \
|
||||||
'\n MemUsedZram [{} MiB, {} %] >= ' \
|
'm_max_sigterm [{} M, {} %]'.format(
|
||||||
'zram_max_sigterm [{} M, {} %]'.format(
|
|
||||||
kib_to_mib(mem_used_zram),
|
kib_to_mib(mem_used_zram),
|
||||||
percent(mem_used_zram / mem_total),
|
percent(mem_used_zram / mem_total),
|
||||||
kib_to_mib(zram_max_sigterm_kb),
|
kib_to_mib(zram_max_sigterm_kb),
|
||||||
percent(zram_max_sigterm_kb / mem_total))
|
percent(zram_max_sigterm_kb / mem_total))
|
||||||
|
|
||||||
implement_corrective_action(SIGTERM)
|
implement_corrective_action(SIGTERM)
|
||||||
|
|
||||||
psi_t0 = time()
|
psi_t0 = time()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# LOW MEMORY WARNINGS
|
if CHECK_PSI:
|
||||||
|
if psi_avg_value >= sigterm_psi_threshold:
|
||||||
|
sigterm_psi_exceeded = True
|
||||||
|
else:
|
||||||
|
sigterm_psi_exceeded = False
|
||||||
|
|
||||||
|
if psi_debug:
|
||||||
|
log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps'
|
||||||
|
'i_post_action_delay_exceeded: {}'.format(
|
||||||
|
sigterm_psi_exceeded,
|
||||||
|
sigkill_psi_exceeded,
|
||||||
|
psi_post_action_delay_exceeded))
|
||||||
|
|
||||||
|
if sigterm_psi_exceeded and psi_post_action_delay_exceeded:
|
||||||
|
|
||||||
|
mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \
|
||||||
|
'shold ({})'.format(psi_avg_value, sigterm_psi_threshold)
|
||||||
|
|
||||||
|
implement_corrective_action(SIGTERM)
|
||||||
|
psi_t0 = time()
|
||||||
|
continue
|
||||||
|
|
||||||
|
###########################################################################
|
||||||
|
|
||||||
if gui_low_memory_warnings:
|
if gui_low_memory_warnings:
|
||||||
|
|
||||||
if mem_available <= mem_min_warnings_kb and \
|
if (mem_available <= mem_min_warnings_kb and
|
||||||
swap_free <= swap_min_warnings_kb + 0.1 or \
|
swap_free <= swap_min_warnings_kb + 0.1 or
|
||||||
mem_used_zram >= zram_max_warnings_kb:
|
mem_used_zram >= zram_max_warnings_kb):
|
||||||
|
|
||||||
warn_time_delta = time() - warn_time_now
|
warn_time_delta = time() - warn_time_now
|
||||||
warn_time_now = time()
|
warn_time_now = time()
|
||||||
warn_timer += warn_time_delta
|
warn_timer += warn_time_delta
|
||||||
@ -3003,17 +2956,7 @@ while True:
|
|||||||
warn_timer = 0
|
warn_timer = 0
|
||||||
|
|
||||||
|
|
||||||
|
# x = time() - Q
|
||||||
|
# print(x * 1000)
|
||||||
|
|
||||||
|
|
||||||
# SLEEP BETWEEN MEM CHECKS
|
|
||||||
sleep_after_check_mem()
|
sleep_after_check_mem()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
72
nohang.conf
72
nohang.conf
@ -1,34 +1,38 @@
|
|||||||
This is nohang config file.
|
This is nohang config file.
|
||||||
Lines starting with #, tabs and spaces are comments.
|
Lines starting with #, tabs and spaces are comments.
|
||||||
Lines starting with @ contain optional parameters.
|
Lines starting with @ contain optional parameters.
|
||||||
|
All values are case sensitive.
|
||||||
|
Be careful: nohang doesn't forbid you to shoot yourself in the foot.
|
||||||
|
|
||||||
The configuration includes the following sections:
|
The configuration includes the following sections:
|
||||||
|
|
||||||
|
0. Common zram settings
|
||||||
1. Memory levels to respond to as an OOM threat
|
1. Memory levels to respond to as an OOM threat
|
||||||
2. Response on PSI memory metrics
|
2. Response on PSI memory metrics
|
||||||
3. The frequency of checking the level of available memory
|
3. The frequency of checking the level of available memory
|
||||||
(and CPU usage)
|
(and CPU usage)
|
||||||
4. The prevention of killing innocent victims
|
4. The prevention of killing innocent victims
|
||||||
5. Impact on the badness of processes via matching their
|
5. Impact on the badness of processes via matching their names, cgroups and
|
||||||
- names,
|
cmdlines with specified regular expressions
|
||||||
- cgroups,
|
|
||||||
- cmdlines and
|
|
||||||
- UIDs
|
|
||||||
with regular expressions
|
|
||||||
6. Customize corrective actions: the execution of a specific command
|
6. Customize corrective actions: the execution of a specific command
|
||||||
instead of sending the SIGTERM signal
|
instead of sending the SIGTERM signal
|
||||||
7. GUI notifications:
|
7. GUI notifications:
|
||||||
- OOM prevention results and
|
|
||||||
- low memory warnings
|
- low memory warnings
|
||||||
|
- OOM prevention results
|
||||||
8. Output verbosity
|
8. Output verbosity
|
||||||
9. Misc
|
9. Misc
|
||||||
|
|
||||||
Just read the description of the parameters and edit the values.
|
Just read the description of the parameters and edit the values.
|
||||||
Please restart the program after editing the config.
|
Please restart the program after editing the config.
|
||||||
|
|
||||||
Bool values are case sensitive.
|
###############################################################################
|
||||||
|
|
||||||
#####################################################################
|
0. Common zram settings
|
||||||
|
|
||||||
|
See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
|
||||||
|
You may need to set `ignore_zram = False` if you have a big zram disksize.
|
||||||
|
|
||||||
|
ignore_zram = False
|
||||||
|
|
||||||
1. Thresholds below which a signal should be sent to the victim
|
1. Thresholds below which a signal should be sent to the victim
|
||||||
|
|
||||||
@ -57,9 +61,9 @@ swap_min_sigkill = 5 %
|
|||||||
numbers from the range [0; 90] %.
|
numbers from the range [0; 90] %.
|
||||||
|
|
||||||
zram_max_sigterm = 50 %
|
zram_max_sigterm = 50 %
|
||||||
zram_max_sigkill = 55 %
|
zram_max_sigkill = 60 %
|
||||||
|
|
||||||
#####################################################################
|
###############################################################################
|
||||||
|
|
||||||
2. Response on PSI memory metrics (it needs Linux 4.20 and up)
|
2. Response on PSI memory metrics (it needs Linux 4.20 and up)
|
||||||
|
|
||||||
@ -102,7 +106,7 @@ sigkill_psi_threshold = 90
|
|||||||
|
|
||||||
psi_post_action_delay = 60
|
psi_post_action_delay = 60
|
||||||
|
|
||||||
#####################################################################
|
###############################################################################
|
||||||
|
|
||||||
3. The frequency of checking the amount of available memory
|
3. The frequency of checking the amount of available memory
|
||||||
(and CPU usage)
|
(and CPU usage)
|
||||||
@ -124,7 +128,7 @@ psi_post_action_delay = 60
|
|||||||
|
|
||||||
rate_mem = 4000
|
rate_mem = 4000
|
||||||
rate_swap = 1500
|
rate_swap = 1500
|
||||||
rate_zram = 500
|
rate_zram = 6000
|
||||||
|
|
||||||
See also https://github.com/rfjakob/earlyoom/issues/61
|
See also https://github.com/rfjakob/earlyoom/issues/61
|
||||||
|
|
||||||
@ -135,7 +139,7 @@ min_sleep = 0.1
|
|||||||
|
|
||||||
over_sleep = 0.05
|
over_sleep = 0.05
|
||||||
|
|
||||||
#####################################################################
|
###############################################################################
|
||||||
|
|
||||||
4. The prevention of killing innocent victims
|
4. The prevention of killing innocent victims
|
||||||
|
|
||||||
@ -144,7 +148,7 @@ over_sleep = 0.05
|
|||||||
min_badness = 20
|
min_badness = 20
|
||||||
|
|
||||||
Valid values are non-negative floating-point numbers.
|
Valid values are non-negative floating-point numbers.
|
||||||
Min delay if a victim does not respond to SIGTERM in 10 ms.
|
Min delay if a victim doesn't respond to SIGTERM in 10 ms.
|
||||||
|
|
||||||
min_delay_after_sigterm = 3
|
min_delay_after_sigterm = 3
|
||||||
|
|
||||||
@ -157,7 +161,7 @@ decrease_oom_score_adj = False
|
|||||||
|
|
||||||
oom_score_adj_max = 0
|
oom_score_adj_max = 0
|
||||||
|
|
||||||
#####################################################################
|
###############################################################################
|
||||||
|
|
||||||
5. Impact on the badness of processes via matching their names,
|
5. Impact on the badness of processes via matching their names,
|
||||||
cmdlines or UIDs with regular expressions using re.search().
|
cmdlines or UIDs with regular expressions using re.search().
|
||||||
@ -194,21 +198,15 @@ oom_score_adj_max = 0
|
|||||||
|
|
||||||
A good option that allows fine adjustment.
|
A good option that allows fine adjustment.
|
||||||
|
|
||||||
Prefer electron-based apps and chromium tabs
|
Prefer chromium tabs and electron-based apps
|
||||||
@CMDLINE_RE 200 /// --type=renderer
|
@CMDLINE_RE 200 /// --type=renderer
|
||||||
|
|
||||||
Prefer firefox tabs
|
Prefer firefox tabs (Web Content and WebExtensions)
|
||||||
@CMDLINE_RE 100 /// -greomni|-childID
|
@CMDLINE_RE 100 /// -appomni
|
||||||
|
|
||||||
|
|
||||||
@CMDLINE_RE -500 /// python
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@CMDLINE_RE -200 /// ^/usr/lib/virtualbox
|
@CMDLINE_RE -200 /// ^/usr/lib/virtualbox
|
||||||
|
|
||||||
5.3 Matching UIDs with RE patterns
|
5.3 Matching eUIDs with RE patterns
|
||||||
|
|
||||||
The slowest option
|
The slowest option
|
||||||
|
|
||||||
@ -232,10 +230,11 @@ oom_score_adj_max = 0
|
|||||||
|
|
||||||
@ENVIRON_RE 100 /// USER=user
|
@ENVIRON_RE 100 /// USER=user
|
||||||
|
|
||||||
Note that you can control badness also via systemd units via OOMScoreAdjust, see
|
Note that you can control badness also via systemd units via
|
||||||
https://www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
|
OOMScoreAdjust, see
|
||||||
|
www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
|
||||||
|
|
||||||
#####################################################################
|
###############################################################################
|
||||||
|
|
||||||
6. Customize corrective actions.
|
6. Customize corrective actions.
|
||||||
|
|
||||||
@ -252,9 +251,10 @@ oom_score_adj_max = 0
|
|||||||
|
|
||||||
$PID will be replaced by process PID.
|
$PID will be replaced by process PID.
|
||||||
$NAME will be replaced by process name.
|
$NAME will be replaced by process name.
|
||||||
$SERVICE will be replaced by .service if it exists (otherwise it will be replaced by an empty line).
|
$SERVICE will be replaced by .service if it exists (otherwise it will be
|
||||||
|
replaced by an empty line)
|
||||||
|
|
||||||
#####################################################################
|
###############################################################################
|
||||||
|
|
||||||
7. GUI notifications:
|
7. GUI notifications:
|
||||||
- OOM prevention results and
|
- OOM prevention results and
|
||||||
@ -289,7 +289,7 @@ min_time_between_warnings = 15
|
|||||||
Ampersands (&) will be replaced with asterisks (*) in process
|
Ampersands (&) will be replaced with asterisks (*) in process
|
||||||
names and in commands.
|
names and in commands.
|
||||||
|
|
||||||
#####################################################################
|
###############################################################################
|
||||||
|
|
||||||
8. Verbosity
|
8. Verbosity
|
||||||
|
|
||||||
@ -303,7 +303,7 @@ print_config = False
|
|||||||
|
|
||||||
print_mem_check_results = False
|
print_mem_check_results = False
|
||||||
|
|
||||||
min_mem_report_interval = 60
|
min_mem_report_interval = 300
|
||||||
|
|
||||||
Print sleep periods between memory checks.
|
Print sleep periods between memory checks.
|
||||||
Valid values are True and False.
|
Valid values are True and False.
|
||||||
@ -327,15 +327,13 @@ extra_table_info = cgroup_v1
|
|||||||
|
|
||||||
print_victim_info = False
|
print_victim_info = False
|
||||||
|
|
||||||
# print_victim_cmdline
|
max_ancestry_depth = 10
|
||||||
|
|
||||||
max_ancestry_depth = 1
|
|
||||||
|
|
||||||
separate_log = False
|
separate_log = False
|
||||||
|
|
||||||
psi_debug = False
|
psi_debug = False
|
||||||
|
|
||||||
#####################################################################
|
###############################################################################
|
||||||
|
|
||||||
9. Misc
|
9. Misc
|
||||||
|
|
||||||
|
@ -122,7 +122,7 @@ send_signal = SIGTERM
|
|||||||
# os.kill(int(pid), SIGCONT)
|
# os.kill(int(pid), SIGCONT)
|
||||||
|
|
||||||
|
|
||||||
os.kill(int(pid), send_signal)
|
# os.kill(int(pid), send_signal)
|
||||||
t0 = time()
|
t0 = time()
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user