add new keys in config

This commit is contained in:
Alexey Avramov 2019-02-28 19:26:08 +09:00
parent 79d4d01e19
commit 5264a9641e
3 changed files with 155 additions and 37 deletions

View File

@ -215,12 +215,13 @@ Please create [issues](https://github.com/hakavlad/nohang/issues). Use cases, fe
- [x] Improve output:
- [x] Display `oom_score`, `oom_score_adj`, `Ancestry`, `EUID`, `State`, `VmSize`, `RssAnon`, `RssFile`, `RssShmem`, `Realpath`, `Cmdline` and `Lifetime` of the victim in corrective action reports
- [x] Print statistics on corrective actions after each corrective action
- [ ] Add memory report interval
- [ ] Add delta memory info (the rate of change of available memory)
- [x] Add memory report interval
- [x] Add delta memory info (the rate of change of available memory)
- [x] Improve poll rate algorithm
- [x] Add `max_post_sigterm_victim_lifetime` option: send SIGKILL to the victim if it doesn't respond to SIGTERM for a certain time
- [x] Improve victim search algorithm (do it ~30% faster) ([rfjakob/earlyoom#114](https://github.com/rfjakob/earlyoom/issues/114))
- [x] Improve limiting `oom_score_adj`: now it can work with UID != 0
- [x] Fixed process crash before performing corrective actions if Python 3.3 or Python 3.4 are used to interpret nohang
- [x] Improve GUI warnings:
- [x] Find env without running `ps`
- [x] Handle all timeouts when notify-send starts

135
nohang
View File

@ -23,10 +23,8 @@ optional arguments:
SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
conf_err_mess = 'Invalid config. Exit.'
sig_dict = {SIGKILL: 'SIGKILL',
SIGTERM: 'SIGTERM'}
@ -41,12 +39,6 @@ else:
wait_time = 10
# todo: make config option
max_sleep_time = 3
# todo: make config option
min_sleep_time = 0.1
notify_helper_path = '/usr/sbin/nohang_notify_helper'
psi_path = '/proc/pressure/memory'
@ -55,21 +47,8 @@ psi_support = os.path.exists(psi_path)
HR = ''
# todo: make config option
print_total_stat = True
print_proc_table = False
min_mem_report_interval = 5
post_kill_exe = ''
victim_dict = dict()
max_ancestry_depth = 1
max_post_sigterm_victim_lifetime = 9
##########################################################################
@ -96,7 +75,6 @@ def pid_to_starttime(pid):
def get_victim_id(pid):
# todo: handle UnicodeDecodeError
return pid + '-' + rline1(
'/proc/' + pid + '/stat').rpartition(')')[2].split(' ')[20]
@ -577,7 +555,9 @@ def sleep_after_send_signal(signal):
sleep(min_delay_after_sigkill)
else:
if print_sleep_periods:
print(' sleep', min_delay_after_sigterm)
print(
'Sleep {} sec after implementing a corrective action'.format(
min_delay_after_sigterm))
sleep(min_delay_after_sigterm)
@ -665,8 +645,9 @@ def find_victim():
pid_badness_list = []
if print_proc_table:
print('===============================================================================')
print(' PID badness Name eUID cmdline')
print('------- ------- --------------- ---------- -------')
print('------- ------- --------------- ---------- ---------------------------------')
for pid in pid_list:
@ -683,7 +664,6 @@ def find_victim():
pid_to_cmdline(pid))
)
pid_badness_list.append((pid, badness))
# Make list of (pid, badness) tuples, sorted by 'badness' values
@ -700,12 +680,15 @@ def find_victim():
victim_badness = pid_tuple_list[1]
victim_name = pid_to_name(pid)
if print_proc_table:
print('===============================================================================')
print(
'\nWorst process (PID: {}, Name: {}, badness: {}) found in {} ms'.format(
'\nWorst process found in {} ms: PID: {}, Name: {}, badness: {}'.format(
round((time() - ft1) * 1000),
pid,
victim_name,
victim_badness,
round((time() - ft1) * 1000)
victim_badness
)
)
@ -1081,7 +1064,7 @@ def sleep_after_check_mem():
if print_sleep_periods:
print(
'Sleep time: {} sec; (t_mem={}, t_swap={}, t_zram={})'.format(
'Sleep {} sec (t_mem={}, t_swap={}, t_zram={})'.format(
round(t, 2),
round(t_mem, 2),
round(t_swap, 2),
@ -1538,6 +1521,94 @@ else:
exit(1)
# max_ancestry_depth: mandatory config key, the maximum depth of the victim's
# ancestry shown in reports. Must be an integer >= 1.
if 'max_ancestry_depth' in config_dict:
    max_ancestry_depth = string_to_int_convert_test(
        config_dict['max_ancestry_depth'])
    # Validate the value that was just parsed. The previous code tested the
    # unrelated min_badness here, so a non-integer value was never rejected
    # and fell through to the range comparison below.
    # NOTE(review): presumably string_to_int_convert_test returns None for
    # input that is not an integer -- confirm against its definition.
    if max_ancestry_depth is None:
        errprint('Invalid max_ancestry_depth value, not integer\nExit')
        exit(1)
    if max_ancestry_depth < 1:
        errprint('Invalid max_ancestry_depth value\nExit')
        exit(1)
else:
    errprint('max_ancestry_depth is not in config\nExit')
    exit(1)
# max_post_sigterm_victim_lifetime: mandatory config key, parsed as a
# non-negative float. A missing key is rejected first so the happy path
# below stays flat.
if 'max_post_sigterm_victim_lifetime' not in config_dict:
    errprint('max_post_sigterm_victim_lifetime is not in config\nExit')
    exit(1)

max_post_sigterm_victim_lifetime = string_to_float_convert_test(
    config_dict['max_post_sigterm_victim_lifetime'])

# None signals that the raw string could not be converted.
if max_post_sigterm_victim_lifetime is None:
    errprint('Invalid max_post_sigterm_victim_lifetime value, not float\nExit')
    exit(1)
elif max_post_sigterm_victim_lifetime < 0:
    errprint('max_post_sigterm_victim_lifetime must be non-negative number\nExit')
    exit(1)
# post_kill_exe: mandatory config key, taken verbatim as a string
# (no conversion or validation is applied to the value).
if 'post_kill_exe' not in config_dict:
    errprint('post_kill_exe is not in config\nExit')
    exit(1)
post_kill_exe = config_dict['post_kill_exe']

# Boolean output switches, read through the shared conf_parse_bool helper.
# NOTE(review): handling of missing/invalid values is up to conf_parse_bool,
# which is defined elsewhere -- confirm there.
print_total_stat = conf_parse_bool('print_total_stat')
print_proc_table = conf_parse_bool('print_proc_table')
# min_mem_report_interval: mandatory config key, parsed as a
# non-negative float.
if 'min_mem_report_interval' not in config_dict:
    errprint('min_mem_report_interval is not in config\nExit')
    exit(1)

min_mem_report_interval = string_to_float_convert_test(
    config_dict['min_mem_report_interval'])

# None signals that the raw string could not be converted.
if min_mem_report_interval is None:
    errprint('Invalid min_mem_report_interval value, not float\nExit')
    exit(1)
elif min_mem_report_interval < 0:
    errprint('min_mem_report_interval must be non-negative number\nExit')
    exit(1)
# max_sleep_time: mandatory config key, parsed as a strictly positive float.
if 'max_sleep_time' not in config_dict:
    errprint('max_sleep_time is not in config\nExit')
    exit(1)

max_sleep_time = string_to_float_convert_test(
    config_dict['max_sleep_time'])

# None signals that the raw string could not be converted.
if max_sleep_time is None:
    errprint('Invalid max_sleep_time value, not float\nExit')
    exit(1)
elif max_sleep_time <= 0:
    errprint('max_sleep_time must be positive number\nExit')
    exit(1)
# min_sleep_time: mandatory config key, parsed as a strictly positive float.
if 'min_sleep_time' not in config_dict:
    errprint('min_sleep_time is not in config\nExit')
    exit(1)

min_sleep_time = string_to_float_convert_test(
    config_dict['min_sleep_time'])

# None signals that the raw string could not be converted.
if min_sleep_time is None:
    errprint('Invalid min_sleep_time value, not float\nExit')
    exit(1)
elif min_sleep_time <= 0:
    errprint('min_sleep_time must be positive number\nExit')
    exit(1)
# The sleep range must be well-formed: min_sleep_time <= max_sleep_time.
# The message now states the constraint that is actually enforced; the
# previous wording had the two option names swapped.
if max_sleep_time < min_sleep_time:
    errprint(
        'min_sleep_time value must not exceed max_sleep_time value.\nExit'
    )
    exit(1)
##########################################################################
@ -1890,8 +1961,6 @@ while True:
kib_to_mib(swap_min_sigkill_kb),
swap_sigkill_pc)
print(mem_info)
implement_corrective_action(SIGKILL)
psi_t0 = time()
continue
@ -1909,8 +1978,6 @@ while True:
kib_to_mib(zram_max_sigkill_kb),
percent(zram_max_sigkill_kb / mem_total))
print(mem_info)
implement_corrective_action(SIGKILL)
psi_t0 = time()
continue
@ -1956,8 +2023,6 @@ while True:
kib_to_mib(zram_max_sigterm_kb),
percent(zram_max_sigterm_kb / mem_total))
print(mem_info)
implement_corrective_action(SIGTERM)
psi_t0 = time()

View File

@ -99,6 +99,18 @@ rate_zram = 0.5
See also https://github.com/rfjakob/earlyoom/issues/61
Максимальное время сна между проверками памяти.
Положительное число.
max_sleep_time = 3
Минимальное время сна между проверками памяти.
Положительное число, не превышающее max_sleep_time.
min_sleep_time = 0.1
#####################################################################
3. The prevention of killing innocent victims
@ -312,8 +324,48 @@ print_config = False
print_mem_check_results = False
Минимальная периодичность печати состояния памяти.
0 - печатать все проверки памяти.
Неотрицательное число.
min_mem_report_interval = 10
Print sleep periods between memory checks.
Valid values are True and False.
print_sleep_periods = False
Печатать общую статистику по корректирующим действиям с момента
запуска nohang после каждого корректирующего действия.
print_total_stat = True
Печатать таблицу процессов перед каждым корректирующим действием.
print_proc_table = False
Максимальная глубина показа родословной. По умолчанию (1)
показывается только родитель - PPID.
Целое положительное число.
max_ancestry_depth = 3
#####################################################################
8. Misc
Жертва может не реагировать на SIGTERM.
max_post_sigterm_victim_lifetime - это время, при превышении
которого жертва получит SIGKILL.
Неотрицательное число.
max_post_sigterm_victim_lifetime = 9
Выполнить произвольную команду после SIGKILL.
Пустая строка - ничего не выполнять.
Произвольная строка.
post_kill_exe =