fix poll rate alg

This commit is contained in:
Alexey Avramov 2019-02-15 02:12:49 +09:00
parent cf19909d5d
commit c208f9c940
2 changed files with 68 additions and 24 deletions

88
nohang
View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""A daemon that prevents OOM in Linux systems.""" """A daemon that prevents OOM in Linux systems."""
import os import os
from ctypes import CDLL from ctypes import CDLL
from time import sleep, time from time import sleep, time
@ -8,6 +9,9 @@ from sys import stdout, stderr, argv, exit
from signal import SIGKILL, SIGTERM from signal import SIGKILL, SIGTERM
start_time = time()
help_mess = """usage: nohang [-h] [-c CONFIG] help_mess = """usage: nohang [-h] [-c CONFIG]
optional arguments: optional arguments:
@ -46,8 +50,6 @@ else:
conf_err_mess = 'Invalid config. Exit.' conf_err_mess = 'Invalid config. Exit.'
start_time = time()
sig_dict = {SIGKILL: 'SIGKILL', sig_dict = {SIGKILL: 'SIGKILL',
SIGTERM: 'SIGTERM'} SIGTERM: 'SIGTERM'}
@ -60,10 +62,10 @@ if self_uid == 0:
else: else:
root = False root = False
wait_time = 12 wait_time = 10
# todo: make config option # todo: make config option
max_sleep_time = 2 max_sleep_time = 3
# todo: make config option # todo: make config option
min_sleep_time = 0.1 min_sleep_time = 0.1
@ -79,6 +81,8 @@ HR = '~' * 79
# todo: make config option # todo: make config option
print_total_stat = True print_total_stat = True
debug = False
########################################################################## ##########################################################################
@ -575,8 +579,15 @@ def fattest():
pid_badness_list = [] pid_badness_list = []
if debug:
print(' PID badness Name eUID')
print('------- ------- --------------- ----------')
for pid in pid_list: for pid in pid_list:
# find and modify badness (if it needs) # find and modify badness (if it needs)
try: try:
@ -605,6 +616,19 @@ def fattest():
if search(re_tup[1], uid) is not None: if search(re_tup[1], uid) is not None:
badness += int(re_tup[0]) badness += int(re_tup[0])
if debug:
print('{} {} {} {}'.format(
pid.rjust(7),
str(badness).rjust(7),
pid_to_name(pid).ljust(15),
pid_to_uid(pid).rjust(10)
)
)
except FileNotFoundError: except FileNotFoundError:
continue continue
except ProcessLookupError: except ProcessLookupError:
@ -843,9 +867,12 @@ def find_victim_and_send_signal(signal):
cmdline) cmdline)
if execute_the_command and signal is SIGTERM and name in etc_dict: if execute_the_command and signal is SIGTERM and name in etc_dict:
command = etc_dict[name] command = etc_dict[name]
exit_status = os.system(etc_dict[name].replace( exit_status = os.system(etc_dict[name].replace(
'$PID', pid).replace('$NAME', pid_to_name(pid))) '$PID', pid).replace('$NAME', pid_to_name(pid)))
if exit_status == 0: if exit_status == 0:
exit_status = '\033[32m0\033[0m' exit_status = '\033[32m0\033[0m'
else: else:
@ -854,7 +881,7 @@ def find_victim_and_send_signal(signal):
response_time = time() - time0 response_time = time() - time0
etc_info = '{}' \ etc_info = '{}' \
'\n\033[4mImplement corrective action:\033[0m\n Run the command: \033[4m{}\033[0m' \ '\n\033[4mImplement a corrective action:\033[0m\n Run the command: \033[4m{}\033[0m' \
'\n Exit status: {}; response time: {} ms'.format( '\n Exit status: {}; response time: {} ms'.format(
victim_info, command.replace( victim_info, command.replace(
'$PID', pid).replace('$NAME', pid_to_name(pid)), exit_status, '$PID', pid).replace('$NAME', pid_to_name(pid)), exit_status,
@ -947,9 +974,17 @@ def sleep_after_check_mem():
else: else:
swap_point = swap_free - swap_min_sigkill_kb swap_point = swap_free - swap_min_sigkill_kb
if swap_point < 0:
swap_point = 0
if mem_point < 0:
mem_point = 0
t_mem = mem_point / rate_mem t_mem = mem_point / rate_mem
t_swap = swap_point / rate_swap t_swap = swap_point / rate_swap
t_zram = (mem_total * 0.9 - mem_used_zram) / rate_zram t_zram = (mem_total * 0.9 - mem_used_zram) / rate_zram
if t_zram < 0:
t_zram = 0
t_mem_swap = t_mem + t_swap t_mem_swap = t_mem + t_swap
t_mem_zram = t_mem + t_zram t_mem_zram = t_mem + t_zram
@ -966,14 +1001,20 @@ def sleep_after_check_mem():
else: else:
pass pass
if print_sleep_periods:
print(
'Sleep time: {} sec; (t_mem={}, t_swap={}, t_zram={})'.format(
round(t, 2),
round(t_mem, 2),
round(t_swap, 2),
round(t_zram, 2)
)
)
stdout.flush()
try: try:
if print_sleep_periods:
print('sleep', round(t, 2))
# ' (t_mem={}, t_swap={}, t_zram={})'.format(
# round(t_mem, 2),
# round(t_swap, 2),
# round(t_zram, 2)))
stdout.flush()
sleep(t) sleep(t)
except KeyboardInterrupt: except KeyboardInterrupt:
exit(1) exit(1)
@ -1095,16 +1136,6 @@ except ValueError:
########################################################################## ##########################################################################
'''
# Configurations
cd = os.getcwd()
'''
#config = '/etc/nohang/nohang.conf'
# config = 'nohang.conf'
print('Config:', config) print('Config:', config)
@ -1566,10 +1597,19 @@ rate_mem = rate_mem * 1048576
rate_swap = rate_swap * 1048576 rate_swap = rate_swap * 1048576
rate_zram = rate_zram * 1048576 rate_zram = rate_zram * 1048576
# print(rate_mem, rate_swap, rate_zram)
warn_time_now = 0 warn_time_now = 0
warn_time_delta = 1000 warn_time_delta = 1000
warn_timer = 0 warn_timer = 0
if debug:
print()
fattest()
print()
print('Monitoring started!') print('Monitoring started!')
stdout.flush() stdout.flush()
@ -1588,6 +1628,8 @@ if psi_support and not ignore_psi:
avg_value = '' avg_value = ''
while True: while True:
if psi_support and not ignore_psi: if psi_support and not ignore_psi:
@ -1611,6 +1653,8 @@ while True:
pass pass
mem_available, swap_total, swap_free = check_mem_and_swap() mem_available, swap_total, swap_free = check_mem_and_swap()
# print(mem_available, swap_total, swap_free)
# если метры - получаем киб выше и сразу. см. # если метры - получаем киб выше и сразу. см.

View File

@ -93,8 +93,8 @@ psi_avg10_sleep_time = 60
Valid values are positive floating-point numbers. Valid values are positive floating-point numbers.
rate_mem = 6 rate_mem = 3
rate_swap = 3 rate_swap = 1.5
rate_zram = 1 rate_zram = 1
See also https://github.com/rfjakob/earlyoom/issues/61 See also https://github.com/rfjakob/earlyoom/issues/61