nohang/nohang
Alexey Avramov c329f92c2c 1
2018-06-08 17:08:16 +09:00

511 lines
15 KiB
Python
Executable File

#!/usr/bin/env python3
# nohang - no hang daemon
import os
from ctypes import CDLL
from operator import itemgetter
from signal import SIGKILL, SIGTERM
from time import gmtime, strftime, sleep, time
# Pick the first config file that exists: one in the current working
# directory takes precedence over the system-wide one.
for config in ('./nohang.conf', '/etc/nohang/nohang.conf'):
    if os.path.exists(config):
        print('config: {}'.format(config))
        break
else:
    # Message (Russian): "specify the path to the config with the --config
    # option".
    # NOTE(review): no --config option parsing is visible in this file.
    print('укажите путь к конфигу опцией --config')
    # Leave with a failure status; the site-provided exit() helper is meant
    # for interactive use and reported success (0) here.
    raise SystemExit(1)

# Share of the summed zram disksize that the main loop adds to
# mem_used_total when it estimates how much memory zram occupies.
zram_disksize_factor = 0.0042
###########################################################################################
def decrease_oom_score_adj(oom_score_adj_before, oom_score_adj_after):
    """Lower the oom_score_adj of every process that exceeds a threshold.

    Walks the numeric entries of /proc and, for each process whose
    oom_score_adj is greater than *oom_score_adj_before*, writes
    *oom_score_adj_after* into that process's oom_score_adj file.

    Args:
        oom_score_adj_before: Integer threshold; only strictly larger
            values are rewritten.
        oom_score_adj_after: Replacement value, as str or int.
    """
    print('decrease oom_score_adj...')
    # str() allows the replacement to be passed as an int as well as the
    # raw string that comes from the config file.
    new_value = str(oom_score_adj_after) + '\n'
    for pid in os.listdir('/proc'):
        # Skip entries whose name is not made of digits only.
        if not pid.isdigit():
            continue
        path = '/proc/' + pid + '/oom_score_adj'
        try:
            if int(rline1(path)) > oom_score_adj_before:
                write(path, new_value)
        except (FileNotFoundError, ProcessLookupError):
            # The process went away while it was being inspected.
            pass
        except PermissionError:
            # Not allowed to adjust this process; keep going with the rest
            # instead of aborting the whole pass.
            pass
# Read the first line of a file.
def rline1(path):
    """Return the first line of *path* without its trailing newline.

    Returns None when the file is empty.
    """
    with open(path) as f:
        for line in f:
            # rstrip('\n') rather than line[:-1]: a final line that has no
            # newline must not lose its last real character.
            return line.rstrip('\n')
    return None
# TODO: handle exceptions (none are caught here).
def write(path, string):
    """Replace the contents of the file at *path* with *string*.

    The file is opened in 'w' mode, so it is created or truncated first.
    Any error raised by open() or the write propagates to the caller.
    """
    with open(path, 'w') as target:
        target.write(string)
def config_parser(config):
    """Parse a ``name = value`` config file into a dict of strings.

    Lines that start with '#', a space, a tab or a newline (blank lines)
    are ignored, and so are lines that contain no '='. Only the first '='
    separates the name from the value, so values may contain '='. Names
    and values are stripped of surrounding whitespace.

    Args:
        config: Path to the config file.

    Returns:
        The dict of options on success, 1 if the file does not exist, or
        2 if it cannot be read because of a permission error.
    """
    if not os.path.exists(config):
        return 1
    name_value_dict = {}
    try:
        with open(config) as f:
            for line in f:
                if line.startswith(('#', '\n', '\t', ' ')):
                    continue
                name, separator, value = line.partition('=')
                if not separator:
                    # Not an assignment; nothing to store.
                    continue
                name_value_dict[name.strip()] = value.strip()
    except FileNotFoundError:
        # The file vanished between the existence check and open().
        return 1
    except PermissionError:
        return 2
    return name_value_dict
def sig_level_to_kb(string):
    """Convert a threshold such as '10 %' or '512 MiB' to a number of KiB.

    A value ending in '%' is taken as a percentage of the module-level
    ``mem_total``; values ending in 'KiB', 'MiB' or 'GiB' are scaled by
    1, 1024 and 1048576 respectively.

    Args:
        string: The threshold text; whitespace around the number and the
            unit is ignored.

    Returns:
        The threshold as a float.

    Raises:
        ValueError: If the unit is not recognised (previously None was
            returned silently) or the number cannot be parsed.
    """
    string = string.strip()
    if string.endswith('%'):
        return mem_total * float(string[:-1].strip()) / 100
    for suffix, factor in (('KiB', 1), ('MiB', 1024), ('GiB', 1048576)):
        if string.endswith(suffix):
            return float(string[:-len(suffix)].strip()) * factor
    raise ValueError(
        'unknown unit in {!r}: expected %, KiB, MiB or GiB'.format(string)
    )
# Convert a fraction to a percentage string.
def percent(num):
    """Format the fraction *num* as a percentage with one decimal place.

    The result is right-aligned so that the integer part takes at least
    three characters, e.g. 0.5 -> ' 50.0' and 1 -> '100.0'.

    Formatting directly (instead of splitting str(round(...)) on '.')
    also works for ints and for values whose str() has no '.'.
    """
    return '{:5.1f}'.format(num * 100)
# K -> M, right-aligned.
def human(num):
    """Return *num* / 1024 rounded to an integer, right-justified to 5 chars.

    Longer results are returned in full, without truncation.
    """
    scaled = round(num / 1024)
    return '{:>5}'.format(scaled)
# Return disksize and mem_used_total for a zram device id.
def zram_stat(zram_id, sys_block='/sys/block'):
    """Return ``(disksize, mem_used_total)`` for one zram device.

    Both values are returned as strings holding a number of bytes.
    mem_used_total is the third field of the device's ``mm_stat`` file;
    when that file does not exist, the separate ``mem_used_total`` file is
    read instead (presumably the layout of older kernels).

    Args:
        zram_id: Device directory name, e.g. 'zram0'.
        sys_block: Directory that holds the block-device entries.

    Returns:
        ``('0', '0')`` when the device has no ``disksize`` file or its
        disksize is 0 or empty, otherwise the two values read from sysfs.
    """
    device_dir = os.path.join(sys_block, zram_id)
    try:
        with open(os.path.join(device_dir, 'disksize')) as file:
            disksize = file.readline().strip()
    except FileNotFoundError:
        return '0', '0'
    if disksize in ('', '0'):
        return '0', '0'
    try:
        with open(os.path.join(device_dir, 'mm_stat')) as file:
            # split() with no argument collapses the column padding.
            mem_used_total = file.readline().split()[2]
    except FileNotFoundError:
        with open(os.path.join(device_dir, 'mem_used_total')) as file:
            mem_used_total = file.readline().strip()
    return disksize, mem_used_total  # BYTES, str
# Process name looked up by PID.
def pid_to_name(pid):
    """Return the name recorded for *pid* in /proc/<pid>/status.

    The name is the second tab-separated field of the file's first line.
    '<unknown>' is returned when the file is missing or the process has
    gone away; None is returned if the file is empty.

    Args:
        pid: The process id as a string.
    """
    status_path = '/proc/' + pid + '/status'
    try:
        with open(status_path) as status:
            first_line = next(status, None)
    except (FileNotFoundError, ProcessLookupError):
        return '<unknown>'
    if first_line is None:
        return None
    return first_line[:-1].split('\t')[1]
# Find the victim's PID AND send it the signal.
def find_victim(signal):
    """Send *signal* to the process with the highest oom_score.

    When ``decrease_oom_score_adj_enable`` and ``root`` are both true, the
    oom_score_adj values are first lowered via decrease_oom_score_adj().
    Every numeric /proc entry is then scored from its oom_score file, and
    the top-scoring process is signalled, provided its score is at least
    ``oom_score_min``. The outcome is reported on stdout.

    Args:
        signal: Signal number passed to os.kill().
    """
    if decrease_oom_score_adj_enable and root:
        decrease_oom_score_adj(oom_score_adj_before, oom_score_adj_after)
    print('find victim...')

    # Collect (pid, oom_score) pairs.
    oom_list = []
    for pid in os.listdir('/proc'):
        if not pid.isdigit():
            continue
        try:
            with open('/proc/' + pid + '/oom_score') as file:
                oom_score = int(file.readline())
        except (FileNotFoundError, ProcessLookupError, ValueError):
            # The process exited, or left nothing to read, while being
            # scanned; score it 0 instead of crashing the daemon.
            oom_score = 0
        oom_list.append((pid, oom_score))

    if not oom_list:
        print('No processes found\n')
        return

    # max() returns the first top-scoring pair, the same one a stable
    # descending sort would place first.
    pid, oom_score = max(oom_list, key=itemgetter(1))

    # Send the signal.
    if oom_score >= oom_score_min:
        name = pid_to_name(pid)
        print(
            'Try to send signal {} to process {}, Pid {}, oom_score {}'.format(
                signal, name, pid, oom_score
            )
        )
        try:
            os.kill(int(pid), signal)
            print('Success\n')
        except ProcessLookupError:
            print('No such process\n')
        except PermissionError:
            print('Operation not permitted\n')
    else:
        print('\noom_score {} < oom_score_min {}\n'.format(oom_score, oom_score_min))
###########################################################################################
# START
# Locate the fields of /proc/meminfo that the main loop reads by position.
with open('/proc/meminfo') as file:
    mem_list = file.readlines()

# Field names in file order (the text before ':' on every line).
mem_list_names = [entry.split(':')[0] for entry in mem_list]

# The main loop reads MemAvailable from the third line, so it must be there.
if len(mem_list_names) < 3 or mem_list_names[2] != 'MemAvailable':
    print('Your Linux kernel is too old (3.14+ requie), bye!')
    # Fatal: leave with a failure status instead of exit()'s 0.
    raise SystemExit(1)

swap_total_index = mem_list_names.index('SwapTotal')
# Assumes SwapFree is the line right after SwapTotal.
swap_free_index = swap_total_index + 1

# Number from the first line, taken to be "MemTotal: <n> kB".
mem_total = int(mem_list[0].split(':')[1].split(' ')[-2])
###############################################################################################
config_dict = config_parser(config)
print(config_dict, '\n')

# config_parser() returns 1 or 2 instead of a dict on failure. Nothing below
# can work without the options, so stop here.
if config_dict == 1:
    print('config {} does not exists'.format(config))
    raise SystemExit(1)
if config_dict == 2:
    print('cannot read config {}, permission error'.format(config))
    raise SystemExit(1)

print('config: {}\n'.format(config))


def _yes(value):
    """Return True only for the literal config value 'yes'."""
    return value == 'yes'


def _name_list(value):
    """Split a comma-separated config value into stripped names."""
    return [name.strip() for name in value.split(',')]


def _option(name, convert=str, *, required=True):
    """Return option *name* from config_dict, converted and echoed.

    A missing required option ends the program with a message; a missing
    optional one yields None and prints nothing.
    """
    if name not in config_dict:
        if required:
            print('option {} is missing in config {}'.format(name, config))
            raise SystemExit(1)
        return None
    value = convert(config_dict[name])
    print('{}: {}'.format(name, value))
    return value


# Options used later in this file are required. Yes/no options become real
# booleans, so that a value such as 'no' is no longer truthy.
mlockall = _option('mlockall', _yes)
self_nice = _option('self_nice', int)
self_oom_score_adj = _option('self_oom_score_adj', int)
rate_mem = _option('rate_mem', float)
rate_swap = _option('rate_swap', float)
rate_zram = _option('rate_zram', float)
mem_min_sigterm = _option('mem_min_sigterm')
mem_min_sigkill = _option('mem_min_sigkill')
swap_min_sigterm = _option('swap_min_sigterm')
swap_min_sigkill = _option('swap_min_sigkill')
check_zram = _option('check_zram', _yes, required=False)
zram_max_sigterm = _option('zram_max_sigterm')
zram_max_sigkill = _option('zram_max_sigkill')
min_delay_after_sigterm = _option('min_delay_after_sigterm', float)
min_delay_after_sigkill = _option('min_delay_after_sigkill', float)
oom_score_min = _option('oom_score_min', int)
decrease_oom_score_adj_enable = _option('decrease_oom_score_adj_enable', _yes)
# These two are only consumed when the feature above is switched on.
oom_score_adj_before = _option(
    'oom_score_adj_before', int, required=decrease_oom_score_adj_enable
)
oom_score_adj_after = _option(
    'oom_score_adj_after', required=decrease_oom_score_adj_enable
)

# The list options are parsed, but nothing visible in this file reads them.
use_lists = _option('use_lists', _yes, required=False)
white_list = _option('white_list', _name_list, required=False)
avoid_list = _option('avoid_list', _name_list, required=False)
avoid_ratio = _option('avoid_ratio', float, required=False)
black_list = _option('black_list', _name_list, required=False)
prefer_list = _option('prefer_list', _name_list, required=False)
prefer_ratio = _option('prefer_ratio', float, required=False)
if prefer_ratio is not None:
    # Blank line that followed the last echoed option.
    print()

# Thresholds converted once to KiB for the comparisons in the main loop.
mem_min_sigterm_kb = sig_level_to_kb(mem_min_sigterm)
mem_min_sigkill_kb = sig_level_to_kb(mem_min_sigkill)
swap_min_sigterm_kb = sig_level_to_kb(swap_min_sigterm)
swap_min_sigkill_kb = sig_level_to_kb(swap_min_sigkill)
zram_max_sigterm_kb = sig_level_to_kb(zram_max_sigterm)
zram_max_sigkill_kb = sig_level_to_kb(zram_max_sigkill)
print("\ncurrent process's effective user id", os.geteuid())
# find_victim() only adjusts other processes' oom_score_adj when this is set.
root = os.geteuid() == 0
print()

# Lock all memory to keep this process from being swapped out.
if mlockall:
    print('mlockall = yes')
    print('try to lock memory...')
    # NOTE(review): 3 is presumably MCL_CURRENT | MCL_FUTURE - confirm for
    # the target architecture.
    result = CDLL('libc.so.6', use_errno=True).mlockall(3)
    # Compare by value; identity of small ints is an implementation detail.
    if result == 0:
        print('memory locked!', 'result', result)
    else:
        print('cannot lock memory!', 'result', result)
else:
    print('mlockall != yes')

# Change our own niceness by self_nice (raising priority, per the original
# comment); silently skipped when not permitted.
try:
    os.nice(self_nice)
    print('self_nice = {}'.format(self_nice))
except PermissionError:
    pass

# Protect this process from being chosen as an OOM victim where possible.
try:
    with open('/proc/self/oom_score_adj', 'w') as file:
        file.write('{}\n'.format(self_oom_score_adj))
    print('self_oom_score_adj = {}'.format(self_oom_score_adj))
except OSError:
    # Best effort; PermissionError is a subclass of OSError.
    pass
###########################################################################################
print()

# Main monitoring loop.
while True:

    # Read MemAvailable, SwapTotal and SwapFree by their line positions and
    # stop reading as soon as SwapFree has been seen.
    with open('/proc/meminfo') as meminfo:
        for index, row in enumerate(meminfo):
            if index == 2:
                mem_available = int(row.split(':')[1].split(' ')[-2])
            elif index == swap_total_index:
                swap_total = int(row.split(':')[1].split(' ')[-2])
            elif index == swap_free_index:
                swap_free = int(row.split(':')[1].split(' ')[-2])
                break

    # Sum the figures of every zram device to estimate zram's footprint.
    disksize_sum = 0
    mem_used_total_sum = 0
    for device in os.listdir('/sys/block'):
        if not device.startswith('zram'):
            continue
        device_disksize, device_mem_used = zram_stat(device)
        disksize_sum += int(device_disksize)
        mem_used_total_sum += int(device_mem_used)

    full_zram = (
        mem_used_total_sum + disksize_sum * zram_disksize_factor
    ) / 1024.0

    # Sleep estimates; these could move to the end of the iteration if they
    # are never printed.
    t_mem = mem_available / 1024.0 / 1024.0 / rate_mem
    t_swap = swap_free / 1024.0 / 1024.0 / rate_swap

    # mem_total * 0.8 is used as the ceiling for zram; once full_zram goes
    # above it the estimate would turn non-positive, so it is clamped.
    t_zram = (mem_total * 0.8 - full_zram) / 1024.0 / 1024.0 / rate_zram
    if t_zram <= 0:
        t_zram = 0.01

    via_swap = t_mem + t_swap
    via_zram = t_mem + t_zram

    # Use the smaller of the two estimates.
    t = via_swap if via_swap <= via_zram else via_zram

    timestamp = strftime("%Y-%b-%d %H:%M:%S", gmtime())
    print(
        '{} Mem: {} M, Swap: {} M, Zram: {} M'.format(
            timestamp,
            human(mem_available),
            human(swap_free),
            human(full_zram)
        )
    )

    # SIGKILL thresholds come first; after a kill the iteration restarts
    # without the regular sleep.
    if mem_available <= mem_min_sigkill_kb and swap_free <= swap_min_sigkill_kb:
        print('mem_available < mem_min_sigkill and swap_free < swap_min_sigkill')
        find_victim(SIGKILL)
        sleep(min_delay_after_sigkill)
        continue

    if full_zram >= zram_max_sigkill_kb:
        print('full_zram > zram_max_sigkill')
        find_victim(SIGKILL)
        sleep(min_delay_after_sigkill)
        continue

    # SIGTERM thresholds: both checks may fire in the same iteration, and
    # the regular sleep below still follows.
    if mem_available <= mem_min_sigterm_kb and swap_free <= swap_min_sigterm_kb:
        print('mem_available < mem_min_sigterm and swap_free < swap_min_sigterm')
        find_victim(SIGTERM)
        sleep(min_delay_after_sigterm)

    if full_zram >= zram_max_sigterm_kb:
        print('zram_part > zram_max_sigterm')
        find_victim(SIGTERM)
        sleep(min_delay_after_sigterm)

    # Possible alternative: move the delay into the victim search function.
    sleep(t)