nohang/nohang
2018-06-12 01:02:26 +09:00

737 lines
22 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# Nohang - No Hang Daemon
###########################################################################################
# - imports
import os
from ctypes import CDLL
from operator import itemgetter
from time import sleep
from argparse import ArgumentParser
###########################################################################################
# - constants
# found experimentally; needs to be refined for different kernels and architectures
# (multiplied by the summed zram disksize when the main loop estimates zram memory use)
zram_disksize_factor = 0.0042
# where to look for the config when it is not given via --config
default_configs = ('./nohang.conf', '/etc/nohang/nohang.conf')
# hint printed together with every config-related error before exiting
err_mess = '\nSet up path to the valid config file with -c/--config CONFIG option!\nexit'
###########################################################################################
# - function definitions
def decrease_oom_score_adj(oom_score_adj_before, oom_score_adj_after):
    """Lower the oom_score_adj of every process whose value is too high.

    Walks the all-digit entries of /proc and, for each one whose
    oom_score_adj is greater than oom_score_adj_before, writes
    oom_score_adj_after (a string) followed by a newline into that
    process's oom_score_adj file. Entries that vanish while being
    processed are skipped silently.

    NOTE(review): the config-extraction code further down this file assigns
    a bool to the module-level name ``decrease_oom_score_adj``, which
    replaces this function object under that name.
    """
    numeric_entries = (name for name in os.listdir('/proc') if name.isdigit())
    for proc_id in numeric_entries:
        adj_path = '/proc/' + proc_id + '/oom_score_adj'
        try:
            current = int(rline1(adj_path))
            if current > oom_score_adj_before:
                write(adj_path, oom_score_adj_after + '\n')
        except (FileNotFoundError, ProcessLookupError):
            # the process exited between listing /proc and touching its files
            pass
# read the first line of a file
def rline1(path):
    """Return the first line of the file at *path* without its line ending.

    Only a trailing newline is removed, so a file whose first line is not
    newline-terminated is returned intact (previously its last character
    was cut off). Returns None when the file is empty, as before.
    Exceptions raised by open() or by reading propagate to the caller.
    """
    with open(path) as f:
        line = f.readline()
    if not line:
        # empty file: keep the historical "no line -> None" result
        return None
    return line.rstrip('\n')
# write a string to a file
def write(path, string):
    """Open *path* in 'w' mode (truncating it) and write *string* to it."""
    with open(path, 'w') as out:
        out.write(string)
def kib_to_mib(num):
    """Divide *num* by 1024 and round the result to an integer with round()."""
    mib = num / 1024.0
    return round(mib)
def percent(n):
    """Return the fraction *n* as a percentage rounded to one decimal place."""
    scaled = n * 100
    return round(scaled, 1)
def just_percent(num):
    """Return the fraction *num* as a percentage string padded on the left to 5 characters."""
    text = str(round(num * 100, 1))
    return '{:>5}'.format(text)
# K -> M, right-aligned
def human(num):
    """Divide *num* by 1024, round to an integer and left-pad the text to 5 characters."""
    mib = round(num / 1024)
    return '{:>5}'.format(str(mib))
# return disksize and mem_used_total for a zram device id
def zram_stat(zram_id):
    """Return ``(disksize, mem_used_total)`` for the block device *zram_id*.

    Both values are strings as read from /sys/block/<zram_id>/ (the
    original trailing comment labels them as bytes).

    Returns ``('0', '0')`` when the disksize file does not exist or when
    the reported disksize is '0'. The latter check used to compare the
    string against the list ``['0\\n']`` and therefore never matched.

    mem_used_total is the third whitespace-separated field of mm_stat; if
    mm_stat does not exist, the separate mem_used_total file is read
    instead (presumably for kernels that do not provide mm_stat — confirm).
    A FileNotFoundError from that fallback read propagates to the caller.
    """
    sys_dir = '/sys/block/' + zram_id
    try:
        disksize = rline1(sys_dir + '/disksize')
    except FileNotFoundError:
        return '0', '0'
    if disksize == '0':
        # presumably a device that has not been set up: nothing to account for
        return '0', '0'
    try:
        # split() without arguments drops the padding between the columns
        mem_used_total = rline1(sys_dir + '/mm_stat').split()[2]
    except FileNotFoundError:
        mem_used_total = rline1(sys_dir + '/mem_used_total')
    return disksize, mem_used_total  # BYTES, str
# process name from its pid
def pid_to_name(pid):
    """Return the second tab-separated field of the first line of /proc/<pid>/status.

    *pid* is a string. Returns '<unknown>' when the status file cannot be
    found or the process disappears while it is being read, and None when
    the file yields no lines at all.
    """
    status_path = '/proc/' + pid + '/status'
    try:
        with open(status_path) as status:
            first = next(status, None)
            if first is not None:
                # drop the last character (the newline) and take the value column
                return first[:-1].split('\t')[1]
    except (FileNotFoundError, ProcessLookupError):
        return '<unknown>'
def _lower_oom_score_adj(threshold, new_value):
    """Write *new_value* to the oom_score_adj of every process above *threshold*.

    Private helper of find_victim_and_send_signal(). It exists because the
    module-level name ``decrease_oom_score_adj`` holds the boolean config
    option by the time a victim is searched for, so that name can no
    longer be called as a function.
    """
    for entry in os.listdir('/proc'):
        if not entry.isdigit():
            continue
        adj_path = '/proc/' + entry + '/oom_score_adj'
        try:
            if int(rline1(adj_path)) > threshold:
                write(adj_path, '{}\n'.format(new_value))
        except (FileNotFoundError, ProcessLookupError):
            # the process exited while we were looking at it
            pass


def find_victim_and_send_signal(signal):
    """Send *signal* to the process with the highest oom_score.

    When the ``decrease_oom_score_adj`` option is True and ``root`` is
    true, oom_score_adj values above ``oom_score_adj_before`` are first
    replaced with ``oom_score_adj_after``. Then every all-digit entry of
    /proc is scored by its oom_score (0 if the process has already gone),
    and the highest-scoring one receives *signal*, provided its score is at
    least ``oom_score_min``. The outcome is reported on stdout.

    Fixes over the previous version:
    * the option flag used to be called as if it were the function of the
      same name, which raised TypeError whenever the option was enabled
      and the daemon ran as root;
    * ProcessLookupError while reading oom_score is handled the same way
      as FileNotFoundError instead of aborting the daemon.
    """
    if decrease_oom_score_adj is True and root:
        _lower_oom_score_adj(oom_score_adj_before, oom_score_adj_after)

    # build the list of (pid, oom_score) pairs
    oom_list = []
    for entry in os.listdir('/proc'):
        if not entry.isdigit():
            continue
        try:
            oom_score = int(rline1('/proc/' + entry + '/oom_score'))
        except (FileNotFoundError, ProcessLookupError):
            oom_score = 0
        oom_list.append((entry, oom_score))

    # max() returns the first pair with the highest score, which is the same
    # element the previous stable descending sort put at index 0
    pid, oom_score = max(oom_list, key=itemgetter(1))

    # send the signal
    if oom_score >= oom_score_min:
        name = pid_to_name(pid)
        print(
            ' Try to send signal {} to {}, Pid {}, oom_score {}'.format(
                signal, name, pid, oom_score
            )
        )
        try:
            os.kill(int(pid), signal)
            print(' Success')
        except ProcessLookupError:
            print(' No such process')
        except PermissionError:
            print(' Operation not permitted')
    else:
        print(' oom_score {} < oom_score_min {}'.format(oom_score, oom_score_min))
###########################################################################################
# - locate the positions of the needed lines in /proc/meminfo
with open('/proc/meminfo') as file:
    mem_list = file.readlines()

# field names are everything before the first colon of each line
mem_list_names = [row.split(':')[0] for row in mem_list]

# the third line must be MemAvailable, otherwise we cannot work
if mem_list_names[2] != 'MemAvailable':
    print('Your Linux kernel is too old (3.14+ requie), bye!')
    exit()

# the line after SwapTotal is taken to be SwapFree
swap_total_index = mem_list_names.index('SwapTotal')
swap_free_index = swap_total_index + 1

# the first line holds the total memory; its value is the next-to-last
# space-separated token
mem_total_tokens = mem_list[0].split(':')[1].split(' ')
mem_total = int(mem_total_tokens[-2])
# TODO: also locate the positions of VmRSS & VmSwap
###########################################################################################
# - determine the path to the config file
# parse the command-line arguments
parser = ArgumentParser()
parser.add_argument(
    '-c',
    '--config',
    help='path to the config file, default values: ./nohang.conf, /etc/nohang/nohang.conf',
    default=None,
    type=str
)
arg_config = parser.parse_args().config

if arg_config is not None:
    # a path was given with -c/--config: it has to exist
    if not os.path.exists(arg_config):
        print('нет файла по указанному пути: {}'.format(arg_config), err_mess)
        exit()
    config = arg_config
else:
    # no path was given: use the first default location that exists
    del arg_config
    config = next(
        (candidate for candidate in default_configs if os.path.exists(candidate)),
        None
    )
    if config is None:
        print('По дефолтным путям конфиг не найден', err_mess)
        exit()

print('Path to nohang config file:', config)
###########################################################################################
# - parse the config file into a dict of parameters
try:
    with open(config) as f:
        config_dict = {}
        for line in f:
            # ignore comment lines, empty lines and lines starting with whitespace
            if line.startswith(('#', '\n', '\t', ' ')):
                continue
            # key is the text before the first '=', value the text between
            # the first and second '='; a line without '=' raises IndexError
            fields = line.split('=')
            config_dict[fields[0].strip()] = fields[1].strip()
except PermissionError:
    print('PermissionError', err_mess)
    exit()
except UnicodeDecodeError:
    print('UnicodeDecodeError', err_mess)
    exit()
except IsADirectoryError:
    print('IsADirectoryError', err_mess)
    exit()
except IndexError:
    print('IndexError', err_mess)
    exit()
###########################################################################################
# - extract the parameters from the dict, checking that every required one is present
def _config_value(name):
    """Return the raw string stored for *name*; report and exit if it is missing."""
    if name not in config_dict:
        print('{} not in config, exit!'.format(name))
        exit()
    return config_dict[name]


def _config_bool(name):
    """Return the option *name* as a bool; only 'True' and 'False' are accepted."""
    raw = _config_value(name)
    if raw == 'True':
        return True
    if raw == 'False':
        return False
    print('invalid {} value {} (should be True or False), exit!'.format(name, raw))
    exit()


def _config_rate(name):
    """Return the option *name* as a float; report and exit unless it is positive."""
    rate = float(_config_value(name))
    if rate <= 0:
        print('{} должен быть положительным'.format(name))
        exit()
    return rate


# The options are read in the same order as before, so the first problem
# reported for a broken config does not change.
print_config = _config_bool('print_config')
print_mem_check_results = _config_bool('print_mem_check_results')
mlockall = _config_bool('mlockall')

self_nice = int(_config_value('self_nice'))
self_oom_score_adj = int(_config_value('self_oom_score_adj'))

rate_mem = _config_rate('rate_mem')
rate_swap = _config_rate('rate_swap')
rate_zram = _config_rate('rate_zram')

# thresholds stay raw strings here; they are converted to KiB later
mem_min_sigterm = _config_value('mem_min_sigterm')
mem_min_sigkill = _config_value('mem_min_sigkill')
swap_min_sigterm = _config_value('swap_min_sigterm')
swap_min_sigkill = _config_value('swap_min_sigkill')
zram_max_sigterm = _config_value('zram_max_sigterm')
zram_max_sigkill = _config_value('zram_max_sigkill')

min_delay_after_sigterm = float(_config_value('min_delay_after_sigterm'))
min_delay_after_sigkill = float(_config_value('min_delay_after_sigkill'))

oom_score_min = int(_config_value('oom_score_min'))

# NOTE(review): this assignment rebinds the module-level name that referred
# to the function decrease_oom_score_adj() defined earlier in the file.
decrease_oom_score_adj = _config_bool('decrease_oom_score_adj')
oom_score_adj_before = int(_config_value('oom_score_adj_before'))
# kept as a string: it is written to /proc/<pid>/oom_score_adj as text
oom_score_adj_after = _config_value('oom_score_adj_after')
###########################################################################################
# - convert the configured levels to kilobytes
def sig_level_to_kb(string):
    """Convert a level such as '10%', '500K', '40M' or '1G' to kilobytes.

    A percentage is taken relative to the module-level ``mem_total``;
    K, M and G values are scaled by 1, 1024 and 1048576. Any other
    suffix prints an error message and exits.
    """
    number = string[:-1].strip()
    unit = string[-1:]
    if unit == '%':
        return float(number) / 100 * mem_total
    multipliers = {'K': 1, 'M': 1024, 'G': 1048576}
    if unit in multipliers:
        return float(number) * multipliers[unit]
    print('Конфиг инвалид, где-то неверно указаны единицы измерения')
    exit()
# memory and zram thresholds in KiB, converted in the same order as they
# are listed here
(
    mem_min_sigterm_kb,
    mem_min_sigkill_kb,
    zram_max_sigterm_kb,
    zram_max_sigkill_kb,
) = map(
    sig_level_to_kb,
    (mem_min_sigterm, mem_min_sigkill, zram_max_sigterm, zram_max_sigkill),
)
# returns the number of kilobytes when the config gives an absolute value,
# or a tuple holding the percentage
def sig_level_to_kb_swap(string):
    """Convert a swap level to kilobytes, or to ``(percent, True)`` for a '%' value.

    K, M and G values are scaled by 1, 1024 and 1048576 and returned as a
    float. A percentage is returned unscaled inside a tuple. Any other
    suffix prints an error message and exits.
    """
    number = string[:-1].strip()
    unit = string[-1:]
    if unit == '%':
        return float(number), True
    multipliers = {'K': 1, 'M': 1024, 'G': 1048576}
    if unit in multipliers:
        return float(number) * multipliers[unit]
    print('Конфиг инвалид, где-то неверно указаны единицы измерения')
    exit()
# get either a number of kilobytes or a tuple holding the percentage
swap_min_sigterm_swap = sig_level_to_kb_swap(swap_min_sigterm)
swap_min_sigkill_swap = sig_level_to_kb_swap(swap_min_sigkill)

# a tuple means the level was given in percent; otherwise it is already in KiB
swap_term_is_percent = isinstance(swap_min_sigterm_swap, tuple)
if swap_term_is_percent:
    swap_min_sigterm_percent = swap_min_sigterm_swap[0]
else:
    swap_min_sigterm_kb = swap_min_sigterm_swap

swap_kill_is_percent = isinstance(swap_min_sigkill_swap, tuple)
if swap_kill_is_percent:
    swap_min_sigkill_percent = swap_min_sigkill_swap[0]
else:
    swap_min_sigkill_kb = swap_min_sigkill_swap
###########################################################################################
# - self-protection and config printing
# apply the configured niceness increment to this process
try:
    os.nice(self_nice)
    self_nice_result = 'OK'
except PermissionError:
    self_nice_result = 'Fail'

# set our own oom_score_adj (makes it possible to keep the daemon itself
# from being chosen as a victim); PermissionError is an OSError, so one
# handler covers both cases that were listed separately before
try:
    with open('/proc/self/oom_score_adj', 'w') as file:
        file.write('{}\n'.format(self_oom_score_adj))
    self_oom_score_adj_result = 'OK'
except OSError:
    self_oom_score_adj_result = 'Fail'

# keep this process from being swapped out
if mlockall:
    # NOTE(review): 3 is presumably MCL_CURRENT | MCL_FUTURE — confirm for
    # the target architecture
    result = CDLL('libc.so.6', use_errno=True).mlockall(3)
    # compare by value: `is 0` tested object identity with an int literal
    if result == 0:
        mla_res = 'OK'
    else:
        mla_res = 'Fail'
else:
    mla_res = ''

# lowering other processes' oom_score_adj is only attempted as root
if os.geteuid() == 0:
    root = True
    decrease_res = 'OK'
else:
    root = False
    decrease_res = 'Impossible'
if print_config:
    # one entry per output line, in the order they are printed
    report = (
        'print_config: {}'.format(print_config),
        'print_mem_check_results: {}'.format(print_mem_check_results),
        'mlockall: {} ({})'.format(mlockall, mla_res),
        'self_nice: {} ({})'.format(self_nice, self_nice_result),
        'self_oom_score_adj: {} ({})'.format(self_oom_score_adj, self_oom_score_adj_result),
        'rate_mem: {}'.format(rate_mem),
        'rate_swap: {}'.format(rate_swap),
        'rate_zram: {}'.format(rate_zram),
        'mem_min_sigterm: {}'.format(mem_min_sigterm),
        'mem_min_sigkill: {}'.format(mem_min_sigkill),
        'swap_min_sigterm: {}'.format(swap_min_sigterm),
        'swap_min_sigkill: {}'.format(swap_min_sigkill),
        'zram_max_sigterm: {}'.format(zram_max_sigterm),
        'zram_max_sigkill: {}'.format(zram_max_sigkill),
        'min_delay_after_sigterm: {}'.format(min_delay_after_sigterm),
        'min_delay_after_sigkill: {}'.format(min_delay_after_sigkill),
        'oom_score_min: {}'.format(oom_score_min),
        # TODO: "False (OK)" — the "(OK)" suffix is not needed when the option is False
        'decrease_oom_score_adj: {} ({})'.format(decrease_oom_score_adj, decrease_res),
        'oom_score_adj_before: {}'.format(oom_score_adj_before),
        'oom_score_adj_after: {}'.format(oom_score_adj_after),
    )
    for row in report:
        print(row)
###########################################################################################
# - loop that checks the levels of available memory
print('Start monitoring...')

# main loop
while True:

    # read mem_available, swap_total and swap_free from /proc/meminfo:
    # line index 2 is MemAvailable, the swap indices were found at startup
    with open('/proc/meminfo') as f:
        for n, line in enumerate(f):
            if n == 2:
                mem_available = int(line.split(':')[1].split(' ')[-2])
            elif n == swap_total_index:
                swap_total = int(line.split(':')[1].split(' ')[-2])
            elif n == swap_free_index:
                swap_free = int(line.split(':')[1].split(' ')[-2])
                break

    # swap levels given in percent are recomputed from the current swap_total
    if swap_kill_is_percent:
        swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0
    if swap_term_is_percent:
        swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0

    # compute MemUsedZram from all zram devices
    disksize_sum = 0
    mem_used_total_sum = 0
    for dev in os.listdir('/sys/block'):
        if not dev.startswith('zram'):
            continue
        stat = zram_stat(dev)
        disksize_sum += int(stat[0])
        mem_used_total_sum += int(stat[1])
    mem_used_zram = (
        mem_used_total_sum + disksize_sum * zram_disksize_factor
    ) / 1024.0

    # print the results of the available-memory check
    if print_mem_check_results:
        print(
            'MemAvail: {}M {}%, SwapFree: {}M {}%, MemUsedZram: {}M {}%'.format(
                human(mem_available),
                just_percent(mem_available / mem_total),
                human(swap_free),
                just_percent(swap_free / (swap_total + 0.0001)),
                human(mem_used_zram),
                just_percent(mem_used_zram / mem_total)
            )
        )

    # percentage shown for the swap levels; '-' when the level is not below
    # swap_total (e.g. an absolute level with swap_total = 0)
    swap_sigkill_pc = (
        percent(swap_min_sigkill_kb / (swap_total + 1))
        if swap_total > swap_min_sigkill_kb else '-'
    )
    swap_sigterm_pc = (
        percent(swap_min_sigterm_kb / (swap_total + 1))
        if swap_total > swap_min_sigterm_kb else '-'
    )

    # MEM SWAP KILL
    if mem_available <= mem_min_sigkill_kb and swap_free <= swap_min_sigkill_kb:
        print(
            (
                '+ MemAvail ({}M, {}%) < mem_min_sigkill ({}M, {}%)\n SwapFree'
                ' ({}M, {}%) < swap_min_sigkill ({}M, {}%)'
            ).format(
                kib_to_mib(mem_available),
                percent(mem_available / mem_total),
                kib_to_mib(mem_min_sigkill_kb),
                percent(mem_min_sigkill_kb / mem_total),
                kib_to_mib(swap_free),
                percent(swap_free / (swap_total + 0.0001)),
                kib_to_mib(swap_min_sigkill_kb),
                swap_sigkill_pc
            )
        )
        find_victim_and_send_signal(9)
        sleep(min_delay_after_sigkill)
        continue

    # MEM ZRAM KILL
    if mem_used_zram >= zram_max_sigkill_kb:
        print(
            '+ MemUsedZram ({}M, {}%) > zram_max_sigkill ({}M, {}%)'.format(
                kib_to_mib(mem_used_zram),
                percent(mem_used_zram / mem_total),
                kib_to_mib(zram_max_sigkill_kb),
                percent(zram_max_sigkill_kb / mem_total)
            )
        )
        find_victim_and_send_signal(9)
        sleep(min_delay_after_sigkill)
        continue

    # MEM SWAP TERM
    # (unlike the KILL branches there is no `continue` after a SIGTERM, so
    # the remaining checks and the adaptive sleep below still run)
    if mem_available <= mem_min_sigterm_kb and swap_free <= swap_min_sigterm_kb:
        print(
            (
                '+ MemAvail ({}M, {}%) < mem_min_sigterm ({}M, {}%)\n SwapFree'
                ' ({}M, {}%) < swap_min_sigterm ({}M, {}%)'
            ).format(
                kib_to_mib(mem_available),
                percent(mem_available / mem_total),
                kib_to_mib(mem_min_sigterm_kb),
                percent(mem_min_sigterm_kb / mem_total),
                kib_to_mib(swap_free),
                percent(swap_free / (swap_total + 0.0001)),
                kib_to_mib(swap_min_sigterm_kb),
                swap_sigterm_pc
            )
        )
        find_victim_and_send_signal(15)
        sleep(min_delay_after_sigterm)

    # MEM ZRAM TERM
    if mem_used_zram >= zram_max_sigterm_kb:
        print(
            '+ MemUsedZram ({}M, {}%) > zram_max_sigterm ({}M, {}%)'.format(
                kib_to_mib(mem_used_zram),
                percent(mem_used_zram / mem_total),
                kib_to_mib(zram_max_sigterm_kb),
                percent(zram_max_sigterm_kb / mem_total)
            )
        )
        find_victim_and_send_signal(15)
        sleep(min_delay_after_sigterm)

    # choose the polling period from the rates and the current memory levels
    t_mem = mem_available / 1000000.0 / rate_mem
    t_swap = swap_free / 10000000.0 / rate_swap
    # the zram term is never allowed to drop below 0.01
    t_zram = max((mem_total * 0.8 - mem_used_zram) / 1000000.0 / rate_zram, 0.01)

    t_mem_swap = t_mem + t_swap
    t_mem_zram = t_mem + t_zram
    t = min(t_mem_swap, t_mem_zram)

    try:
        sleep(t)
    except KeyboardInterrupt:
        exit()