1767 lines
53 KiB
Python
Executable File
1767 lines
53 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""A daemon that prevents OOM in Linux systems."""
|
||
import os
|
||
from time import sleep, time
|
||
from operator import itemgetter
|
||
from sys import stdout
|
||
from signal import SIGKILL, SIGTERM, SIGSTOP, SIGCONT
|
||
|
||
# Reference point for the uptime shown in the corrective-action statistics.
start_time = time()

# Printable names of the signals that can be sent to a victim.
sig_dict = {
    SIGKILL: 'SIGKILL',
    SIGTERM: 'SIGTERM',
}

# Own PID as a string so it can be compared with /proc directory names.
self_pid = str(os.getpid())

self_uid = os.geteuid()

# True when the daemon runs with effective UID 0.
root = self_uid == 0

# Timeout (seconds) granted to the notification helper processes.
wait_time = 3

# todo: make config option
max_sleep_time = 2

# todo: make config option
min_sleep_time = 0.1

notify_helper_path = '/usr/sbin/nohang_notify_helper'

psi_path = '/proc/pressure/memory'

# PSI counts as supported when the pressure file exists.
psi_support = os.path.exists(psi_path)

# Horizontal rule printed above the statistics block.
HR = '~' * 79

# todo: make config option
print_total_stat = True

# When set, other processes are SIGSTOPped while a victim is handled
# (stop_cont) or while a low-memory warning is sent (stop_cont_warn).
stop_cont = False
stop_cont_warn = False
|
||
|
||
|
||
|
||
|
||
|
||
##########################################################################
|
||
|
||
# define functions
|
||
|
||
|
||
|
||
|
||
def pid_to_state(pid):
    """
    Get the state character of a process from /proc/<pid>/stat.

    pid: str pid of required process
    returns str, the single character that follows ') ' in the stat line
    """
    # Split on the LAST ')' because the process name inside the
    # parentheses may itself contain ')' characters.
    return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1]
|
||
|
||
def stop():
    """
    Send SIGSTOP to non-root processes whose oom_score is greater than 9.

    /proc/1 and the daemon itself are always skipped.

    returns list of str pids that were stopped (to be passed to cont())
    """
    t1 = t2 = time()
    stopped_list = []
    for pid in os.listdir('/proc')[::-1]:
        # only directories whose names start with a digit, except /proc/1/
        # and the daemon's own entry ('==' rather than 'is': string
        # identity is an implementation detail)
        if not pid[0].isdecimal() or pid == '1' or pid == self_pid:
            continue
        try:
            oom_score_r = int(rline1('/proc/' + pid + '/oom_score'))
            if oom_score_r <= 9:
                continue
            if pid_to_uid(pid) == '0':
                continue
            print('Send SIGSTOP to {}, {}, {}...'.format(
                pid, pid_to_name(pid), pid_to_cmdline(pid)[:40]))
            os.kill(int(pid), SIGSTOP)
            # Record the pid only after the signal was really delivered.
            stopped_list.append(pid)
            t2 = time()
        except (FileNotFoundError, ProcessLookupError, PermissionError):
            # The process vanished or may not be signalled: skip it.
            continue
    print('Stop time:', t2 - t1)
    stdout.flush()

    return stopped_list
|
||
|
||
def cont(stopped_list):
    """
    Send SIGCONT to every process that was stopped by stop().

    stopped_list: list of str pids as returned by stop()
    """
    print()
    print('Continue stopped processes...')
    t1 = time()
    for pid in stopped_list:
        print('Send SIGCONT to', [pid], pid_to_name(pid))
        try:
            os.kill(int(pid), SIGCONT)
        except (FileNotFoundError, ProcessLookupError, PermissionError):
            # Keep going: one failing pid must not leave the remaining
            # processes frozen.
            continue
    t2 = time()
    print('All cont time: ', t2 - t1)
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
def update_stat_dict_and_print(key):
    """
    Count one more occurrence of a corrective action and print the totals.

    key: str description of the action, used as the stat_dict key
    """
    stat_dict[key] = stat_dict.get(key, 0) + 1

    if print_total_stat:
        stats_msg = '{}\n\033[4mThe following corrective actions have been implemented in the last {}:\033[0m'.format(
            HR, format_time(time() - start_time))
        stats_msg += ''.join(
            '\n- {}: {}'.format(action, count)
            for action, count in stat_dict.items())
        print(stats_msg)
|
||
|
||
|
||
def psi_mem_some_avg_total():
    """
    Read the last 'key=value' value on the first line of the PSI memory file.

    returns float, or None when psi_support is False
    """
    if not psi_support:
        return None
    return float(rline1(psi_path).rpartition('=')[2])
|
||
|
||
|
||
def psi_mem_some_avg10():
    """
    Read the first 'key=value' value on the first line of the PSI memory file.

    Unlike psi_mem_some_avg_total() this does not check psi_support, so
    the open() inside rline1() fails when the file is missing.

    returns float
    """
    return float(rline1(psi_path).split(' ')[1].split('=')[1])
|
||
|
||
|
||
def check_mem(meminfo_path='/proc/meminfo'):
    """
    Find mem_available.

    meminfo_path: str path of the meminfo file to read
    returns int value of the third line (MemAvailable) in KiB,
            or None if the file has fewer than three lines
    """
    with open(meminfo_path) as f:
        for n, line in enumerate(f):
            # The first line is MemTotal; MemAvailable is the third one
            # (the same position check_mem_and_swap() reads).
            if n == 2:
                return int(line.split(':')[1].strip(' kB\n'))
    return None
|
||
|
||
|
||
def check_mem_and_swap():
    """
    Find mem_available, swap_total, swap_free.

    returns tuple of three int values in KiB, read from /proc/meminfo
    """
    with open('/proc/meminfo') as f:
        for n, line in enumerate(f):
            # Line numbers are compared with '==': 'is' on integers only
            # works by accident of small-int caching.
            if n == 2:
                mem_available = int(line.split(':')[1].strip(' kB\n'))
            elif n == swap_total_index:
                swap_total = int(line.split(':')[1].strip(' kB\n'))
            elif n == swap_free_index:
                swap_free = int(line.split(':')[1].strip(' kB\n'))
                # nothing of interest after SwapFree
                break
    return mem_available, swap_total, swap_free
|
||
|
||
|
||
def check_zram():
    """
    Find MemUsedZram.

    returns float: (sum of mem_used_total + sum of disksize *
            ZRAM_DISKSIZE_FACTOR) over all zram devices, divided by 1024
    """
    # ZRAM_DISKSIZE_FACTOR = deltaMemAvailable / disksize.
    # It means that setting zram disksize = 1 GiB reduces the available
    # memory by 0.0042 GiB.
    # Found experimentally; needs to be refined on different kernels and
    # architectures. For small disksizes (up to one gigabyte) it may be
    # larger, up to 0.0045.
    # The author of the zram module states that the factor should be 0.001
    # ("zram uses about 0.1% of the size of the disk"
    # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt),
    # but that statement contradicts the experimental data.
    ZRAM_DISKSIZE_FACTOR = 0.0042

    disksize_sum = 0
    mem_used_total_sum = 0

    for dev in os.listdir('/sys/block'):
        if dev.startswith('zram'):
            disksize, mem_used_total = zram_stat(dev)
            disksize_sum += int(disksize)
            mem_used_total_sum += int(mem_used_total)

    return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
|
||
|
||
|
||
def format_time(t):
    """
    Format a duration given in seconds as a readable string.

    t: int or float number of seconds (the fraction is truncated)
    returns str such as '42 sec', '3 min 7 sec' or '2 h 0 min 5 sec'
    """
    t = int(t)
    if t < 60:
        return '{} sec'.format(t)
    m, s = divmod(t, 60)
    if t < 3600:
        return '{} min {} sec'.format(m, s)
    h, m = divmod(m, 60)
    return '{} h {} min {} sec'.format(h, m, s)
|
||
|
||
|
||
def string_to_float_convert_test(string):
    """
    Try to interpret a string value as a float.

    string: value to convert
    returns float, or None if the value cannot be converted
    """
    try:
        return float(string)
    except (ValueError, TypeError):
        # TypeError covers non-string input such as None
        return None
|
||
|
||
|
||
def string_to_int_convert_test(string):
    """
    Try to interpret a string value as an integer.

    string: value to convert
    returns int, or None if the value cannot be converted
    """
    try:
        return int(string)
    except (ValueError, TypeError):
        # TypeError covers non-string input such as None
        return None
|
||
|
||
|
||
def conf_parse_string(param):
    """
    Get string parameters from the config dict.

    Prints an error and exits when the parameter is missing.

    param: config_dict key
    returns config_dict[param].strip()
    """
    if param not in config_dict:
        print('All the necessary parameters must be in the config')
        print('There is no "{}" parameter in the config'.format(param))
        exit()
    return config_dict[param].strip()
|
||
|
||
|
||
def conf_parse_bool(param):
    """
    Get bool parameters from the config_dict.

    Prints an error and exits when the parameter is missing or its value
    is neither 'True' nor 'False'.

    param: config_dict key
    returns bool
    """
    if param not in config_dict:
        print('All the necessary parameters must be in the config')
        # Report the parameter NAME; the value does not exist here.
        print('There is no "{}" parameter in the config'.format(param))
        exit()

    param_str = config_dict[param]
    if param_str == 'True':
        return True
    if param_str == 'False':
        return False

    print('Invalid value of the "{}" parameter.'.format(param))
    print('Valid values are True and False.')
    print('Exit')
    exit()
|
||
|
||
|
||
def rline1(path):
    """
    Read the first line from path.

    path: str path of the file
    returns str first line without its trailing newline
            ('' for an empty file)
    """
    with open(path) as f:
        line = f.readline()
    # Strip only a real newline: a last line without one must keep its
    # final character.
    if line.endswith('\n'):
        return line[:-1]
    return line
|
||
|
||
|
||
def kib_to_mib(num):
    """
    Convert KiB values to MiB values.

    num: number of KiB
    returns int number of MiB, rounded with the built-in round()
    """
    return round(num / 1024.0)
|
||
|
||
|
||
def percent(num):
    """
    Interpret num as a percentage.

    num: fraction (1 means 100 %)
    returns num * 100 rounded to one decimal place
    """
    return round(num * 100, 1)
|
||
|
||
|
||
def just_percent_mem(num):
    """
    Convert num to percent and right-justify it to 4 characters.

    num: fraction (1 means 100 %)
    returns str
    """
    return str(round(num * 100, 1)).rjust(4, ' ')
|
||
|
||
|
||
def just_percent_swap(num):
    """
    Convert num to percent and right-justify it to 5 characters.

    num: fraction (1 means 100 %)
    returns str
    """
    return str(round(num * 100, 1)).rjust(5, ' ')
|
||
|
||
|
||
def human(num, lenth):
    """
    Convert KiB values to MiB values with right alignment.

    num: number of KiB
    lenth: minimum width of the returned string
    returns str
    """
    return str(round(num / 1024)).rjust(lenth, ' ')
|
||
|
||
|
||
def zram_stat(zram_id):
    """
    Get zram state.

    zram_id: str zram block-device id
    returns str disksize, str mem_used_total (both byte counts);
            ('0', '0') when the device has no disksize file or its
            disksize is 0
    """
    try:
        disksize = rline1('/sys/block/' + zram_id + '/disksize')
    except FileNotFoundError:
        return '0', '0'
    # rline1() returns a plain string without the newline, so the value
    # has to be compared with '0', not with a list.
    if disksize == '0':
        return '0', '0'
    try:
        # mm_stat is a whitespace separated list; the third field is
        # mem_used_total
        mem_used_total = rline1(
            '/sys/block/' + zram_id + '/mm_stat').split()[2]
    except FileNotFoundError:
        # no mm_stat file: fall back to the dedicated attribute file
        mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total')
    return disksize, mem_used_total  # BYTES, str
|
||
|
||
|
||
def pid_to_name(pid):
    """
    Get process name by pid.

    pid: str pid of required process
    returns string process_name ('' if the process cannot be read)
    """
    try:
        # Binary mode plus errors='ignore' makes a separate
        # UnicodeDecodeError fallback unnecessary.
        with open('/proc/' + pid + '/status', 'rb') as f:
            # skip the 6 leading bytes ('Name:' and the tab)
            f.seek(6)
            return f.readline().decode('utf-8', 'ignore').rstrip('\n')
    except (FileNotFoundError, ProcessLookupError):
        return ''
|
||
|
||
|
||
def pid_to_cmdline(pid):
    """
    Get process cmdline by pid.

    pid: str pid of required process
    returns string cmdline with the NUL separators replaced by spaces
    raises FileNotFoundError if the process does not exist
    """
    # errors='ignore': arguments are arbitrary bytes and must not make
    # the read fail with UnicodeDecodeError.
    with open('/proc/' + pid + '/cmdline', errors='ignore') as f:
        return f.read().replace('\x00', ' ').rstrip()
|
||
|
||
|
||
|
||
def pid_to_environ(pid):
    """
    Get process environment by pid.

    pid: str pid of required process
    returns string environ, one 'NAME=value' entry per line
    raises FileNotFoundError if the process does not exist
    """
    # errors='ignore': the environment is arbitrary bytes and must not
    # make the read fail with UnicodeDecodeError.
    with open('/proc/' + pid + '/environ', errors='ignore') as f:
        return f.read().replace('\x00', '\n').rstrip()
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
def pid_to_uid(pid):
    """
    Return euid.

    pid: str pid of required process
    returns str, third tab-separated field of line uid_index of
            /proc/<pid>/status, or None if the file has no such line
    """
    # errors='ignore' replaces the former UnicodeDecodeError fallback.
    with open('/proc/' + pid + '/status', errors='ignore') as f:
        for n, line in enumerate(f):
            if n == uid_index:
                return line.split('\t')[2]
    return None
|
||
|
||
|
||
def notify_send_wait(title, body):
    """
    GUI notifications with UID != 0.

    Runs notify-send and waits up to wait_time seconds for it.

    title: str notification title
    body: str notification body
    """
    with Popen(['notify-send', '--icon=dialog-warning', title, body]) as proc:
        try:
            proc.wait(timeout=wait_time)
        except TimeoutExpired:
            proc.kill()
            print('TimeoutExpired: notify-send {} {}'.format(title, body))
|
||
|
||
|
||
def notify_helper(title, body):
    """
    GUI notification with UID = 0.

    Runs the program at notify_helper_path and waits up to wait_time
    seconds for it.

    title: str notification title
    body: str notification body
    """
    with Popen([notify_helper_path, title, body]) as proc:
        try:
            proc.wait(timeout=wait_time)
        except TimeoutExpired:
            proc.kill()
            print(
                'TimeoutExpired: nohang_notify_helper: {} {}'.format(
                    title, body))
|
||
|
||
|
||
def send_notify_warn():
    """
    Look for process with maximum 'badness' and warn user with notification.

    (implement Low memory warnings)
    """
    if stop_cont_warn:
        stopped_list = stop()

    try:
        # find process with max badness
        pid = fattest()[0]
        name = pid_to_name(pid)

        title = 'Low memory'

        body = 'Hog: <b>{}</b> [{}]'.format(
            # symbol '&' can break notifications in some themes,
            # therefore it is replaced by '*'
            name.replace('&', '*'),
            pid
        )

        if root:  # If nohang was started by root
            # send notification to all active users with special script
            notify_helper(title, body)
        else:  # Or by regular user
            # send notification to user that runs this nohang
            notify_send_wait(title, body)
    finally:
        # Resume the stopped processes even if the notification failed.
        if stop_cont_warn:
            cont(stopped_list)
|
||
|
||
|
||
def send_notify(signal, name, pid):
    """
    Notify about OOM prevention.

    signal: key for notify_sig_dict
    name: str process name
    pid: str process pid
    """
    title = 'Hang prevention'
    body = '<b>{} {}</b> [{}]'.format(
        notify_sig_dict[signal],
        # symbol '&' can break notifications in some themes,
        # therefore it is replaced by '*'
        name.replace('&', '*'),
        pid
    )
    if root:
        # send notification to all active users with the helper script
        notify_helper(title, body)
    else:
        # send notification to user that runs this nohang
        notify_send_wait(title, body)
|
||
|
||
|
||
def send_notify_etc(pid, name, command):
    """
    Notify about OOM prevention by running a command.

    pid: str process pid
    name: str process name
    command: str command that will be executed
    """
    title = 'Hang prevention'
    # symbol '&' can break notifications in some themes,
    # therefore it is replaced by '*'
    body = 'Victim is process <b>{}</b> [{}]\nExecute the command:\n<b>{}</b>'.format(
        name.replace('&', '*'),
        pid,
        command.replace('&', '*')
    )
    if root:
        # send notification to all active users with the helper script
        notify_helper(title, body)
    else:
        # send notification to user that runs this nohang
        notify_send_wait(title, body)
|
||
|
||
|
||
def sleep_after_send_signal(signal):
    """
    Sleeping after signal was sent.

    signal: sent signal
    """
    # '==' also matches a plain integer signal number, 'is' does not.
    if signal == SIGKILL:
        delay = min_delay_after_sigkill
    else:
        delay = min_delay_after_sigterm

    if print_sleep_periods:
        print(' sleep', delay)
    sleep(delay)
|
||
|
||
|
||
def fattest():
    """
    Find the process with the highest badness (oom_score).

    /proc/1, the daemon itself and entries without a resolvable
    /proc/<pid>/exe are ignored.

    returns tuple (str pid, int badness)
    raises IndexError if no candidate process was found

    -> find_mem_hog() or find_victim()
    """
    pid_badness_list = []

    for pid in os.listdir('/proc'):
        # only directories whose names start with a digit, except /proc/1/
        # and the daemon's own entry
        if not pid[0].isdecimal() or pid == '1' or pid == self_pid:
            continue

        if not os.path.exists('/proc/' + pid + '/exe'):
            continue

        try:
            badness = int(rline1('/proc/' + pid + '/oom_score'))
        except (FileNotFoundError, ProcessLookupError):
            # the process exited while /proc was being scanned
            continue
        pid_badness_list.append((pid, badness))

    if not pid_badness_list:
        raise IndexError('no candidate process found in /proc')

    # max() keeps the first of several equally bad processes, exactly
    # like a stable descending sort followed by [0].
    pid, victim_badness = max(pid_badness_list, key=itemgetter(1))

    return pid, victim_badness
|
||
|
||
|
||
|
||
# NOTE(review): debug/benchmark harness. It runs whenever the file is
# executed, times a single fattest() call, prints the result and then
# exits unconditionally, so no statement after this point is ever
# reached. Remove it (or put it behind a command-line switch) before the
# daemon is expected to run.
t0 = time()
x = fattest()
t1 = time()

print(t1 - t0)

print(x)

exit()
|
||
|
||
|
||
|
||
|
||
def _read_victim_status(pid):
    """
    Collect the /proc/<pid>/status fields shown in the victim report.

    pid: str pid of the victim
    returns dict with 'uid' (str) and 'vm_size', 'vm_rss', 'vm_swap'
            (MiB) plus, when detailed_rss is set, 'anon_rss', 'file_rss'
            and 'shmem_rss' (MiB)
    raises IndexError if the file ends before every field was found
    """
    fields = {vm_size_index: 'vm_size',
              vm_rss_index: 'vm_rss',
              vm_swap_index: 'vm_swap'}
    if detailed_rss:
        fields.update({anon_index: 'anon_rss',
                       file_index: 'file_rss',
                       shmem_index: 'shmem_rss'})

    status = {}
    # errors='ignore' replaces the former UnicodeDecodeError fallback,
    # whose own file accesses were not protected by any handler.
    with open('/proc/' + pid + '/status', errors='ignore') as f:
        for n, line in enumerate(f):
            if n == uid_index:
                # first number of the Uid line.
                # NOTE(review): pid_to_uid() reports the second one.
                status['uid'] = line.split('\t')[1]
            elif n in fields:
                # the value field ends with ' kB\n'; cut those 4 characters
                status[fields[n]] = kib_to_mib(
                    int(line.split('\t')[1][:-4]))

    if len(status) != len(fields) + 1:
        raise IndexError('incomplete status file for pid ' + pid)
    return status


def _format_victim_info(name, pid, victim_badness, status,
                        oom_score, oom_score_adj, cmdline):
    """Build the multi-line, colourised description of the victim."""
    len_vm = len(str(status['vm_size']))
    vm_rss = str(status['vm_rss']).rjust(len_vm)
    vm_swap = str(status['vm_swap']).rjust(len_vm)

    if not detailed_rss:
        return '\033[4mFound a process with highest badness:\033[0m' \
            '\n Name: \033[33m{}\033[0m' \
            '\n PID: \033[33m{}\033[0m' \
            '\n UID: \033[33m{}\033[0m' \
            '\n Badness: \033[33m{}\033[0m, ' \
            'oom_score: \033[33m{}\033[0m, ' \
            'oom_score_adj: \033[33m{}\033[0m' \
            '\n VmSize: \033[33m{}\033[0m MiB' \
            '\n VmRSS: \033[33m{}\033[0m MiB' \
            '\n VmSwap: \033[33m{}\033[0m MiB' \
            '\n CmdLine: \033[33m{}\033[0m'.format(
                name,
                pid,
                status['uid'],
                victim_badness,
                oom_score,
                oom_score_adj,
                status['vm_size'],
                vm_rss,
                vm_swap,
                cmdline)

    # NOTE(review): the environment may contain secrets and ends up in
    # the log output.
    try:
        environ = pid_to_environ(pid)
    except (OSError, UnicodeDecodeError):
        # unreadable (process gone or no permission): report it as empty
        environ = ''

    return '\033[4mFound a process with highest badness:\033[0m' \
        '\n Name: \033[33m{}\033[0m' \
        '\n PID: \033[33m{}\033[0m' \
        '\n UID: \033[33m{}\033[0m' \
        '\n badness: \033[33m{}\033[0m, ' \
        'oom_score: \033[33m{}\033[0m, ' \
        'oom_score_adj: \033[33m{}\033[0m' \
        '\n VmSize: \033[33m{}\033[0m MiB' \
        '\n VmRSS: \033[33m{}\033[0m MiB (' \
        'Anon: \033[33m{}\033[0m MiB, ' \
        'File: \033[33m{}\033[0m MiB, ' \
        'Shmem: \033[33m{}\033[0m MiB)' \
        '\n VmSwap: \033[33m{}\033[0m MiB' \
        '\n environ:\n\033[33m{}\033[0m' \
        '\n cmdline: \033[33m{}\033[0m'.format(
            name,
            pid,
            status['uid'],
            victim_badness,
            oom_score,
            oom_score_adj,
            status['vm_size'],
            vm_rss,
            status['anon_rss'],
            status['file_rss'],
            status['shmem_rss'],
            vm_swap,
            environ,
            cmdline)


def _find_victim_and_act(signal):
    """
    Pick the victim and run the corrective action.

    returns False when the victim disappeared while its data was being
            collected (nothing was done), True otherwise
    """
    pid, victim_badness = fattest()
    name = pid_to_name(pid)

    if victim_badness < min_badness:
        response_time = time() - time0
        print(mem_info)
        victim_badness_is_too_small = 'victim badness {} < min_b' \
            'adness {}; nothing to do; response time: {} ms'.format(
                victim_badness,
                min_badness,
                round(response_time * 1000))
        print(victim_badness_is_too_small)

        # update stat_dict
        update_stat_dict_and_print('victim badness < min_badness')
        return True

    # Get VmRSS and VmSwap and cmdline of victim process
    died = (FileNotFoundError, ProcessLookupError, IndexError, ValueError)
    try:
        status = _read_victim_status(pid)

        with open('/proc/' + pid + '/cmdline', errors='ignore') as file:
            # IndexError here means an empty cmdline
            cmdline = file.readlines()[0].replace('\x00', ' ')

        oom_score = rline1('/proc/' + pid + '/oom_score')
        oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
    except died as e:
        # report the base class name so the statistics keys stay stable
        reason = next(c.__name__ for c in died if isinstance(e, c))
        key = 'The victim died in the search process: ' + reason
        print(mem_info)
        print(key)
        update_stat_dict_and_print(key)
        return False

    victim_info = _format_victim_info(
        name, pid, victim_badness, status, oom_score, oom_score_adj, cmdline)

    if execute_the_command and signal == SIGTERM and name in etc_dict:
        command = etc_dict[name]
        # NOTE(review): security - the process name is chosen by the
        # process itself and is pasted into a shell command unescaped.
        expanded = command.replace(
            '$PID', pid).replace('$NAME', pid_to_name(pid))
        exit_status = os.system(expanded)
        if exit_status == 0:
            exit_status = '\033[32m0\033[0m'
        else:
            exit_status = '\033[31m{}\033[0m'.format(exit_status)

        response_time = time() - time0

        etc_info = '{}' \
            '\n\033[4mImplement corrective action:\033[0m\n Run the command: \033[4m{}\033[0m' \
            '\n Exit status: {}; response time: {} ms'.format(
                victim_info, expanded, exit_status,
                round(response_time * 1000))

        print(mem_info)
        print(etc_info)

        key = "Run the command '\033[35m{}\033[0m'".format(command)
        update_stat_dict_and_print(key)

        if gui_notifications:
            send_notify_etc(pid, name, expanded)
        return True

    # Try to send signal to found victim
    try:
        m = check_mem_and_swap()
        ma = round(int(m[0]) / 1024.0)
        sf = round(int(m[2]) / 1024.0)
        print('\nMemory status before sending a signal:\nMemA'
              'v: {} MiB, SwFree: {} MiB'.format(ma, sf))

        if stop_cont:
            # a stopped process has to run again to act on the signal
            os.kill(int(pid), SIGCONT)

        os.kill(int(pid), signal)
        response_time = time() - time0
        send_result = '\033[32mOK\033[0m; response time: {} ms'.format(
            round(response_time * 1000))

        key = 'Send \033[35m{}\033[0m to \033[35m{}\033[0m'.format(
            sig_dict[signal], name)

        if gui_notifications:
            send_notify(signal, name, pid)

    except (FileNotFoundError, ProcessLookupError) as e:
        response_time = time() - time0
        send_result = 'no such process; response time: {} ms'.format(
            round(response_time * 1000))
        reason = ('FileNotFoundError' if isinstance(e, FileNotFoundError)
                  else 'ProcessLookupError')
        key = 'The victim died in the search process: ' + reason

    # Built after the try block so it also exists when the signal could
    # not be delivered.
    preventing_oom_message = '{}' \
        '\n\033[4mImplement a corrective action:\033[0m\n ' \
        'Sending \033[4m{}\033[0m to the victim; {}'.format(
            victim_info, sig_dict[signal], send_result)

    print(mem_info)
    try:
        print(pid_to_state(pid))
    except (FileNotFoundError, ProcessLookupError, IndexError):
        # the victim is usually gone by now; its state is debug output only
        pass
    print(preventing_oom_message)

    update_stat_dict_and_print(key)
    return True


def find_victim_and_send_signal(signal):
    """
    Find victim with highest badness and send SIGTERM/SIGKILL.

    With stop_cont set, other processes are stopped for the duration of
    the action and are always resumed afterwards. The post-signal sleep
    is skipped when the victim vanished before anything was done.

    signal: signal to send to the victim

    -> implement_corrective_action()
    """
    if stop_cont:
        stopped_list = stop()

    try:
        acted = _find_victim_and_act(signal)
    finally:
        # Never leave the stopped processes frozen, whatever happened.
        if stop_cont:
            cont(stopped_list)

    if acted:
        sleep_after_send_signal(signal)
|
||
|
||
|
||
def sleep_after_check_mem():
    """
    Sleep for a time that depends on the rates and the available memory.

    The sleep time is the estimated time until the nearest threshold
    could be reached, clamped to [min_sleep_time, max_sleep_time].
    """
    # distance to the larger (first reached) of the two thresholds
    mem_point = mem_available - max(mem_min_sigkill_kb, mem_min_sigterm_kb)
    swap_point = swap_free - max(swap_min_sigkill_kb, swap_min_sigterm_kb)

    t_mem = mem_point / rate_mem
    t_swap = swap_point / rate_swap
    t_zram = (mem_total * 0.9 - mem_used_zram) / rate_zram

    t = min(t_mem + t_swap, t_mem + t_zram)

    if t > max_sleep_time:
        t = max_sleep_time
    elif t < min_sleep_time:
        t = min_sleep_time

    try:
        if print_sleep_periods:
            print('sleep', round(t, 2))
        stdout.flush()
        sleep(t)
    except KeyboardInterrupt:
        exit()
|
||
|
||
|
||
def calculate_percent(arg_key):
    """
    Parse a memory threshold from the config dict.

    The value must end with '%' (percentage of mem_total) or with 'M'
    (MiB). Any invalid or missing value prints an error and exits.

    arg_key: str key for config_dict
    returns tuple (mem_min_kb, mem_min_mb, mem_min_percent)
    """
    if arg_key not in config_dict:
        print('{} not in config\nExit'.format(arg_key))
        exit()

    mem_min = config_dict[arg_key]

    if mem_min.endswith('%'):
        # truncate percents, so we have a number; then 'float test'
        mem_min_percent = string_to_float_convert_test(mem_min[:-1].strip())
        if mem_min_percent is None:
            print('Invalid {} value, not float\nExit'.format(arg_key))
            exit()
        # Final validations...
        if mem_min_percent < 0 or mem_min_percent > 100:
            print(
                '{}, as percents value, out of range [0; 100]\nExit'.format(arg_key))
            exit()

        # the percentage is clean and valid; translate it into KiB
        mem_min_kb = mem_min_percent / 100 * mem_total
        mem_min_mb = round(mem_min_kb / 1024)

    elif mem_min.endswith('M'):
        mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip())
        if mem_min_mb is None:
            print('Invalid {} value, not float\nExit'.format(arg_key))
            exit()
        mem_min_kb = mem_min_mb * 1024
        if mem_min_kb > mem_total:
            print(
                '{} value can not be greater then MemTotal ({} MiB)\nExit'.format(
                    arg_key, round(
                        mem_total / 1024)))
            exit()
        mem_min_percent = mem_min_kb / mem_total * 100

    else:
        print('Invalid {} units in config.\n Exit'.format(arg_key))
        # really exit, as announced: falling through would hit the
        # return statement with unbound locals
        exit()

    return mem_min_kb, mem_min_mb, mem_min_percent
|
||
|
||
|
||
##########################################################################
|
||
|
||
|
||
# find mem_total
|
||
# find positions of SwapFree and SwapTotal in /proc/meminfo
|
||
|
||
with open('/proc/meminfo') as f:
    mem_list = f.readlines()

# field names in file order, e.g. mem_list_names[0] is the first key
mem_list_names = [entry.split(':')[0] for entry in mem_list]

# The readers of /proc/meminfo rely on MemAvailable being the third line.
if mem_list_names[2] != 'MemAvailable':
    print('Your Linux kernel is too old, Linux 3.14+ requied\nExit')
    exit()

# SwapFree is read from the line right after SwapTotal.
swap_total_index = mem_list_names.index('SwapTotal')
swap_free_index = swap_total_index + 1

mem_total = int(mem_list[0].split(':')[1].strip(' kB\n'))
|
||
|
||
# Get names from /proc/*/status to be able to get VmRSS and VmSwap values
|
||
|
||
# The daemon's own status file serves as a template: the line numbers
# found here are later applied to the status files of other processes.
with open('/proc/self/status') as file:
    status_list = file.readlines()

status_names = [entry.split(':')[0] for entry in status_list]

ppid_index = status_names.index('PPid')
vm_size_index = status_names.index('VmSize')
vm_rss_index = status_names.index('VmRSS')
vm_swap_index = status_names.index('VmSwap')
uid_index = status_names.index('Uid')

# The Rss* breakdown is optional; without it only VmRSS is reported.
try:
    anon_index = status_names.index('RssAnon')
    file_index = status_names.index('RssFile')
    shmem_index = status_names.index('RssShmem')
    detailed_rss = True
except ValueError:
    detailed_rss = False
|
||
|
||
##########################################################################
|
||
|
||
# Path of the configuration file.
config = '/etc/nohang/nohang.conf'

print('Config:', config)
|
||
|
||
|
||
##########################################################################
|
||
|
||
# parsing the config with obtaining the parameters dictionary
|
||
|
||
# conf_parameters_dict
|
||
# conf_restart_dict
|
||
|
||
# dictionary with config options
|
||
# dictionary with config options
config_dict = dict()

processname_re_list = []
cmdline_re_list = []
uid_re_list = []

# dictionary with names and commands for the parameter
# execute_the_command
# (a list is needed here as well, not a dictionary)
etc_dict = dict()

# will store corrective actions stat
stat_dict = dict()

# Text printed after the exception name when the config cannot be read.
# NOTE(review): the handlers below referenced this name although nothing
# above defined it; confirm the wording.
conf_err_mess = 'Invalid config. Exit.'

try:
    with open(config) as f:

        for line in f:

            etc = line.startswith('$ETC')

            # 'key = value' lines: everything that is not a comment, an
            # empty line, an indented line or an $ETC line.
            # NOTE(review): '@..._RE' lines also pass this test and are
            # stored in config_dict as well.
            if not line.startswith(('#', '\n', '\t', ' ')) and not etc:
                key, _, value = line.partition('=')
                config_dict[key.strip()] = value.strip()

            if etc:
                # $ETC <process name> /// <command>
                a = line[4:].split('///')
                etc_name = a[0].strip()
                etc_command = a[1].strip()
                if len(etc_name) > 15:
                    print('Invalid config, the length of the process '
                          'name must not exceed 15 characters\nExit')
                    exit()
                etc_dict[etc_name] = etc_command

            # NEED VALIDATION!
            if line.startswith('@PROCESSNAME_RE'):
                a = line.partition('@PROCESSNAME_RE')[
                    2].strip(' \n').partition('///')
                processname_re_list.append((a[0].strip(' '), a[2].strip(' ')))

            if line.startswith('@CMDLINE_RE'):
                a = line.partition('@CMDLINE_RE')[2].strip(
                    ' \n').partition('///')
                cmdline_re_list.append((a[0].strip(' '), a[2].strip(' ')))

            if line.startswith('@UID_RE'):
                a = line.partition('@UID_RE')[2].strip(' \n').partition('///')
                uid_re_list.append((a[0].strip(' '), a[2].strip(' ')))

except (FileNotFoundError, PermissionError, UnicodeDecodeError,
        IsADirectoryError, IndexError) as e:
    # IndexError: an $ETC line without the '///' separator
    print(type(e).__name__, conf_err_mess)
    exit()
|
||
|
||
# print(processname_re_list)
|
||
# print(cmdline_re_list)
|
||
# print(uid_re_list)
|
||
|
||
##########################################################################
|
||
|
||
|
||
# extracting parameters from the dictionary
|
||
# check for all necessary parameters
|
||
# validation of all parameters
|
||
|
||
print_config = conf_parse_bool('print_config')
print_mem_check_results = conf_parse_bool('print_mem_check_results')
print_sleep_periods = conf_parse_bool('print_sleep_periods')
gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
gui_notifications = conf_parse_bool('gui_notifications')
decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
execute_the_command = conf_parse_bool('execute_the_command')

ignore_psi = conf_parse_bool('ignore_psi')

regex_matching = conf_parse_bool('regex_matching')

re_match_cmdline = conf_parse_bool('re_match_cmdline')

re_match_uid = conf_parse_bool('re_match_uid')

# The regex machinery is imported only when one of the matching options
# is enabled.
if regex_matching or re_match_cmdline or re_match_uid:
    from re import search
    # NOTE(review): sre_constants is deprecated since Python 3.11;
    # re.error is the documented name of the same exception.
    import sre_constants
|
||
|
||
# Memory and zram thresholds. Each calculate_percent() call yields the same
# threshold in three forms, unpacked here as (KiB, MiB, percent).

(mem_min_sigterm_kb,
 mem_min_sigterm_mb,
 mem_min_sigterm_percent) = calculate_percent('mem_min_sigterm')

(mem_min_sigkill_kb,
 mem_min_sigkill_mb,
 mem_min_sigkill_percent) = calculate_percent('mem_min_sigkill')

(zram_max_sigterm_kb,
 zram_max_sigterm_mb,
 zram_max_sigterm_percent) = calculate_percent('zram_max_sigterm')

(zram_max_sigkill_kb,
 zram_max_sigkill_mb,
 zram_max_sigkill_percent) = calculate_percent('zram_max_sigkill')

(mem_min_warnings_kb,
 mem_min_warnings_mb,
 mem_min_warnings_percent) = calculate_percent('mem_min_warnings')

(zram_max_warnings_kb,
 zram_max_warnings_mb,
 zram_max_warnings_percent) = calculate_percent('zram_max_warnings')
|
||
|
||
|
||
# rate_mem / rate_swap / rate_zram: required options, each must be a float
# strictly greater than zero. The check is written as "not x > 0" instead of
# "x <= 0" so that NaN (which compares False against everything) is rejected
# as well instead of slipping through validation.

if 'rate_mem' not in config_dict:
    print('rate_mem not in config\nExit')
    exit()
rate_mem = string_to_float_convert_test(config_dict['rate_mem'])
if rate_mem is None:
    print('Invalid rate_mem value, not float\nExit')
    exit()
if not rate_mem > 0:
    print('rate_mem MUST be > 0\nExit')
    exit()


if 'rate_swap' not in config_dict:
    print('rate_swap not in config\nExit')
    exit()
rate_swap = string_to_float_convert_test(config_dict['rate_swap'])
if rate_swap is None:
    print('Invalid rate_swap value, not float\nExit')
    exit()
if not rate_swap > 0:
    print('rate_swap MUST be > 0\nExit')
    exit()


if 'rate_zram' not in config_dict:
    print('rate_zram not in config\nExit')
    exit()
rate_zram = string_to_float_convert_test(config_dict['rate_zram'])
if rate_zram is None:
    print('Invalid rate_zram value, not float\nExit')
    exit()
if not rate_zram > 0:
    print('rate_zram MUST be > 0\nExit')
    exit()
|
||
|
||
|
||
# Swap thresholds are kept as raw config strings at this point; they are
# parsed into (value, is_percent) tuples further down.

if 'swap_min_sigterm' not in config_dict:
    print('swap_min_sigterm not in config\nExit')
    exit()
swap_min_sigterm = config_dict['swap_min_sigterm']


if 'swap_min_sigkill' not in config_dict:
    print('swap_min_sigkill not in config\nExit')
    exit()
swap_min_sigkill = config_dict['swap_min_sigkill']
|
||
|
||
|
||
# Delay options: required floats that must not be negative (zero is
# accepted). "not x >= 0" is used instead of "x < 0" so that NaN is
# rejected too.

if 'min_delay_after_sigterm' not in config_dict:
    print('min_delay_after_sigterm not in config\nExit')
    exit()
min_delay_after_sigterm = string_to_float_convert_test(
    config_dict['min_delay_after_sigterm'])
if min_delay_after_sigterm is None:
    print('Invalid min_delay_after_sigterm value, not float\nExit')
    exit()
if not min_delay_after_sigterm >= 0:
    print('min_delay_after_sigterm must be positive\nExit')
    exit()


if 'min_delay_after_sigkill' not in config_dict:
    print('min_delay_after_sigkill not in config\nExit')
    exit()
min_delay_after_sigkill = string_to_float_convert_test(
    config_dict['min_delay_after_sigkill'])
if min_delay_after_sigkill is None:
    print('Invalid min_delay_after_sigkill value, not float\nExit')
    exit()
if not min_delay_after_sigkill >= 0:
    print('min_delay_after_sigkill must be positive\nExit')
    exit()


if 'psi_avg10_sleep_time' not in config_dict:
    print('psi_avg10_sleep_time not in config\nExit')
    exit()
psi_avg10_sleep_time = string_to_float_convert_test(
    config_dict['psi_avg10_sleep_time'])
if psi_avg10_sleep_time is None:
    print('Invalid psi_avg10_sleep_time value, not float\nExit')
    exit()
if not psi_avg10_sleep_time >= 0:
    print('psi_avg10_sleep_time must be positive\nExit')
    exit()
|
||
|
||
|
||
# PSI avg10 thresholds: required floats within [0; 100]. The chained
# comparison is False for NaN, so NaN is rejected along with out-of-range
# values (the former "x < 0 or x > 100" test let NaN through).

if 'sigkill_psi_avg10' not in config_dict:
    print('sigkill_psi_avg10 not in config\nExit')
    exit()
sigkill_psi_avg10 = string_to_float_convert_test(
    config_dict['sigkill_psi_avg10'])
if sigkill_psi_avg10 is None:
    print('Invalid sigkill_psi_avg10 value, not float\nExit')
    exit()
if not 0 <= sigkill_psi_avg10 <= 100:
    print('sigkill_psi_avg10 must be in the range [0; 100]\nExit')
    exit()


if 'sigterm_psi_avg10' not in config_dict:
    print('sigterm_psi_avg10 not in config\nExit')
    exit()
sigterm_psi_avg10 = string_to_float_convert_test(
    config_dict['sigterm_psi_avg10'])
if sigterm_psi_avg10 is None:
    print('Invalid sigterm_psi_avg10 value, not float\nExit')
    exit()
if not 0 <= sigterm_psi_avg10 <= 100:
    print('sigterm_psi_avg10 must be in the range [0; 100]\nExit')
    exit()
|
||
|
||
|
||
# Badness-related options: required integers within [0; 1000].

if 'min_badness' not in config_dict:
    print('min_badness not in config\nExit')
    exit()
min_badness = string_to_int_convert_test(config_dict['min_badness'])
if min_badness is None:
    print('Invalid min_badness value, not integer\nExit')
    exit()
if not 0 <= min_badness <= 1000:
    print('Invalid min_badness value\nExit')
    exit()


if 'oom_score_adj_max' not in config_dict:
    print('oom_score_adj_max not in config\nExit')
    exit()
oom_score_adj_max = string_to_int_convert_test(
    config_dict['oom_score_adj_max'])
if oom_score_adj_max is None:
    print('Invalid oom_score_adj_max value, not integer\nExit')
    exit()
if not 0 <= oom_score_adj_max <= 1000:
    print('Invalid oom_score_adj_max value\nExit')
    exit()
|
||
|
||
|
||
# Low-memory warning options.

# min_time_between_warnings: required float within [1; 300]. The chained
# comparison also rejects NaN, which the former "< 1 or > 300" test accepted.
if 'min_time_between_warnings' not in config_dict:
    print('min_time_between_warnings not in config\nExit')
    exit()
min_time_between_warnings = string_to_float_convert_test(
    config_dict['min_time_between_warnings'])
if min_time_between_warnings is None:
    print('Invalid min_time_between_warnings value, not float\nExit')
    exit()
if not 1 <= min_time_between_warnings <= 300:
    print('min_time_between_warnings value out of range [1; 300]\nExit')
    exit()


# swap_min_warnings is kept as the raw config string here and parsed later.
if 'swap_min_warnings' not in config_dict:
    print('swap_min_warnings not in config\nExit')
    exit()
swap_min_warnings = config_dict['swap_min_warnings']
|
||
|
||
|
||
##########################################################################
|
||
|
||
|
||
# Get KiB levels where possible.
# A swap threshold given as an absolute size is converted to KiB right away.
# A threshold given in percent stays a percentage for now; its KiB value is
# computed later from that percentage once SwapTotal is known.
|
||
|
||
|
||
def get_swap_threshold_tuple(string):
    """Parse a swap threshold string taken from the config.

    The string must end with '%' (a percentage) or with 'M' (a size in MiB).

    Returns a 2-tuple ``(value, is_percent)``:
      * ``(percent, True)`` for a '%' value, with percent in [0; 100];
      * ``(kib, False)`` for an 'M' value, where kib is the number * 1024.

    On a malformed, negative or out-of-range value a message is printed and
    the program exits.
    """
    if string.endswith('%'):
        value = string_to_float_convert_test(string[:-1])
        if value is None:
            print('somewhere swap unit is not float_%')
            exit()

        # The chained comparison is False for NaN, so NaN is rejected as
        # well as values outside the range.
        if not 0 <= value <= 100:
            print('invalid value, must be from the range[0; 100] %')
            exit()

        return value, True

    if string.endswith('M'):
        mib = string_to_float_convert_test(string[:-1])
        if mib is None:
            print('somewhere swap unit is not float_M')
            exit()

        value = mib * 1024  # MiB -> KiB
        # "not value >= 0" rejects negative numbers and NaN alike.
        if not value >= 0:
            print('invalid unit in config (negative value)')
            exit()

        return value, False

    print('Invalid config file. There are invalid units somewhere\nExit')
    exit()
|
||
|
||
|
||
# Parse the three swap thresholds into (value, is_percent) tuples.
swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm)
swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill)
swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings)

# For every threshold exactly one of the *_kb / *_percent names gets defined
# here, depending on the unit used in the config.

swap_term_is_percent = swap_min_sigterm_tuple[1]
if not swap_term_is_percent:
    swap_min_sigterm_kb = swap_min_sigterm_tuple[0]
else:
    swap_min_sigterm_percent = swap_min_sigterm_tuple[0]

swap_kill_is_percent = swap_min_sigkill_tuple[1]
if not swap_kill_is_percent:
    swap_min_sigkill_kb = swap_min_sigkill_tuple[0]
else:
    swap_min_sigkill_percent = swap_min_sigkill_tuple[0]

swap_warn_is_percent = swap_min_warnings_tuple[1]
if not swap_warn_is_percent:
    swap_min_warnings_kb = swap_min_warnings_tuple[0]
else:
    swap_min_warnings_percent = swap_min_warnings_tuple[0]
|
||
|
||
|
||
##########################################################################
|
||
|
||
# Dump the effective configuration, grouped into the same numbered sections
# as printed in the headings below.
if print_config:

    print(
        '\n1. Memory levels to respond to as an OOM threat\n[displaying these options need fix]\n')

    print('mem_min_sigterm: {} MiB, {} %'.format(
        round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1)))
    print('mem_min_sigkill: {} MiB, {} %'.format(
        round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1)))

    # Swap thresholds are shown exactly as written in the config.
    print('swap_min_sigterm: {}'.format(swap_min_sigterm))
    print('swap_min_sigkill: {}'.format(swap_min_sigkill))

    print('zram_max_sigterm: {} MiB, {} %'.format(
        round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1)))
    print('zram_max_sigkill: {} MiB, {} %'.format(
        round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1)))

    print('\n2. The frequency of checking the level of available memory (and CPU usage)\n')
    print('rate_mem: {}'.format(rate_mem))
    print('rate_swap: {}'.format(rate_swap))
    print('rate_zram: {}'.format(rate_zram))

    print('\n3. The prevention of killing innocent victims\n')
    print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm))
    print('min_delay_after_sigkill: {}'.format(min_delay_after_sigkill))
    print('min_badness: {}'.format(min_badness))

    # An '(OK)' suffix is not needed when the value is False.
    print('decrease_oom_score_adj: {}'.format(decrease_oom_score_adj))
    if decrease_oom_score_adj:
        print('oom_score_adj_max: {}'.format(oom_score_adj_max))

    print('\n4. Impact on the badness of processes via matching their'
          ' names, cmdlines or UIDs with regular expressions\n')

    print('(todo)')

    print('\n5. The execution of a specific command instead of sending the\nSIGTERM signal\n')
    print('execute_the_command: {}'.format(execute_the_command))
    if execute_the_command:
        print('\nPROCESS NAME COMMAND TO EXECUTE')
        for key, command in etc_dict.items():
            print('{} {}'.format(key.ljust(15), command))

    print('\n6. GUI notifications:\n- OOM prevention results and\n- low memory warnings\n')
    print('gui_notifications: {}'.format(gui_notifications))

    print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings))
    if gui_low_memory_warnings:
        print('min_time_between_warnings: {}'.format(min_time_between_warnings))

        print('mem_min_warnings: {} MiB, {} %'.format(
            round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1)))

        print('swap_min_warnings: {}'.format(swap_min_warnings))

        print('zram_max_warnings: {} MiB, {} %'.format(
            round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1)))

    print('\n7. Output verbosity\n')
    print('print_config: {}'.format(print_config))
    print('print_mem_check_results: {}'.format(print_mem_check_results))
    print('print_sleep_periods: {}\n'.format(print_sleep_periods))
|
||
|
||
|
||
##########################################################################
|
||
|
||
|
||
# Column width (number of digits of MemTotal in MiB) used when printing
# memory and zram sizes.
mem_len = len(str(round(mem_total / 1024.0)))

# Notification support is only imported when some kind of GUI message is on.
if gui_notifications or gui_low_memory_warnings:
    from subprocess import Popen, TimeoutExpired
    notify_sig_dict = {
        SIGKILL: 'Killing',
        SIGTERM: 'Terminating',
    }

# Scale the configured rates by 1048576 (1024 * 1024).
rate_mem *= 1048576
rate_swap *= 1048576
rate_zram *= 1048576

# State of the low-memory warning timer.
warn_time_now = 0
warn_time_delta = 1000
warn_timer = 0

print('Monitoring started!')

stdout.flush()

# Short aliases used by the PSI check in the main loop.
sigterm_psi = sigterm_psi_avg10
sigkill_psi = sigkill_psi_avg10
psi_min_sleep_time_after_action = psi_avg10_sleep_time


##########################################################################


# Timestamps of the latest PSI-triggered actions; they start in the future
# by psi_avg10_sleep_time.
if psi_support and not ignore_psi:
    kill_psi_t0 = time() + psi_avg10_sleep_time
    term_psi_t0 = time() + psi_avg10_sleep_time

# Prefix of the memory report line; stays empty unless the PSI value is
# printed.
avg_value = ''
|
||
|
||
|
||
# Main monitoring loop: sample PSI / memory / swap / zram, take the first
# corrective action whose condition holds, otherwise (optionally) warn and
# sleep. time0 and mem_info are set right before every action; presumably
# they are read by find_victim_and_send_signal() -- verify against it.
while True:

    # --- PSI (pressure stall information) check
    if psi_support and not ignore_psi:

        avg10 = psi_mem_some_avg10()

        if print_mem_check_results:
            avg_value = 'PSI mem some avg10: {} | '.format(
                str(avg10).rjust(6))

        if (avg10 >= sigkill_psi and
                time() - kill_psi_t0 >= psi_min_sleep_time_after_action):
            time0 = time()
            mem_info = 'avg ({}) > sigkill_psi ({})'.format(
                avg10, sigkill_psi)
            find_victim_and_send_signal(SIGKILL)
            kill_psi_t0 = time()

        elif (avg10 >= sigterm_psi and
                time() - term_psi_t0 >= psi_min_sleep_time_after_action):
            time0 = time()
            mem_info = 'avg ({}) > sigterm_psi ({})'.format(
                avg10, sigterm_psi)
            find_victim_and_send_signal(SIGTERM)
            term_psi_t0 = time()

    mem_available, swap_total, swap_free = check_mem_and_swap()

    # In general the checks below need KiB. Thresholds given in M were
    # converted to KiB earlier; thresholds given in percent are converted
    # here, now that the current SwapTotal is known.
    if swap_kill_is_percent:
        swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0

    if swap_term_is_percent:
        swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0

    if swap_warn_is_percent:
        swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0

    mem_used_zram = check_zram()

    if print_mem_check_results:

        # Width of the swap column
        swap_len = len(str(round(swap_total / 1024.0)))

        # Report available memory; swap and zram columns are shown only
        # when they are in use.
        if swap_total == 0 and mem_used_zram == 0:
            print('{}MemAvail: {} M, {} %'.format(
                avg_value,
                human(mem_available, mem_len),
                just_percent_mem(mem_available / mem_total)))

        elif swap_total > 0 and mem_used_zram == 0:
            print('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %'.format(
                avg_value,
                human(mem_available, mem_len),
                just_percent_mem(mem_available / mem_total),
                human(swap_free, swap_len),
                just_percent_swap(swap_free / (swap_total + 0.1))))

        else:
            print(
                '{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | '
                'MemUsedZram: {} M, {} %'.format(
                    avg_value,
                    human(mem_available, mem_len),
                    just_percent_mem(mem_available / mem_total),
                    human(swap_free, swap_len),
                    just_percent_swap(swap_free / (swap_total + 0.1)),
                    human(mem_used_zram, mem_len),
                    just_percent_mem(mem_used_zram / mem_total)))

    # Swap thresholds as a percentage of SwapTotal, only for the mem_info
    # text. '-' is shown when the threshold is not below SwapTotal (e.g. an
    # absolute threshold with SwapTotal = 0).
    # TODO: print "SwapTotal = 0, ignore swapspace" instead; this whole
    # mem_info formatting needs a rework.
    swap_sigkill_pc = (
        percent(swap_min_sigkill_kb / (swap_total + 0.1))
        if swap_total > swap_min_sigkill_kb else '-')

    swap_sigterm_pc = (
        percent(swap_min_sigterm_kb / (swap_total + 0.1))
        if swap_total > swap_min_sigterm_kb else '-')

    # --- decision chain

    # MEM SWAP KILL
    if (mem_available <= mem_min_sigkill_kb and
            swap_free <= swap_min_sigkill_kb):
        time0 = time()

        mem_info = (
            '{}\n\033[4mMemory status that requires corrective actions:'
            '\033[0m\n MemAvailable [{} MiB, {} %] <= mem_min_sigkill '
            '[{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swap_min_sigkill '
            '[{} MiB, {} %]'
        ).format(
            HR,
            kib_to_mib(mem_available),
            percent(mem_available / mem_total),
            kib_to_mib(mem_min_sigkill_kb),
            percent(mem_min_sigkill_kb / mem_total),
            kib_to_mib(swap_free),
            percent(swap_free / (swap_total + 0.1)),
            kib_to_mib(swap_min_sigkill_kb),
            swap_sigkill_pc)

        find_victim_and_send_signal(SIGKILL)
        kill_psi_t0 = time()
        term_psi_t0 = time()

    # ZRAM KILL
    elif mem_used_zram >= zram_max_sigkill_kb:
        time0 = time()

        mem_info = (
            '{}\n\033[4mMemory status that requires corrective actions:'
            '\033[0m\n MemUsedZram [{} MiB, {} %] >= zram_max_sigkill '
            '[{} MiB, {} %]'
        ).format(
            HR,
            kib_to_mib(mem_used_zram),
            percent(mem_used_zram / mem_total),
            kib_to_mib(zram_max_sigkill_kb),
            percent(zram_max_sigkill_kb / mem_total))

        find_victim_and_send_signal(SIGKILL)
        kill_psi_t0 = time()
        term_psi_t0 = time()

    # MEM SWAP TERM
    elif (mem_available <= mem_min_sigterm_kb and
            swap_free <= swap_min_sigterm_kb):
        time0 = time()

        mem_info = (
            '{}\n\033[4mMemory status that requires corrective actions:'
            '\033[0m\n MemAvailable [{} MiB, {} %] <= mem_min_sigterm '
            '[{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swap_min_sigterm '
            '[{} MiB, {} %]'
        ).format(
            HR,
            kib_to_mib(mem_available),
            percent(mem_available / mem_total),
            kib_to_mib(mem_min_sigterm_kb),
            # TODO: do the rounding where the value is computed (or maybe
            # not); percent(mem_min_sigterm_kb / mem_total) was used before.
            round(mem_min_sigterm_percent, 1),
            kib_to_mib(swap_free),
            percent(swap_free / (swap_total + 0.1)),
            kib_to_mib(swap_min_sigterm_kb),
            swap_sigterm_pc)

        find_victim_and_send_signal(SIGTERM)
        kill_psi_t0 = time()
        term_psi_t0 = time()

    # ZRAM TERM
    elif mem_used_zram >= zram_max_sigterm_kb:
        time0 = time()

        mem_info = (
            '{}\n\033[4mMemory status that requires corrective actions:'
            '\033[0m\n MemUsedZram [{} MiB, {} %] >= zram_max_sigterm '
            '[{} M, {} %]'
        ).format(
            HR,
            kib_to_mib(mem_used_zram),
            percent(mem_used_zram / mem_total),
            kib_to_mib(zram_max_sigterm_kb),
            percent(zram_max_sigterm_kb / mem_total))

        find_victim_and_send_signal(SIGTERM)
        kill_psi_t0 = time()
        term_psi_t0 = time()

    # LOW MEMORY WARNINGS, then SLEEP BETWEEN MEM CHECKS
    else:
        if gui_low_memory_warnings and (
                (mem_available <= mem_min_warnings_kb and
                 swap_free <= swap_min_warnings_kb + 0.1) or
                mem_used_zram >= zram_max_warnings_kb):
            # Accumulate the time elapsed since the previous low-memory
            # pass; a warning is sent once it exceeds the configured minimum.
            warn_time_delta = time() - warn_time_now
            warn_time_now = time()
            warn_timer += warn_time_delta
            if warn_timer > min_time_between_warnings:
                t0 = time()
                send_notify_warn()
                print(time() - t0, 'send notify warning time')
                warn_timer = 0

        sleep_after_check_mem()
|