nohang/nohang
2019-01-02 18:23:48 +09:00

1751 lines
53 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""A daemon that prevents OOM in Linux systems."""
from time import sleep, time
start_time = time()
import os
from operator import itemgetter
# this is most slow import
from argparse import ArgumentParser
from sys import stdout, stderr
from signal import SIGKILL, SIGTERM
sig_dict = {SIGKILL: 'SIGKILL',
SIGTERM: 'SIGTERM'}
'''
nm = 30
nc = nm + 1
'''
self_uid = os.geteuid()
self_pid = str(os.getpid())
wait_time = 2
cache_time = 30
cache_path = '/dev/shm/nohang_env_cache'
psi_path = '/proc/pressure/memory'
psi_support = os.path.exists(psi_path)
##########################################################################
# function definition section
def psi_mem_some_avg_total():
if psi_support:
return float(rline1(psi_path).rpartition('=')[2])
def check_mem():
"""find mem_available"""
return int(rline1('/proc/meminfo').split(':')[1].strip(' kB\n'))
def check_mem_and_swap():
"""find mem_available, swap_total, swap_free"""
with open('/proc/meminfo') as f:
for n, line in enumerate(f):
if n is 2:
mem_available = int(line.split(':')[1].strip(' kB\n'))
continue
if n is swap_total_index:
swap_total = int(line.split(':')[1].strip(' kB\n'))
continue
if n is swap_free_index:
swap_free = int(line.split(':')[1].strip(' kB\n'))
break
return mem_available, swap_total, swap_free
def check_zram():
"""find MemUsedZram"""
disksize_sum = 0
mem_used_total_sum = 0
for dev in os.listdir('/sys/block'):
if dev.startswith('zram'):
stat = zram_stat(dev)
disksize_sum += int(stat[0])
mem_used_total_sum += int(stat[1])
ZRAM_DISKSIZE_FACTOR = 0.0042
# Означает, что при задани zram disksize = 1 GiB доступная память
# уменьшится на 0.0042 GiB.
# Найден экспериментально, требует уточнения с разными ядрами и архитектурами.
# На небольших дисксайзах (до гигабайта) может быть больше, до 0.0045.
# Создатель модуля zram утверждает, что ZRAM_DISKSIZE_FACTOR доожен быть 0.001:
# ("zram uses about 0.1% of the size of the disk"
# - https://www.kernel.org/doc/Documentation/blockdev/zram.txt),
# но это утверждение противоречит опытным данным.
# ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize
# found experimentally
return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
def format_time(t):
t = int(t)
if t < 60:
return '{} sec'.format(t)
elif t >= 60 and t < 3600:
m = t // 60
s = t % 60
return '{} min {} sec'.format(m, s)
else:
h = t // 3600
s0 = t - h * 3600
m = s0 // 60
s = s0 % 60
return '{} h {} min {} sec'.format(h, m, s)
def re_pid_environ(pid):
"""
read environ of 1 process
returns tuple with USER, DBUS, DISPLAY like follow:
('user', 'DISPLAY=:0',
'DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus')
returns None if these vars is not in /proc/[pid]/environ
"""
display_env = 'DISPLAY='
dbus_env = 'DBUS_SESSION_BUS_ADDRESS='
user_env = 'USER='
try:
env = str(rline1('/proc/' + pid + '/environ'))
if display_env in env and dbus_env in env and user_env in env:
env_list = env.split('\x00')
# iterating over a list of process environment variables
for i in env_list:
if i.startswith(user_env):
user = i
continue
if i.startswith(display_env):
display = i[:10]
continue
if i.startswith(dbus_env):
if ',guid=' in i:
return None
dbus = i
continue
if i.startswith('HOME='):
# exclude Display Manager's user
if i.startswith('HOME=/var'):
return None
env = user.partition('USER=')[2], display, dbus
return env
except FileNotFoundError:
pass
except ProcessLookupError:
pass
def root_notify_env():
"""return set(user, display, dbus)"""
unsorted_envs_list = []
# iterates over processes, find processes with suitable env
for pid in os.listdir('/proc'):
if pid[0].isdecimal() is False:
continue
one_env = re_pid_environ(pid)
unsorted_envs_list.append(one_env)
env = set(unsorted_envs_list)
env.discard(None)
return env
def string_to_float_convert_test(string):
"""Try to interprete string values as floats."""
try:
return float(string)
except ValueError:
return None
def string_to_int_convert_test(string):
"""Try to interpret string values as integers."""
try:
return int(string)
except ValueError:
return None
# extracting the parameter from the config dictionary, str return
def conf_parse_string(param):
"""
Get string parameters from the config dict.
param: config_dict key
returns config_dict[param].strip()
"""
if param in config_dict:
return config_dict[param].strip()
else:
print('All the necessary parameters must be in the config')
print('There is no "{}" parameter in the config'.format(param))
exit()
# extracting the parameter from the config dictionary, bool return
def conf_parse_bool(param):
"""
Get bool parameters from the config_dict.
param: config_dicst key
returns bool
"""
if param in config_dict:
param_str = config_dict[param]
if param_str == 'True':
return True
elif param_str == 'False':
return False
else:
print('Invalid value of the "{}" parameter.'.format(param_str))
print('Valid values are True and False.')
print('Exit')
exit()
else:
print('All the necessary parameters must be in the config')
print('There is no "{}" parameter in the config'.format(param_str))
exit()
def rline1(path):
"""read 1st line from path."""
with open(path) as f:
for line in f:
return line[:-1]
def write(path, string):
"""Write string to path."""
with open(path, 'w') as f:
f.write(string)
def kib_to_mib(num):
"""Convert KiB values to MiB values."""
return round(num / 1024.0)
def percent(num):
"""Interprete mum as percentage."""
return round(num * 100, 1)
def just_percent_mem(num):
"""convert num to percent and justify"""
return str(round(num * 100, 1)).rjust(4, ' ')
def just_percent_swap(num):
return str(round(num * 100, 1)).rjust(5, ' ')
def human(num, lenth):
"""Convert KiB values to MiB values with right alignment"""
return str(round(num / 1024)).rjust(lenth, ' ')
# return str with amount of bytes
def zram_stat(zram_id):
"""
Get zram state.
zram_id: str zram block-device id
returns bytes diskcize, str mem_used_total
"""
try:
disksize = rline1('/sys/block/' + zram_id + '/disksize')
except FileNotFoundError:
return '0', '0'
if disksize == ['0\n']:
return '0', '0'
try:
mm_stat = rline1('/sys/block/' + zram_id + '/mm_stat').split(' ')
mm_stat_list = []
for i in mm_stat:
if i != '':
mm_stat_list.append(i)
mem_used_total = mm_stat_list[2]
except FileNotFoundError:
mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total')
return disksize, mem_used_total # BYTES, str
# return process name
def pid_to_name(pid):
"""
Get process name by pid.
pid: str pid of required process
returns string process_name
"""
try:
with open('/proc/' + pid + '/status') as f:
for line in f:
return line[:-1].split('\t')[1]
except FileNotFoundError:
return ''
except ProcessLookupError:
return ''
def pid_to_cmdline(pid):
"""
Get process cmdline by pid.
pid: str pid of required process
returns string cmdline
"""
with open('/proc/' + pid + '/cmdline') as file:
try:
return file.readlines()[0].replace('\x00', ' ').strip()
except IndexError:
return ''
def pid_to_uid(pid):
with open('/proc/' + pid + '/status') as f:
for n, line in enumerate(f):
if n is uid_index:
return line.split('\t')[1]
def send_notify_warn():
"""
Look for process with maximum 'badness' and warn user with notification.
"""
# find process with max badness
fat_tuple = fattest()
pid = fat_tuple[0]
name = pid_to_name(pid)
if mem_used_zram > 0:
low_mem_percent = '{}% {}% {}%'.format(
round(mem_available / mem_total * 100),
round(swap_free / (swap_total + 0.1) * 100),
round(mem_used_zram / mem_total * 100))
elif swap_free > 0:
low_mem_percent = '{}% {}%'.format(
round(mem_available / mem_total * 100),
round(swap_free / (swap_total + 0.1) * 100))
else:
low_mem_percent = '{}%'.format(
round(mem_available / mem_total * 100))
title = 'Low memory: {}'.format(low_mem_percent)
body = 'Fattest process: <b>{}</b>, <b>{}</b>'.format(pid, name)
if root: # If nohang was started by root
# send notification to all active users with special script
# теперь можно напрямую уведомлять из кэша если он не устарел
Popen([
'/usr/bin/nohang_notify_low_mem',
'--mem', low_mem_percent,
'--pid', pid,
'--name', name
])
else: # Or by regular user
# send notification to user that runs this nohang
try:
Popen(['notify-send', '--icon=dialog-warning',
'{}'.format(title), '{}'.format(body)]).wait(wait_time)
except TimeoutExpired:
print('TimeoutExpired: ' + 'notify low mem')
def send_notify(signal, name, pid):
"""
Notificate about OOM Preventing.
signal: key for notify_sig_dict
name: str process name
pid: str process pid
"""
title = 'Preventing OOM'
body = '<b>{}</b> process <b>{}</b>, <b>{}</b>'.format(
notify_sig_dict[signal], pid, name.replace(
# сивол & может ломать уведомления в некоторых темах оформления,
# поэтому заменяется на *
'&', '*'))
if root:
# send notification to all active users with notify-send
b = root_notify_env()
if len(b) > 0:
for i in b:
username, display_env, dbus_env = i[0], i[1], i[2]
try:
Popen(['sudo', '-u', username, 'env', display_env,
dbus_env, 'notify-send', '--icon=dialog-warning',
'{}'.format(title), '{}'.format(body)]).wait(wait_time)
except TimeoutExpired:
print('TimeoutExpired: ' + 'notify send signal')
else:
# send notification to user that runs this nohang
try:
Popen(['notify-send', '--icon=dialog-warning',
'{}'.format(title), '{}'.format(body)]).wait(wait_time)
except TimeoutExpired:
print('TimeoutExpired: ' + 'notify send signal')
def send_notify_etc(pid, name, command):
"""
Notificate about OOM Preventing.
command: str command that will be executed
name: str process name
pid: str process pid
"""
title = 'Preventing OOM'
body = 'Victim is process <b>{}</b>, <b>{}</b>\nExecute the command:\n<b>{}</b>'.format(
pid, name.replace('&', '*'), command.replace('&', '*'))
if root:
# send notification to all active users with notify-send
b = root_notify_env()
if len(b) > 0:
for i in b:
username, display_env, dbus_env = i[0], i[1], i[2]
try:
Popen(['sudo', '-u', username, 'env', display_env,
dbus_env, 'notify-send', '--icon=dialog-warning',
'{}'.format(title), '{}'.format(body)]).wait(wait_time)
except TimeoutExpired:
print('TimeoutExpired: notify run command')
else:
# send notification to user that runs this nohang
Popen(['notify-send', '--icon=dialog-warning', '{}'.format(title), '{}'
.format(body)])
def sleep_after_send_signal(signal):
"""
Sleeping after signal was sent.
signal: sent signal
"""
if signal is SIGKILL:
if print_sleep_periods:
print(' sleep', min_delay_after_sigkill)
sleep(min_delay_after_sigterm)
else:
if print_sleep_periods:
print(' sleep', min_delay_after_sigterm)
sleep(min_delay_after_sigterm)
def fattest():
"""
Find the process with highest badness and its badness adjustment
Return pid and badness
"""
pid_badness_list = []
for pid in os.listdir('/proc'):
# only directories whose names consist only of numbers, except /proc/1/
if pid[0].isdecimal() is False or pid is '1' or pid is self_pid:
continue
# find and modify badness (if it needs)
try:
badness = int(rline1('/proc/' + pid + '/oom_score'))
if decrease_oom_score_adj:
oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
if badness > oom_score_adj_max and oom_score_adj > 0:
badness = badness - oom_score_adj + oom_score_adj_max
if regex_matching:
name = pid_to_name(pid)
for re_tup in processname_re_list:
if search(re_tup[1], name) is not None:
if pid_to_cmdline(pid) == '':
# skip kthreads
continue
badness += int(re_tup[0])
if re_match_cmdline:
cmdline = pid_to_cmdline(pid)
if cmdline == '':
# skip kthreads
continue
for re_tup in cmdline_re_list:
if search(re_tup[1], cmdline) is not None:
badness += int(re_tup[0])
if re_match_uid:
uid = pid_to_uid(pid)
for re_tup in uid_re_list:
if search(re_tup[1], uid) is not None:
if pid_to_cmdline(pid) == '':
# skip kthreads
continue
badness += int(re_tup[0])
except FileNotFoundError:
continue
except ProcessLookupError:
continue
pid_badness_list.append((pid, badness))
# Make list of (pid, badness) tuples, sorted by 'badness' values
pid_tuple_list = sorted(
pid_badness_list, key=itemgetter(1), reverse=True)[0]
pid = pid_tuple_list[0]
# Get maximum 'badness' value
victim_badness = pid_tuple_list[1]
return pid, victim_badness
def find_victim_and_send_signal(signal):
"""
Find victim with highest badness and send SIGTERM/SIGKILL
"""
#print()
pid, victim_badness = fattest()
name = pid_to_name(pid)
if victim_badness >= min_badness:
# Try to send signal to found victim
# Get VmRSS and VmSwap and cmdline of victim process and try to send
# signal
try:
with open('/proc/' + pid + '/status') as f:
for n, line in enumerate(f):
if n is uid_index:
uid = line.split('\t')[1]
continue
if n is vm_size_index:
vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if n is vm_rss_index:
vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
continue
if detailed_rss:
if n is anon_index:
anon_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is file_index:
file_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is shmem_index:
shmem_rss = kib_to_mib(
int(line.split('\t')[1][:-4]))
continue
if n is vm_swap_index:
vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
break
with open('/proc/' + pid + '/cmdline') as file:
try:
cmdline = file.readlines()[0].replace('\x00', ' ')
except IndexError:
cmdline = ''
except FileNotFoundError:
pass
except ProcessLookupError:
pass
except IndexError:
pass
except ValueError:
pass
oom_score = rline1('/proc/' + pid + '/oom_score') # тут может быть FileNotFoundError!
oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
len_vm = len(str(vm_size))
if detailed_rss:
victim_info = '\033[4mFound the victim with highest badness:\033[0m' \
'\n Name: \033[33m{}\033[0m' \
'\n PID: \033[33m{}\033[0m' \
'\n UID: \033[33m{}\033[0m' \
'\n Badness: \033[33m{}\033[0m, ' \
'oom_score: \033[33m{}\033[0m, ' \
'oom_score_adj: \033[33m{}\033[0m' \
'\n VmSize: \033[33m{}\033[0m MiB' \
'\n VmRSS: \033[33m{}\033[0m MiB (' \
'Anon: \033[33m{}\033[0m MiB, ' \
'File: \033[33m{}\033[0m MiB, ' \
'Shmem: \033[33m{}\033[0m MiB)' \
'\n VmSwap: \033[33m{}\033[0m MiB' \
'\n CmdLine: \033[33m{}\033[0m'.format(
name, pid, uid,
victim_badness, oom_score, oom_score_adj,
vm_size, str(vm_rss).rjust(len_vm, ' '),
anon_rss, file_rss, shmem_rss,
str(vm_swap).rjust(len_vm, ' '), cmdline)
else:
victim_info = '\033[4mFound the victim with highest badness:\033[0m' \
'\n Name: \033[33m{}\033[0m' \
'\n PID: \033[33m{}\033[0m' \
'\n UID: \033[33m{}\033[0m' \
'\n Badness: \033[33m{}\033[0m, ' \
'oom_score: \033[33m{}\033[0m, ' \
'oom_score_adj: \033[33m{}\033[0m' \
'\n VmSize: \033[33m{}\033[0m MiB' \
'\n VmRSS: \033[33m{}\033[0m MiB' \
'\n VmSwap: \033[33m{}\033[0m MiB' \
'\n CmdLine: \033[33m{}\033[0m'.format(
name, pid, uid, victim_badness, vm_size,
str(vm_rss).rjust(len_vm, ' '),
str(vm_swap).rjust(len_vm, ' '), cmdline)
if execute_the_command and signal is SIGTERM and name in etc_dict:
command = etc_dict[name]
exit_status = os.system(etc_dict[name])
if exit_status == 0:
exit_status = '\033[32m0\033[0m'
else:
exit_status = '\033[31m{}\033[0m'.format(exit_status)
response_time = time() - time0
etc_info = '{}' \
'\n\033[4mImplement corrective action:\033[0m\n Execute the command: \033[4m{}\033[0m' \
'\n Exit status: {}; response time: {} ms'.format(
victim_info, command, exit_status,
round(response_time * 1000))
# update stat_dict
key = "Run the command '\033[35m{}\033[0m'".format(command)
if key not in stat_dict:
stat_dict.update({key: 1})
else:
new_value = stat_dict[key] + 1
stat_dict.update({key: new_value})
print(mem_info)
print(etc_info)
if gui_notifications:
send_notify_etc(pid, name, command)
else:
try:
os.kill(int(pid), signal)
response_time = time() - time0
send_result = '\033[32mOK\033[0m; response time: {} ms'.format(
round(response_time * 1000))
# update stat_dict
key = 'Send \033[35m{}\033[0m to \033[35m{}\033[0m'.format(sig_dict[signal], name)
if key not in stat_dict:
stat_dict.update({key: 1})
else:
new_value = stat_dict[key] + 1
stat_dict.update({key: new_value})
if gui_notifications:
send_notify(signal, name, pid)
except FileNotFoundError:
response_time = time() - time0
send_result = 'no such process; response time: {} ms'.format(
round(response_time * 1000))
except ProcessLookupError:
response_time = time() - time0
send_result = 'no such process; response time: {} ms'.format(
round(response_time * 1000))
preventing_oom_message = '{}' \
'\n\033[4mImplement corrective action:\033[0m\n ' \
'Sending \033[4m{}\033[0m to the victim; {}'.format(
victim_info, sig_dict[signal], send_result)
print(mem_info)
print(preventing_oom_message)
print('\n\033[4mDuration of work: {}; number of corrective actions:\033[0m'.format(
format_time(time() - start_time)))
for key in stat_dict:
print(' - {}: {}'.format(key, stat_dict[key]))
else:
response_time = time() - time0
victim_badness_is_too_small = ' victim badness {} < min_b' \
'adness {}; nothing to do; response time: {} ms'.format(
victim_badness,
min_badness,
round(response_time * 1000))
print(victim_badness_is_too_small)
sleep_after_send_signal(signal)
def sleep_after_check_mem():
"""Specify sleep times depends on rates and avialable memory."""
t_mem = mem_available / rate_mem
t_swap = swap_free / rate_swap
t_zram = (mem_total - mem_used_zram) / rate_zram
t_mem_swap = t_mem + t_swap
t_mem_zram = t_mem + t_zram
if t_mem_swap <= t_mem_zram:
t = t_mem_swap
else:
t = t_mem_zram
try:
if print_sleep_periods:
print('sleep', round(t, 2),
' (t_mem={}, t_swap={}, t_zram={})'.format(
round(t_mem, 2),
round(t_swap, 2),
round(t_zram, 2)))
stderr.flush()
sleep(t)
except KeyboardInterrupt:
exit()
def calculate_percent(arg_key):
"""
parse conf dict
Calculate mem_min_KEY_percent.
Try use this one)
arg_key: str key for config_dict
returns int mem_min_percent or NoneType if got some error
"""
if arg_key in config_dict:
mem_min = config_dict[arg_key]
if mem_min.endswith('%'):
# truncate percents, so we have a number
mem_min_percent = mem_min[:-1].strip()
# then 'float test'
mem_min_percent = string_to_float_convert_test(mem_min_percent)
if mem_min_percent is None:
print('Invalid {} value, not float\nExit'.format(arg_key))
exit()
# Final validations...
if mem_min_percent < 0 or mem_min_percent > 100:
print(
'{}, as percents value, out of range [0; 100]\nExit'.format(arg_key))
exit()
# mem_min_sigterm_percent is clean and valid float percentage. Can
# translate into Kb
mem_min_kb = mem_min_percent / 100 * mem_total
mem_min_mb = round(mem_min_kb / 1024)
elif mem_min.endswith('M'):
mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip())
if mem_min_mb is None:
print('Invalid {} value, not float\nExit'.format(arg_key))
exit()
mem_min_kb = mem_min_mb * 1024
if mem_min_kb > mem_total:
print(
'{} value can not be greater then MemTotal ({} MiB)\nExit'.format(
arg_key, round(
mem_total / 1024)))
exit()
mem_min_percent = mem_min_kb / mem_total * 100
else:
print('Invalid {} units in config.\n Exit'.format(arg_key))
mem_min_percent = None
else:
print('{} not in config\nExit'.format(arg_key))
mem_min_percent = None
return mem_min_kb, mem_min_mb, mem_min_percent
##########################################################################
# find mem_total
# find positions of SwapFree and SwapTotal in /proc/meminfo
with open('/proc/meminfo') as file:
mem_list = file.readlines()
mem_list_names = []
for s in mem_list:
mem_list_names.append(s.split(':')[0])
if mem_list_names[2] != 'MemAvailable':
print('Your Linux kernel is too old, Linux 3.14+ requied\nExit')
exit()
swap_total_index = mem_list_names.index('SwapTotal')
swap_free_index = swap_total_index + 1
mem_total = int(mem_list[0].split(':')[1].strip(' kB\n'))
# Get names from /proc/*/status to be able to get VmRSS and VmSwap values
with open('/proc/self/status') as file:
status_list = file.readlines()
status_names = []
for s in status_list:
status_names.append(s.split(':')[0])
vm_size_index = status_names.index('VmSize')
vm_rss_index = status_names.index('VmRSS')
vm_swap_index = status_names.index('VmSwap')
uid_index = status_names.index('Uid')
try:
anon_index = status_names.index('RssAnon')
file_index = status_names.index('RssFile')
shmem_index = status_names.index('RssShmem')
detailed_rss = True
#print(detailed_rss, 'detailed_rss')
except ValueError:
detailed_rss = False
#print('It is not Linux 4.5+')
##########################################################################
# Configurations
# directory where the script is running
cd = os.getcwd()
#print('CD:', cd)
# where to look for a config if not specified via the -c/--config option
default_configs = (cd + '/nohang.conf', '/etc/nohang/nohang.conf')
# universal message if config is invalid
conf_err_mess = '\nSet up the path to the valid conf' \
'ig file with -c/--config option!\nExit'
# Cmd argparse
parser = ArgumentParser()
parser.add_argument(
'-c',
'--config',
help="""path to the config file, default values:
./nohang.conf, /etc/nohang/nohang.conf""",
default=None,
type=str
)
args = parser.parse_args()
arg_config = args.config
if arg_config is None:
config = None
for i in default_configs:
if os.path.exists(i):
config = i
break
if config is None:
print('Default configuration was not found\n',
conf_err_mess)
exit()
else:
if os.path.exists(arg_config):
config = arg_config
else:
print("File {} doesn't exists{}".format(
arg_config, conf_err_mess))
exit()
print('The path to the config:', config)
##########################################################################
# parsing the config with obtaining the parameters dictionary
# conf_parameters_dict
# conf_restart_dict
# dictionary with config options
config_dict = dict()
processname_re_list = []
cmdline_re_list = []
uid_re_list = []
# dictionary with names and commands for the parameter
# execute_the_command
# тут тоже список нужен, а не словарь
etc_dict = dict()
# will store corrective actions stat
stat_dict = dict()
try:
with open(config) as f:
for line in f:
a = line.startswith('#')
b = line.startswith('\n')
c = line.startswith('\t')
d = line.startswith(' ')
etc = line.startswith('$ETC')
if not a and not b and not c and not d and not etc:
a = line.partition('=')
config_dict[a[0].strip()] = a[2].strip()
if etc:
a = line[4:].split('///')
etc_name = a[0].strip()
etc_command = a[1].strip()
if len(etc_name) > 15:
print('Invalid config, the length of the process '
'name must not exceed 15 characters\nExit')
exit()
etc_dict[etc_name] = etc_command
# NEED VALIDATION!
if line.startswith('@PROCESSNAME_RE'):
a = line.partition('@PROCESSNAME_RE')[2].strip(' \n').partition('///')
processname_re_list.append((a[0].strip(' '), a[2].strip(' ')))
if line.startswith('@CMDLINE_RE'):
a = line.partition('@CMDLINE_RE')[2].strip(' \n').partition('///')
cmdline_re_list.append((a[0].strip(' '), a[2].strip(' ')))
if line.startswith('@UID_RE'):
a = line.partition('@UID_RE')[2].strip(' \n').partition('///')
uid_re_list.append((a[0].strip(' '), a[2].strip(' ')))
except PermissionError:
print('PermissionError', conf_err_mess)
exit()
except UnicodeDecodeError:
print('UnicodeDecodeError', conf_err_mess)
exit()
except IsADirectoryError:
print('IsADirectoryError', conf_err_mess)
exit()
except IndexError:
print('IndexError', conf_err_mess)
exit()
# print(processname_re_list)
# print(cmdline_re_list)
# print(uid_re_list)
##########################################################################
# extracting parameters from the dictionary
# check for all necessary parameters
# validation of all parameters
print_config = conf_parse_bool('print_config')
print_mem_check_results = conf_parse_bool('print_mem_check_results')
print_sleep_periods = conf_parse_bool('print_sleep_periods')
realtime_ionice = conf_parse_bool('realtime_ionice')
mlockall = conf_parse_bool('mlockall')
gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
gui_notifications = conf_parse_bool('gui_notifications')
decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
execute_the_command = conf_parse_bool('execute_the_command')
regex_matching = conf_parse_bool('regex_matching')
re_match_cmdline = conf_parse_bool('re_match_cmdline')
re_match_uid = conf_parse_bool('re_match_uid')
if regex_matching or re_match_cmdline or re_match_uid:
from re import search
import sre_constants
mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent = calculate_percent(
'mem_min_sigterm')
mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent = calculate_percent(
'mem_min_sigkill')
zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent = calculate_percent(
'zram_max_sigterm')
zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent = calculate_percent(
'zram_max_sigkill')
mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent = calculate_percent(
'mem_min_warnings')
zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent = calculate_percent(
'zram_max_warnings')
if 'realtime_ionice_classdata' in config_dict:
realtime_ionice_classdata = string_to_int_convert_test(
config_dict['realtime_ionice_classdata'])
if realtime_ionice_classdata is None:
print('Invalid value of the "realtime_ionice_classdata" parameter.')
print('Valid values are integers from the range [0; 7].')
print('Exit')
exit()
if realtime_ionice_classdata < 0 or realtime_ionice_classdata > 7:
print('Invalid value of the "realtime_ionice_classdata" parameter.')
print('Valid values are integers from the range [0; 7].')
print('Exit')
exit()
else:
print('All the necessary parameters must be in the config')
print('There is no "realtime_ionice_classdata" parameter in the config')
exit()
if 'niceness' in config_dict:
niceness = string_to_int_convert_test(config_dict['niceness'])
if niceness is None:
print('Invalid niceness value, not integer\nExit')
exit()
if niceness < -20 or niceness > 19:
print('niceness out of range [-20; 19]\nExit')
exit()
else:
print('niceness not in config\nExit')
exit()
if 'oom_score_adj' in config_dict:
oom_score_adj = string_to_int_convert_test(
config_dict['oom_score_adj'])
if oom_score_adj is None:
print('Invalid oom_score_adj value, not integer\nExit')
exit()
if oom_score_adj < -1000 or oom_score_adj > 1000:
print('oom_score_adj out of range [-1000; 1000]\nExit')
exit()
else:
print('oom_score_adj not in config\nExit')
exit()
if 'rate_mem' in config_dict:
rate_mem = string_to_float_convert_test(config_dict['rate_mem'])
if rate_mem is None:
print('Invalid rate_mem value, not float\nExit')
exit()
if rate_mem <= 0:
print('rate_mem MUST be > 0\nExit')
exit()
else:
print('rate_mem not in config\nExit')
exit()
if 'rate_swap' in config_dict:
rate_swap = string_to_float_convert_test(config_dict['rate_swap'])
if rate_swap is None:
print('Invalid rate_swap value, not float\nExit')
exit()
if rate_swap <= 0:
print('rate_swap MUST be > 0\nExit')
exit()
else:
print('rate_swap not in config\nExit')
exit()
if 'rate_zram' in config_dict:
rate_zram = string_to_float_convert_test(config_dict['rate_zram'])
if rate_zram is None:
print('Invalid rate_zram value, not float\nExit')
exit()
if rate_zram <= 0:
print('rate_zram MUST be > 0\nExit')
exit()
else:
print('rate_zram not in config\nExit')
exit()
# НУЖНА ВАЛИДАЦИЯ НА МЕСТЕ!
if 'swap_min_sigterm' in config_dict:
swap_min_sigterm = config_dict['swap_min_sigterm']
else:
print('swap_min_sigterm not in config\nExit')
exit()
# НУЖНА ВАЛИДАЦИЯ НА МЕСТЕ!
if 'swap_min_sigkill' in config_dict:
swap_min_sigkill = config_dict['swap_min_sigkill']
else:
print('swap_min_sigkill not in config\nExit')
exit()
if 'min_delay_after_sigterm' in config_dict:
min_delay_after_sigterm = string_to_float_convert_test(
config_dict['min_delay_after_sigterm'])
if min_delay_after_sigterm is None:
print('Invalid min_delay_after_sigterm value, not float\nExit')
exit()
if min_delay_after_sigterm < 0:
print('min_delay_after_sigterm must be positiv\nExit')
exit()
else:
print('min_delay_after_sigterm not in config\nExit')
exit()
if 'min_delay_after_sigkill' in config_dict:
min_delay_after_sigkill = string_to_float_convert_test(
config_dict['min_delay_after_sigkill'])
if min_delay_after_sigkill is None:
print('Invalid min_delay_after_sigkill value, not float\nExit')
exit()
if min_delay_after_sigkill < 0:
print('min_delay_after_sigkill must be positiv\nExit')
exit()
else:
print('min_delay_after_sigkill not in config\nExit')
exit()
if 'min_badness' in config_dict:
min_badness = string_to_int_convert_test(
config_dict['min_badness'])
if min_badness is None:
print('Invalid min_badness value, not integer\nExit')
exit()
if min_badness < 0 or min_badness > 1000:
print('Invalud min_badness value\nExit')
exit()
else:
print('min_badness not in config\nExit')
exit()
if 'oom_score_adj_max' in config_dict:
oom_score_adj_max = string_to_int_convert_test(
config_dict['oom_score_adj_max'])
if oom_score_adj_max is None:
print('Invalid oom_score_adj_max value, not integer\nExit')
exit()
if oom_score_adj_max < 0 or oom_score_adj_max > 1000:
print('Invalid oom_score_adj_max value\nExit')
exit()
else:
print('oom_score_adj_max not in config\nExit')
exit()
if 'min_time_between_warnings' in config_dict:
min_time_between_warnings = string_to_float_convert_test(
config_dict['min_time_between_warnings'])
if min_time_between_warnings is None:
print('Invalid min_time_between_warnings value, not float\nExit')
exit()
if min_time_between_warnings < 1 or min_time_between_warnings > 300:
print('min_time_between_warnings value out of range [1; 300]\nExit')
exit()
else:
print('min_time_between_warnings not in config\nExit')
exit()
# НА МЕСТЕ!!!
if 'swap_min_warnings' in config_dict:
swap_min_warnings = config_dict['swap_min_warnings']
else:
print('swap_min_warnings not in config\nExit')
exit()
##########################################################################
# Get Kibibytes levels
# Returns Kibibytes value if absolute value was set in config,
# or tuple with percentage
def sig_level_to_kb_swap(string):
"""Returns Kibibytes value if abs val was set in config, or tuple with %"""
if string.endswith('%'):
return float(string[:-1].strip()), True
elif string.endswith('M'):
return float(string[:-1].strip()) * 1024
else:
print('Invalid config file. There are invalid units somewhere\nExit')
exit()
# So, get them
swap_min_sigterm_swap = sig_level_to_kb_swap(swap_min_sigterm)
swap_min_sigkill_swap = sig_level_to_kb_swap(swap_min_sigkill)
swap_min_warnings_swap = sig_level_to_kb_swap(swap_min_warnings)
if isinstance(swap_min_sigterm_swap, tuple):
swap_term_is_percent = True
swap_min_sigterm_percent = swap_min_sigterm_swap[0]
else:
swap_term_is_percent = False
swap_min_sigterm_kb = swap_min_sigterm_swap
if isinstance(swap_min_sigkill_swap, tuple):
swap_kill_is_percent = True
swap_min_sigkill_percent = swap_min_sigkill_swap[0]
else:
swap_kill_is_percent = False
swap_min_sigkill_kb = swap_min_sigkill_swap
if isinstance(swap_min_warnings_swap, tuple):
swap_warn_is_percent = True
swap_min_warnings_percent = swap_min_warnings_swap[0]
else:
swap_warn_is_percent = False
swap_min_warnings_kb = swap_min_warnings_swap
##########################################################################
# self-defense
# возожно стоит убрать поддержку mlockall и ionice
# Increase priority
try:
os.nice(niceness)
niceness_result = 'OK'
except PermissionError:
niceness_result = 'Fail'
pass
# Deny self-killing
try:
with open('/proc/self/oom_score_adj', 'w') as file:
file.write('{}\n'.format(oom_score_adj))
oom_score_adj_result = 'OK'
except PermissionError:
oom_score_adj_result = 'Fail'
except OSError:
oom_score_adj_result = 'Fail'
# Deny process swapping
if mlockall:
from ctypes import CDLL
result = CDLL('libc.so.6', use_errno=True).mlockall(3)
if result is 0:
mla_res = 'OK'
else:
mla_res = 'Fail'
else:
mla_res = ''
if self_uid == 0:
root = True
decrease_res = 'OK'
else:
root = False
decrease_res = 'Impossible'
if root and realtime_ionice:
os.system('ionice -c 1 -n {} -p {}'.format(
realtime_ionice_classdata, self_pid))
##########################################################################
if print_config:
print(
'\n1. Memory levels to respond to as an OOM threat\n[displaying these options need fix]\n')
print('mem_min_sigterm: {} MiB, {} %'.format(
round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1)))
print('mem_min_sigkill: {} MiB, {} %'.format(
round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1)))
print('swap_min_sigterm: {}'.format(swap_min_sigterm))
print('swap_min_sigkill: {}'.format(swap_min_sigkill))
print('zram_max_sigterm: {} MiB, {} %'.format(
round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1)))
print('zram_max_sigkill: {} MiB, {} %'.format(
round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1)))
print('\n2. The frequency of checking the level of available memory (and CPU usage)\n')
print('rate_mem: {}'.format(rate_mem))
print('rate_swap: {}'.format(rate_swap))
print('rate_zram: {}'.format(rate_zram))
print('\n3. The prevention of killing innocent victims\n')
print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm))
print('min_delay_after_sigkill: {}'.format(min_delay_after_sigkill))
print('min_badness: {}'.format(min_badness))
# False (OK) - OK не нужен когда фолс
print('decrease_oom_score_adj: {}'.format(
decrease_oom_score_adj
))
if decrease_oom_score_adj:
print('oom_score_adj_max: {}'.format(oom_score_adj_max))
print('\n4. Impact on the badness of processes via matching their' \
' names, cmdlines ir UIDs with regular expressions\n')
print('regex_matching: {}'.format(regex_matching))
if regex_matching:
print('prefer_regex: {}'.format(prefer_regex))
print('prefer_factor: {}'.format(prefer_factor))
print('avoid_regex: {}'.format(avoid_regex))
print('avoid_factor: {}'.format(avoid_factor))
print()
print('re_match_cmdline: {}'.format(re_match_cmdline))
if re_match_cmdline:
print('prefer_re_cmdline: {}'.format(prefer_re_cmdline))
print('prefer_cmd_factor: {}'.format(prefer_cmd_factor))
print('avoid_re_cmdline: {}'.format(avoid_re_cmdline))
print('avoid_cmd_factor: {}'.format(avoid_cmd_factor))
print()
print('re_match_uid: {}'.format(re_match_uid))
if re_match_uid:
print('prefer_re_uid: {}'.format(prefer_re_uid))
print('prefer_uid_factor: {}'.format(prefer_uid_factor))
print('avoid_re_uid: {}'.format(avoid_re_uid))
print('avoid_uid_factor: {}'.format(avoid_uid_factor))
print('\n5. The execution of a specific command instead of sending the\nSIGTERM signal\n')
print('execute_the_command: {}'.format(execute_the_command))
if execute_the_command:
print('\nPROCESS NAME COMMAND TO EXECUTE')
for key in etc_dict:
print('{} {}'.format(key.ljust(15), etc_dict[key]))
print('\n6. GUI notifications:\n- OOM prevention results and\n- low memory warnings\n')
print('gui_notifications: {}'.format(gui_notifications))
print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings))
if gui_low_memory_warnings:
print('min_time_between_warnings: {}'.format(min_time_between_warnings))
print('mem_min_warnings: {} MiB, {} %'.format(
round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1)))
print('swap_min_warnings: {}'.format(swap_min_warnings))
print('zram_max_warnings: {} MiB, {} %'.format(
round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1)))
print(
'\n7. Preventing the slowing down of the program'
'\n[displaying these options need fix]\n')
print('mlockall: {} ({})'.format(mlockall, mla_res))
print('niceness: {} ({})'.format(
niceness, niceness_result
))
print('oom_score_adj: {} ({})'.format(
oom_score_adj, oom_score_adj_result
))
print('realtime_ionice: {} ({})'.format(realtime_ionice, ''))
if realtime_ionice:
print('realtime_ionice_classdata: {}'.format(realtime_ionice_classdata))
print('\n8. Output verbosity\n')
print('print_config: {}'.format(print_config))
print('print_mem_check_results: {}'.format(print_mem_check_results))
print('print_sleep_periods: {}\n'.format(print_sleep_periods))
##########################################################################
# for calculating the column width when printing mem and zram
mem_len = len(str(round(mem_total / 1024.0)))
if gui_notifications or gui_low_memory_warnings:
from subprocess import Popen, TimeoutExpired
notify_sig_dict = {SIGKILL: 'Killing',
SIGTERM: 'Terminating'}
rate_mem = rate_mem * 1048576
rate_swap = rate_swap * 1048576
rate_zram = rate_zram * 1048576
warn_time_now = 0
warn_time_delta = 1000
warn_timer = 0
x = time() - start_time
print('The duration of startup:',
round(x * 1000, 1), 'ms')
print('Monitoring started!')
def save_env_cache():
z = '{}\n'.format(int(time()))
a = root_notify_env()
#print(a)
for i in a:
z = z + '{}\x00{}\x00{}\n'.format(i[0], i[1], i[2])
write(cache_path, z)
os.chmod(cache_path, 0000)
return a
def read_env_cache():
x, y = [], []
try:
with open(cache_path) as f:
for n, line in enumerate(f):
if n is 0:
t = line[:-1]
y.append(t)
continue
if n > 0:
x.append(line[:-1].split('\x00'))
except FileNotFoundError:
return None
y.append(x)
return y
def root_env_cache():
cache = read_env_cache()
if cache is None:
print('cache not found, get new env and cache it')
return save_env_cache()
delta_t = time() - int(cache[0])
if delta_t > cache_time:
print('cache time: {}, delta: {}, ' \
'get new env and cache it'.format(
cache_time, round(delta_t)))
save_env_cache()
return root_notify_env()
else:
print('cache time: {}, delta: {}, ' \
'get cached env'.format(
cache_time, round(delta_t)))
return cache[1]
t1 = time()
# root_env_cache()
t2 = time()
# print(t2 - t1)
stdout.flush()
#exit()
# ввести через конфиг!
sigterm_psi = 60
sigkill_psi = 95
avg_min_time = 4
psi_min_sleep_time_after_action = 16
##########################################################################
if psi_support:
ta0 = time()
a0 = psi_mem_some_avg_total()
kill_psi_t0 = time()
term_psi_t0 = time()
while True:
if psi_support:
ta1= time()
dt = ta1 - ta0
if dt >= avg_min_time:
a1 = psi_mem_some_avg_total()
avg = (a1 - a0) / (ta1 - ta0) / 10000
print(rline1(psi_path))
print('PSI mem some avg: {}, PSI avg time: {}'.format(round(avg, 2), round(dt, 1)))
ta0 = ta1
a0 = a1
if avg >= sigkill_psi and time() - kill_psi_t0 >= psi_min_sleep_time_after_action:
time0 = time()
mem_info = 'avg ({}) > sigkill_psi ({})'.format(round(avg, 2), sigkill_psi)
find_victim_and_send_signal(SIGKILL)
kill_psi_t0 = time()
elif avg >= sigterm_psi and time() - term_psi_t0 >= psi_min_sleep_time_after_action:
time0 = time()
mem_info = 'avg ({}) > sigterm_psi ({})'.format(round(avg, 2), sigterm_psi)
find_victim_and_send_signal(SIGTERM)
term_psi_t0 = time()
else:
print('PSI is OK or psi_min_sleep_time_after_action did not pass')
mem_available, swap_total, swap_free = check_mem_and_swap()
# if swap_min_sigkill is set in percent
if swap_kill_is_percent:
swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0
if swap_term_is_percent:
swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0
if swap_warn_is_percent:
swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0
mem_used_zram = check_zram()
if print_mem_check_results:
# Calculate 'swap-column' width
swap_len = len(str(round(swap_total / 1024.0)))
# Output avialable mem sizes
if swap_total == 0 and mem_used_zram == 0:
print('MemAvail: {} M, {} %'.format(
human(mem_available, mem_len),
just_percent_mem(mem_available / mem_total)))
elif swap_total > 0 and mem_used_zram == 0:
print('MemAvail: {} M, {} % | SwapFree: {} M, {} %'.format(
human(mem_available, mem_len),
just_percent_mem(mem_available / mem_total),
human(swap_free, swap_len),
just_percent_swap(swap_free / (swap_total + 0.1))))
else:
print('MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem'
'UsedZram: {} M, {} %'.format(
human(mem_available, mem_len),
just_percent_mem(mem_available / mem_total),
human(swap_free, swap_len),
just_percent_swap(swap_free / (swap_total + 0.1)),
human(mem_used_zram, mem_len),
just_percent_mem(mem_used_zram / mem_total)))
# если swap_min_sigkill задан в абсолютной величине и Swap_total = 0
if swap_total > swap_min_sigkill_kb: # If swap_min_sigkill is absolute
swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1))
else:
swap_sigkill_pc = '-'
if swap_total > swap_min_sigterm_kb:
swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1))
else:
# печатать так: SwapTotal = 0, ignore swapspace
swap_sigterm_pc = '-'
# далее пошло ветвление
# MEM SWAP KILL
if mem_available <= mem_min_sigkill_kb and \
swap_free <= swap_min_sigkill_kb:
time0 = time()
mem_info = '\n\033[4mMemory status that requires corrective actions:' \
'\033[0m\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
'p_min_sigkill [{} MiB, {} %]'.format(
kib_to_mib(mem_available),
percent(mem_available / mem_total),
kib_to_mib(mem_min_sigkill_kb),
percent(mem_min_sigkill_kb / mem_total),
kib_to_mib(swap_free),
percent(swap_free / (swap_total + 0.1)),
kib_to_mib(swap_min_sigkill_kb),
swap_sigkill_pc)
find_victim_and_send_signal(SIGKILL)
# ZRAM KILL
elif mem_used_zram >= zram_max_sigkill_kb:
time0 = time()
mem_info = '\n\033[4mMemory status that requires corrective actions:' \
'\033[0m\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
'kill [{} MiB, {} %]'.format(
kib_to_mib(mem_used_zram),
percent(mem_used_zram / mem_total),
kib_to_mib(zram_max_sigkill_kb),
percent(zram_max_sigkill_kb / mem_total))
find_victim_and_send_signal(SIGKILL)
# MEM SWAP TERM
elif mem_available <= mem_min_sigterm_kb and \
swap_free <= swap_min_sigterm_kb:
time0 = time()
mem_info = '\n\033[4mMemory status that requires corrective actions:' \
'\033[0m\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
'p_min_sigterm [{} MiB, {} %]'.format(
kib_to_mib(mem_available),
percent(mem_available / mem_total),
kib_to_mib(mem_min_sigterm_kb),
#percent(mem_min_sigterm_kb / mem_total),
# ОКРУГЛЯТЬ НА МЕСТЕ ВЫШЕ
round(mem_min_sigterm_percent, 1),
kib_to_mib(swap_free),
percent(swap_free / (swap_total + 0.1)),
kib_to_mib(swap_min_sigterm_kb),
swap_sigterm_pc)
find_victim_and_send_signal(SIGTERM)
# ZRAM TERM
elif mem_used_zram >= zram_max_sigterm_kb:
time0 = time()
mem_info = '\n\033[4mMemory status that requires corrective actions:' \
'\033[0m\n MemUsedZram [{} MiB, {} %] >= ' \
'zram_max_sigterm [{} M, {} %]'.format(
kib_to_mib(mem_used_zram),
percent(mem_used_zram / mem_total),
kib_to_mib(zram_max_sigterm_kb),
percent(zram_max_sigterm_kb / mem_total))
find_victim_and_send_signal(SIGTERM)
# LOW MEMORY WARNINGS
elif gui_low_memory_warnings:
if mem_available <= mem_min_warnings_kb and \
swap_free <= swap_min_warnings_kb + 0.1 or \
mem_used_zram >= zram_max_warnings_kb:
warn_time_delta = time() - warn_time_now
warn_time_now = time()
warn_timer += warn_time_delta
if warn_timer > min_time_between_warnings:
send_notify_warn()
warn_timer = 0
sleep_after_check_mem()
# SLEEP BETWEEN MEM CHECKS
else:
sleep_after_check_mem()