fix logging; pep8 validation; add forbid_negative_badness and log_dir options

This commit is contained in:
Alexey Avramov 2019-03-20 17:47:14 +09:00
parent db0eea2213
commit a6171e85b8
4 changed files with 232 additions and 152 deletions

View File

@ -4,29 +4,27 @@ PREFIX = /
all:
@ echo "Nothing to compile. Use: make install, make uninstall, make systemd"
install:
install:
install -d $(DESTDIR)/$(PREFIX)/usr/sbin
install -m0755 ./nohang $(DESTDIR)/$(PREFIX)/usr/sbin/nohang
install -m0755 ./nohang_notify_helper $(DESTDIR)/$(PREFIX)/usr/sbin/nohang_notify_helper
install -d $(DESTDIR)/$(PREFIX)/usr/bin
install -m0755 ./oom-sort $(DESTDIR)/$(PREFIX)/usr/bin/oom-sort
install -m0755 ./oom-trigger $(DESTDIR)/$(PREFIX)/usr/bin/oom-trigger
install -d $(DESTDIR)/$(PREFIX)/etc/nohang
install -m0644 ./nohang.conf $(DESTDIR)/$(PREFIX)/etc/nohang/$(VERSION)
install -m0644 ./nohang.conf $(DESTDIR)/$(PREFIX)/etc/nohang/nohang.conf.default
install -d $(DESTDIR)/$(PREFIX)/var/log/nohang
install -d $(DESTDIR)/$(PREFIX)/usr/share/man/man1
gzip -k -c nohang.1 > $(DESTDIR)/$(PREFIX)/usr/share/man/man1/nohang.1.gz
gzip -k -c oom-sort.1 > $(DESTDIR)/$(PREFIX)/usr/share/man/man1/oom-sort.1.gz
gzip -k -c oom-trigger.1 > $(DESTDIR)/$(PREFIX)/usr/share/man/man1/oom-trigger.1.gz
install -d $(DESTDIR)/$(PREFIX)/lib/systemd/system
install -m0644 ./nohang.service $(DESTDIR)/$(PREFIX)/lib/systemd/system/nohang.service
uninstall:
# 'make uninstall' must not fail with error if systemctl is unavailable or returns error
systemctl disable nohang.service || true
@ -40,7 +38,7 @@ uninstall:
rm -fv $(PREFIX)/lib/systemd/system/nohang.service
rm -fvr $(PREFIX)/etc/nohang/
rm -fvr $(PREFIX)/var/log/nohang/
systemd:
systemctl daemon-reload
systemctl enable nohang.service

284
nohang
View File

@ -9,32 +9,10 @@ from sys import stdout, stderr, argv, exit
from signal import SIGKILL, SIGTERM
import sys
import logging
from logging import basicConfig
from logging import info
start_time = time()
logfile = '/var/log/nohang/nohang.log'
basicConfig(filename=logfile,
level=logging.INFO,
format="%(asctime)s: %(message)s")
separate_log = False
def log(msg):
print(msg)
if separate_log:
info(msg)
help_mess = """usage: nohang [-h] [-c CONFIG]
optional arguments:
@ -65,7 +43,6 @@ wait_time = 10
notify_helper_path = '/usr/sbin/nohang_notify_helper'
victim_dict = dict()
@ -80,8 +57,19 @@ stat_dict = dict()
# define functions
def log(*msg):
    """Print a message to stdout and, when enabled, pass it to logging.

    All positional arguments are forwarded to print(), so they appear on
    stdout separated by single spaces. If the module-level flag
    `separate_log` is true, the same text is also handed to logging's
    info().
    """
    print(*msg)
    if separate_log:
        # logging's info(msg, *args) evaluates `msg % args`, so passing
        # *msg straight through would treat every extra argument as a
        # %-format argument and produce a logging error. Build the same
        # space-separated text that print() emits instead.
        info(' '.join(str(part) for part in msg))
def print_version():
# сначала пытаться получ версию прямо из гита - вариант для неустановленых
"""
сначала пытаться получ версию прямо из гита - вариант для неустановленых,
для тех, кто еще не запускал make install
"""
try:
v = rline1('/etc/nohang/version')
except FileNotFoundError:
@ -94,6 +82,8 @@ def print_version():
def test():
"""
"""
print(sys.version)
print(sys.argv)
@ -155,11 +145,14 @@ def test():
def uptime():
    """Return the first space-separated field of /proc/uptime as a float."""
    first_line = rline1('/proc/uptime')
    fields = first_line.split(' ')
    return float(fields[0])
def pid_to_starttime(pid):
"""
"""
try:
starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[
2].split(' ')[20]
@ -180,6 +173,8 @@ def get_victim_id(pid):
def errprint(*text):
    """Write the given arguments to stderr, flushing the stream at once."""
    print(*text, flush=True, file=stderr)
@ -200,19 +195,22 @@ def mlockall():
MCL_CURRENT | MCL_FUTURE
)
if result != 0:
print('Cannot lock all memory')
log('Cannot lock all memory')
else:
print('All memory locked with MCL_CURRENT | MCL_FUTURE')
log('All memory locked with MCL_CURRENT | MCL_FUTURE')
else:
print('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
def pid_to_state(pid):
    """Return one character taken from the first line of /proc/<pid>/stat.

    pid is concatenated into the path, so it must be a string.
    """
    stat_line = rline1('/proc/' + pid + '/stat')
    # Keep only the text after the last ')' and return the character at
    # index 1 of it (presumably the process state field; see proc(5)).
    after_comm = stat_line.rpartition(')')[2]
    return after_comm[1]
def update_stat_dict_and_print(key):
"""
"""
if key not in stat_dict:
stat_dict.update({key: 1})
@ -232,24 +230,10 @@ def update_stat_dict_and_print(key):
print(stats_msg)
'''
def psi_mem_some_avg_total():
if psi_support:
return float(rline1(psi_path).rpartition('=')[2])
'''
'''
def psi_mem_some_avg10():
if psi_support:
return float(rline1(psi_path).split(' ')[1].split('=')[1])
'''
# psi_metrics = 'some_avg10'
def find_psi_metrics_value(psi_path, psi_metrics):
"""
"""
if psi_support:
@ -309,9 +293,11 @@ def check_zram():
# Means that when setting zram disksize = 1 GiB available memory
# decrease by 0.0042 GiB.
# Found experimentally, requires clarification with different kernels and architectures.
# Found experimentally, requires clarification with different kernels and
# architectures.
# On small disk drives (up to gigabyte) it can be more, up to 0.0045.
# The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should be 0.001:
# The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should
# be 0.001:
# ("zram uses about 0.1% of the size of the disk"
# - https://www.kernel.org/doc/Documentation/blockdev/zram.txt),
# but this statement contradicts the experimental data.
@ -323,6 +309,8 @@ def check_zram():
def format_time(t):
"""
"""
t = int(t)
if t < 60:
return '{} sec'.format(t)
@ -406,15 +394,6 @@ def rline1(path):
'utf-8', 'ignore').split('\n')[0]
def kib_to_mib(num):
    """Return a KiB amount expressed in MiB, rounded to an integer."""
    mib = num / 1024.0
    return round(mib)
@ -431,6 +410,8 @@ def just_percent_mem(num):
def just_percent_swap(num):
    """Format a fraction as a percentage string at least 5 characters wide.

    num is multiplied by 100, rounded to one decimal place, converted to
    a string and right-justified with spaces.
    """
    percent = round(num * 100, 1)
    return str(percent).rjust(5, ' ')
@ -488,6 +469,8 @@ def pid_to_name(pid):
def pid_to_ppid(pid):
"""
"""
try:
with open('/proc/' + pid + '/status') as f:
for n, line in enumerate(f):
@ -506,6 +489,8 @@ def pid_to_ppid(pid):
def pid_to_ancestry(pid, max_ancestry_depth=1):
"""
"""
if max_ancestry_depth == 1:
ppid = pid_to_ppid(pid)
pname = pid_to_name(ppid)
@ -545,7 +530,7 @@ def pid_to_realpath(pid):
def pid_to_uid(pid):
'''return euid'''
"""return euid"""
try:
with open('/proc/' + pid + '/status') as f:
for n, line in enumerate(f):
@ -558,7 +543,7 @@ def pid_to_uid(pid):
def notify_send_wait(title, body):
'''GUI notifications with UID != 0'''
"""GUI notifications with UID != 0"""
with Popen(['notify-send', '--icon=dialog-warning', title, body]) as proc:
try:
proc.wait(timeout=wait_time)
@ -568,7 +553,7 @@ def notify_send_wait(title, body):
def notify_helper(title, body):
'''GUI notification with UID = 0'''
"""GUI notification with UID = 0"""
with Popen([notify_helper_path, title, body]) as proc:
try:
@ -727,6 +712,8 @@ pid_list = get_pid_list()
def get_non_decimal_pids():
"""
"""
non_decimal_list = []
for pid in pid_list:
if pid[0].isdecimal() is False:
@ -765,6 +752,10 @@ def pid_to_badness(pid):
if search(re_tup[1], uid) is not None:
badness += int(re_tup[0])
if forbid_negative_badness:
if badness < 0:
badness = 0
return badness, oom_score
except FileNotFoundError:
@ -796,9 +787,11 @@ def find_victim():
pid_badness_list = []
if print_proc_table:
log('===============================================================================')
log('=============================================================='
'=================')
log(' PID badness Name eUID cmdline')
log('------- ------- --------------- ---------- ---------------------------------')
log('------- ------- --------------- ---------- -----------'
'----------------------')
for pid in pid_list:
@ -834,10 +827,12 @@ def find_victim():
victim_name = pid_to_name(pid)
if print_proc_table:
log('===============================================================================')
log('============================================================'
'===================')
log(
'Process with highest badness (found in {} ms):\n PID: {}, Name: {}, badness: {}'.format(
'Process with highest badness (found in {} ms):\n PID: {}, Na'
'me: {}, badness: {}'.format(
round((time() - ft1) * 1000),
pid,
victim_name,
@ -849,6 +844,8 @@ def find_victim():
def find_victim_info(pid, victim_badness, name):
"""
"""
status0 = time()
@ -1080,8 +1077,9 @@ def implement_corrective_action(signal):
m = check_mem_and_swap()
ma = round(int(m[0]) / 1024.0)
sf = round(int(m[2]) / 1024.0)
log('Memory status before implementing a corrective action:\n MemAvailable'
': {} MiB, SwapFree: {} MiB'.format(ma, sf))
log('Memory status before implementing a corrective act'
'ion:\n MemAvailable'
': {} MiB, SwapFree: {} MiB'.format(ma, sf))
exit_status = os.system(etc_dict[name].replace(
'$PID', pid).replace('$NAME', pid_to_name(pid)))
@ -1093,13 +1091,15 @@ def implement_corrective_action(signal):
response_time = time() - time0
etc_info = 'Implement a corrective action:\n Run the command: {}' \
'\n Exit status: {}; total response time: {} ms'.format(
command.replace(
'$PID', pid).replace(
'$NAME', pid_to_name(pid)),
exit_status,
round(response_time * 1000))
etc_info = 'Implement a corrective act' \
'ion:\n Run the command: {}' \
'\n Exit status: {}; total response ' \
'time: {} ms'.format(
command.replace(
'$PID', pid).replace(
'$NAME', pid_to_name(pid)),
exit_status,
round(response_time * 1000))
print(etc_info)
@ -1110,7 +1110,8 @@ def implement_corrective_action(signal):
send_notify_etc(
pid,
name,
command.replace('$PID', pid).replace('$NAME', pid_to_name(pid)))
command.replace('$PID', pid).replace(
'$NAME', pid_to_name(pid)))
else:
@ -1119,8 +1120,9 @@ def implement_corrective_action(signal):
m = check_mem_and_swap()
ma = round(int(m[0]) / 1024.0)
sf = round(int(m[2]) / 1024.0)
log('Memory status before implementing a corrective action:\n MemAvailable'
': {} MiB, SwapFree: {} MiB'.format(ma, sf))
log('Memory status before implementing a correct'
'ive action:\n MemAvailable'
': {} MiB, SwapFree: {} MiB'.format(ma, sf))
os.kill(int(pid), signal)
response_time = time() - time0
@ -1149,12 +1151,14 @@ def implement_corrective_action(signal):
response_time = time() - time0
send_result = 'no such process; response time: {} ms'.format(
round(response_time * 1000))
key = 'FileNotFoundError (the victim died in the search process): '
key = 'FileNotFoundError (the victim died in the se' \
'arch process): '
except ProcessLookupError:
response_time = time() - time0
send_result = 'no such process; response time: {} ms'.format(
round(response_time * 1000))
key = 'ProcessLookupError (the victim died in the search process): '
key = 'ProcessLookupError (the victim died in the se' \
'arch process): '
log(preventing_oom_message)
@ -1175,7 +1179,6 @@ def implement_corrective_action(signal):
key = 'victim badness < min_badness'
update_stat_dict_and_print(key)
sleep_after_send_signal(signal)
@ -1262,7 +1265,8 @@ def calculate_percent(arg_key):
# Final validations...
if mem_min_percent < 0 or mem_min_percent > 100:
errprint(
'{}, as percents value, out of range [0; 100]\nExit'.format(arg_key))
'{}, as percents value, out of ran'
'ge [0; 100]\nExit'.format(arg_key))
exit(1)
# mem_min_sigterm_percent is clean and valid float percentage. Can
@ -1278,7 +1282,8 @@ def calculate_percent(arg_key):
mem_min_kb = mem_min_mb * 1024
if mem_min_kb > mem_total:
errprint(
'{} value can not be greater then MemTotal ({} MiB)\nExit'.format(
'{} value can not be greater then MemT'
'otal ({} MiB)\nExit'.format(
arg_key, round(
mem_total / 1024)))
exit(1)
@ -1381,6 +1386,7 @@ except ValueError:
print('Config:', config)
# todo: log it
##########################################################################
@ -1473,6 +1479,7 @@ except FileNotFoundError:
# validation of all parameters
forbid_negative_badness = conf_parse_bool('forbid_negative_badness')
print_victim_info = conf_parse_bool('print_victim_info')
print_config = conf_parse_bool('print_config')
print_mem_check_results = conf_parse_bool('print_mem_check_results')
@ -1491,20 +1498,23 @@ if regex_matching or re_match_cmdline or re_match_uid:
from re import search
import sre_constants
mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent = calculate_percent(
'mem_min_sigterm')
mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent = calculate_percent(
'mem_min_sigkill')
(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent
) = calculate_percent('mem_min_sigterm')
zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent = calculate_percent(
'zram_max_sigterm')
zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent = calculate_percent(
'zram_max_sigkill')
(mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent
) = calculate_percent('mem_min_sigkill')
mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent = calculate_percent(
'mem_min_warnings')
zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent = calculate_percent(
'zram_max_warnings')
(zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent
) = calculate_percent('zram_max_sigterm')
(zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent
) = calculate_percent('zram_max_sigkill')
(mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent
) = calculate_percent('mem_min_warnings')
(zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent
) = calculate_percent('zram_max_warnings')
if 'rate_mem' in config_dict:
@ -1697,10 +1707,12 @@ if 'max_post_sigterm_victim_lifetime' in config_dict:
max_post_sigterm_victim_lifetime = string_to_float_convert_test(
config_dict['max_post_sigterm_victim_lifetime'])
if max_post_sigterm_victim_lifetime is None:
errprint('Invalid max_post_sigterm_victim_lifetime value, not float\nExit')
errprint('Invalid max_post_sigterm_victim_lifetime val'
'ue, not float\nExit')
exit(1)
if max_post_sigterm_victim_lifetime < 0:
errprint('max_post_sigterm_victim_lifetime must be non-negative number\nExit')
errprint('max_post_sigterm_victim_lifetime must be non-n'
'egative number\nExit')
exit(1)
else:
errprint('max_post_sigterm_victim_lifetime is not in config\nExit')
@ -1714,7 +1726,6 @@ else:
exit(1)
if 'psi_path' in config_dict:
psi_path = config_dict['psi_path']
else:
@ -1729,11 +1740,51 @@ else:
exit(1)
if 'log_dir' in config_dict:
log_dir = config_dict['log_dir']
else:
errprint('log_dir is not in config\nExit')
exit(1)
print_total_stat = conf_parse_bool('print_total_stat')
print_proc_table = conf_parse_bool('print_proc_table')
separate_log = conf_parse_bool('separate_log')
if separate_log:
import logging
from logging import basicConfig
from logging import info
try:
os.mkdir(log_dir)
except PermissionError:
print('ERROR: can not create log dir')
except FileExistsError:
pass
logfile = log_dir + '/nohang.log'
try:
with open(logfile, 'a') as f:
pass
except FileNotFoundError:
print('ERROR: log FileNotFoundError')
except PermissionError:
print('ERROR: log PermissionError')
try:
basicConfig(
filename=logfile,
level=logging.INFO,
format="%(asctime)s: %(message)s")
except PermissionError:
errprint('ERROR: Permission denied: {}'.format(logfile))
except FileNotFoundError:
errprint('ERROR: FileNotFoundError: {}'.format(logfile))
if 'min_mem_report_interval' in config_dict:
min_mem_report_interval = string_to_float_convert_test(
@ -1790,12 +1841,9 @@ if max_sleep_time < min_sleep_time:
psi_support = os.path.exists(psi_path)
##########################################################################
# Get KiB levels if it's possible.
# получ кб. если не кб - то процент. Если процент - находим кб ниже на
@ -1834,7 +1882,8 @@ def get_swap_threshold_tuple(string):
return value, False
else:
errprint('Invalid config file. There are invalid units somewhere\nExit')
errprint(
'Invalid config file. There are invalid units somewhere\nExit')
exit(1)
@ -1869,7 +1918,8 @@ else:
if print_config:
print(
'\n1. Memory levels to respond to as an OOM threat\n[displaying these options need fix]\n')
'\n1. Memory levels to respond to as an OOM threat\n[display'
'ing these options need fix]\n')
print('mem_min_sigterm: {} MiB, {} %'.format(
round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1)))
@ -1884,7 +1934,8 @@ if print_config:
print('zram_max_sigkill: {} MiB, {} %'.format(
round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1)))
print('\n2. The frequency of checking the level of available memory (and CPU usage)\n')
print('\n2. The frequency of checking the level of available m'
'emory (and CPU usage)\n')
print('rate_mem: {}'.format(rate_mem))
print('rate_swap: {}'.format(rate_swap))
print('rate_zram: {}'.format(rate_zram))
@ -1906,19 +1957,22 @@ if print_config:
print('(todo)')
print('\n5. The execution of a specific command instead of sending the\nSIGTERM signal\n')
print('\n5. The execution of a specific command instead of sen'
'ding the\nSIGTERM signal\n')
print('execute_the_command: {}'.format(execute_the_command))
if execute_the_command:
print('\nPROCESS NAME COMMAND TO EXECUTE')
for key in etc_dict:
print('{} {}'.format(key.ljust(15), etc_dict[key]))
print('\n6. GUI notifications:\n- OOM prevention results and\n- low memory warnings\n')
print('\n6. GUI notifications:\n- OOM prevention results and\n- low m'
'emory warnings\n')
print('gui_notifications: {}'.format(gui_notifications))
print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings))
if gui_low_memory_warnings:
print('min_time_between_warnings: {}'.format(min_time_between_warnings))
print('min_time_between_warnings: {}'.format(
min_time_between_warnings))
print('mem_min_warnings: {} MiB, {} %'.format(
round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1)))
@ -1973,7 +2027,7 @@ if print_proc_table:
find_victim()
print()
print('Monitoring started!')
log('Monitoring started!')
stdout.flush()
@ -2011,14 +2065,16 @@ while True:
if avg10 >= sigkill_psi and time() - psi_t0 >= psi_avg10_sleep_time:
time0 = time()
mem_info = 'PSI avg value ({}) > sigkill_psi ({})'.format(avg10, sigkill_psi)
mem_info = 'PSI avg value ({}) > sigkill_psi ({})'.format(
avg10, sigkill_psi)
implement_corrective_action(SIGKILL)
psi_t0 = time()
continue
if avg10 >= sigterm_psi and time() - psi_t0 >= psi_avg10_sleep_time:
time0 = time()
mem_info = 'PSI avg value ({}) > sigterm_psi ({})'.format(avg10, sigterm_psi)
mem_info = 'PSI avg value ({}) > sigterm_psi ({})'.format(
avg10, sigterm_psi)
implement_corrective_action(SIGTERM)
psi_t0 = time()
continue
@ -2076,7 +2132,7 @@ while True:
# Calculate 'swap-column' width
swap_len = len(str(round(swap_total / 1024.0)))
# Output avialable mem sizes
# Output available mem sizes
if swap_total == 0 and mem_used_zram == 0:
log('{}MemAvail: {} M, {} %{}'.format(
avg_value,
@ -2133,7 +2189,8 @@ while True:
swap_free <= swap_min_sigkill_kb):
time0 = time()
mem_info = 'Hard threshold exeeded\nMemory status that requires corrective actions:' \
mem_info = 'Hard threshold exceeded\nMemory status that requ' \
'ires corrective actions:' \
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
'p_min_sigkill [{} MiB, {} %]'.format(
@ -2154,7 +2211,8 @@ while True:
if mem_used_zram >= zram_max_sigkill_kb:
time0 = time()
mem_info = 'Hard threshold exeeded\nMemory status that requires corrective actions:' \
mem_info = 'Hard threshold exceeded\nMemory status that requir' \
'es corrective actions:' \
'\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
'kill [{} MiB, {} %]'.format(
kib_to_mib(mem_used_zram),
@ -2172,7 +2230,8 @@ while True:
time0 = time()
mem_info = 'Soft threshold exeeded\nMemory status that requires corrective actions:' \
mem_info = 'Soft threshold exceeded\nMemory status that requi' \
'res corrective actions:' \
'\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
'p_min_sigterm [{} MiB, {} %]'.format(
@ -2195,7 +2254,8 @@ while True:
if mem_used_zram >= zram_max_sigterm_kb:
time0 = time()
mem_info = 'Soft threshold exeeded\nMemory status that requires corrective actions:' \
mem_info = 'Soft threshold exceeded\nMemory status that requ' \
'ires corrective actions:' \
'\n MemUsedZram [{} MiB, {} %] >= ' \
'zram_max_sigterm [{} M, {} %]'.format(
kib_to_mib(mem_used_zram),

View File

@ -12,20 +12,22 @@
The configuration includes the following sections:
1. Memory levels to respond to as an OOM threat
2. The frequency of checking the level of available memory
2. Response on PSI memory metrics
3. The frequency of checking the level of available memory
(and CPU usage)
3. The prevention of killing innocent victims
4. Impact on the badness of processes via matching their
4. The prevention of killing innocent victims
5. Impact on the badness of processes via matching their
- names,
- cmdlines and
- UIDs
with regular expressions
5. The execution of a specific command instead of sending the
6. The execution of a specific command instead of sending the
SIGTERM signal
6. GUI notifications:
7. GUI notifications:
- OOM prevention results and
- low memory warnings
7. Output verbosity
8. Output verbosity
9. Misc
Just read the description of the parameters and edit the values.
Please restart the program after editing the config.
@ -56,26 +58,42 @@ swap_min_sigkill = 5 %
usual hang level, not recommended to set very high.
Can be specified in % and M. Valid values are floating-point
numbers from the range [0; 90] %.
numbers from the range [0; 90] %.
zram_max_sigterm = 50 %
zram_max_sigkill = 55 %
#####################################################################
Response on PSI memory some/full avg10/avg60/avg300 value
(/proc/pressure/memory on systems with Linux 4.20+).
2. Response on PSI memory metrics (it needs Linux 4.20 and up)
About PSI:
https://facebookmicrosites.github.io/psi/
Disabled by default (ignore_psi = True).
ignore_psi = True
Choose path to PSI file.
Choose the path to the PSI file.
By default, the system-wide file is monitored: /proc/pressure/memory
You can also set a file to monitor a single cgroup slice.
For example:
psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
psi_path = ./psi_dummy
psi_path = /proc/pressure/memory
Valid psi_metrics are:
some_avg10
some_avg60
some_avg300
full_avg10
full_avg60
full_avg300
some_avg10 is the most sensitive.
psi_metrics = some_avg10
sigterm_psi_avg10 = 60
@ -86,7 +104,7 @@ psi_avg10_sleep_time = 60
#####################################################################
2. The frequency of checking the amount of available memory
3. The frequency of checking the amount of available memory
(and CPU usage)
Coefficients that affect the intensity of monitoring. Reducing
@ -124,9 +142,10 @@ min_sleep_time = 0.1
#####################################################################
3. The prevention of killing innocent victims
4. The prevention of killing innocent victims
Минимальное значение oom_score, которым должен обладать
Минимальное значение badness (по умолчанию равно oom_score),
которым должен обладать
процесс для того, чтобы ему был отправлен сигнал.
Позволяет предотвратить убийство невиновных если что-то
пойдет не так.
@ -163,7 +182,7 @@ oom_score_adj_max = 30
#####################################################################
4. Impact on the badness of processes via matching their names,
5. Impact on the badness of processes via matching their names,
cmdlines or UIDs with regular expressions using re.search().
See https://en.wikipedia.org/wiki/Regular_expression and
@ -179,7 +198,7 @@ oom_score_adj_max = 30
names, cmdlines and UIDs of processes.
4.1 Matching process names with RE patterns
5.1 Matching process names with RE patterns
Valid values are True and False.
@ -203,7 +222,7 @@ regex_matching = False
@PROCESSNAME_RE 300 /// ^(chromium|firefox)$
4.2 Matching cmdlines with RE patterns
5.2 Matching cmdlines with RE patterns
A good option that allows fine adjustment.
@ -214,7 +233,7 @@ re_match_cmdline = False
@CMDLINE_RE -200 /// ^/usr/lib/virtualbox
4.3 Matching UIDs with RE patterns
5.3 Matching UIDs with RE patterns
The slowest option
@ -227,7 +246,7 @@ re_match_uid = False
#####################################################################
5. The execution of a specific command instead of sending the
6. The execution of a specific command instead of sending the
SIGTERM signal.
For processes with a specific name you can specify a command to
@ -277,7 +296,7 @@ $ETC apache2 /// systemctl restart apache2
#####################################################################
6. GUI notifications:
7. GUI notifications:
- OOM prevention results and
- low memory warnings
@ -323,7 +342,7 @@ zram_max_warnings = 40 %
#####################################################################
7. Verbosity
8. Verbosity
Display the configuration when the program starts.
Valid values are True and False.
@ -357,15 +376,20 @@ print_proc_table = False
print_victim_info = True
Максимальная глубина показа родословной. По умолчанию (1)
показывается только родитель - PPID.
Максимальная глубина показа родословной жертвы.
По умолчанию (1) показывается только родитель - PPID.
Целое положительное число.
max_ancestry_depth = 3
max_ancestry_depth = 1
separate_log = False
log_dir = /var/log/nohang
#####################################################################
8. Misc
9. Misc
Жертва может не реагировать на SIGTERM.
max_post_sigterm_victim_lifetime - это время, при превышении
@ -378,5 +402,7 @@ max_post_sigterm_victim_lifetime = 10
Пустая строка - ничего не выполнять.
Произвольная строка.
post_kill_exe =
post_kill_exe =
forbid_negative_badness = True

View File

@ -49,7 +49,3 @@ while True:
stdout.flush()
sleep(0.1)