diff --git a/Makefile b/Makefile
index a1c0b0f..2eea755 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,6 @@ install:
install -d $(DESTDIR)$(BINDIR)
install -m0755 nohang $(DESTDIR)$(BINDIR)/nohang
- install -m0755 nohang_notify_helper $(DESTDIR)$(BINDIR)/nohang_notify_helper
install -m0755 oom-sort $(DESTDIR)$(BINDIR)/oom-sort
install -m0755 psi-top $(DESTDIR)$(BINDIR)/psi-top
install -m0755 psi-monitor $(DESTDIR)$(BINDIR)/psi-monitor
@@ -43,7 +42,6 @@ install-desktop:
install -d $(DESTDIR)$(BINDIR)
install -m0755 nohang $(DESTDIR)$(BINDIR)/nohang
- install -m0755 nohang_notify_helper $(DESTDIR)$(BINDIR)/nohang_notify_helper
install -m0755 oom-sort $(DESTDIR)$(BINDIR)/oom-sort
install -m0755 psi-top $(DESTDIR)$(BINDIR)/psi-top
install -m0755 psi-monitor $(DESTDIR)$(BINDIR)/psi-monitor
@@ -76,7 +74,6 @@ uninstall:
-systemctl disable nohang.service || true
-systemctl daemon-reload
rm -fv $(DESTDIR)$(BINDIR)/nohang
- rm -fv $(DESTDIR)$(BINDIR)/nohang_notify_helper
rm -fv $(DESTDIR)$(BINDIR)/oom-sort
rm -fv $(DESTDIR)$(BINDIR)/psi-top
rm -fv $(DESTDIR)$(BINDIR)/psi-monitor
@@ -95,7 +92,6 @@ systemd:
pylint:
-pylint3 -E nohang
- -pylint3 -E nohang_notify_helper
-pylint3 -E oom-sort
-pylint3 -E psi-top
-pylint3 -E psi-monitor
diff --git a/nohang b/nohang
index da010ea..4d0e433 100755
--- a/nohang
+++ b/nohang
@@ -17,6 +17,265 @@ from threading import Thread
# define functions
+def exe(cmd):
+    """Run the shell command cmd with os.system() and return its result.
+    The command, its exit status and its duration are written to the log."""
+    log('Execute the command: {}'.format(cmd))
+    t0 = time()
+    write_self_oom_score_adj(self_oom_score_adj_max)  # value while cmd runs
+    err = os.system(cmd)
+    write_self_oom_score_adj(self_oom_score_adj_min)  # value after cmd ends
+    dt = time() - t0
+    log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3)))
+    return err
+
+
+def go(func, *a):
+    """Run func(*a) in a new thread; a failed start is logged, not raised.
+    """
+    t1 = time()
+    th = Thread(target=func, args=a)
+    th_name = th.name  # Thread.getName() is deprecated in favour of .name
+    if debug_threading:
+        log('Starting {}'.format(th_name))
+    try:
+        th.start()
+        t2 = time()
+        if debug_threading:
+            log('{} has started in {} ms'.format(
+                th_name, round((t2 - t1) * 1000, 1)))
+    except RuntimeError:
+        # always report this: the action given to the thread will not run
+        log('RuntimeError: cannot start {}'.format(th_name))
+
+
+def re_pid_environ(pid):
+    """
+    Read the environment of one process; pid is its PID as a str.
+    Returns a tuple with USER, DISPLAY, DBUS like the following:
+    ('user', 'DISPLAY=:0',
+     'DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus')
+    Returns None if these vars are not in /proc/[pid]/environ.
+    """
+
+    try:
+        # read bytes, drop undecodable ones: text mode may raise on bad UTF-8
+        with open('/proc/' + pid + '/environ', 'rb') as f:
+            env = f.read().decode('utf-8', 'ignore')
+    except FileNotFoundError:
+        log('notify helper: FileNotFoundError')
+        return None
+    except ProcessLookupError:
+        log('notify helper: ProcessLookupError')
+        return None
+
+    if display_env in env and dbus_env in env and user_env in env:
+        # iterating over a list of process environment variables
+        for i in env.split('\x00'):
+            # exclude Display Manager's user
+            if i.startswith('HOME=/var'):
+                return None
+
+            if i.startswith(user_env):
+                user = i
+                if user == 'USER=root':
+                    return None
+                continue
+
+            if i.startswith(display_env):
+                # strip only the '.screen' suffix (':0.0' -> ':0'); the old
+                # i[:10] slice also turned ':10' into ':1'
+                head, sep, tail = i.rpartition(':')
+                display = head + sep + tail.partition('.')[0] if sep else i
+                continue
+
+            if i.startswith(dbus_env):
+                dbus = i
+                continue
+
+        try:
+            return user.partition('USER=')[2], display, dbus
+        except UnboundLocalError:
+            log('notify helper: UnboundLocalError')
+            return None
+
+
+def root_notify_env():
+    """Return a list with one (user, display, dbus) tuple per user+display."""
+    unsorted_envs_list = []
+    # iterate over /proc entries and collect the env of suitable processes
+    for pid in os.listdir('/proc'):
+
+        if os.path.exists('/proc/' + pid + '/exe') is True:
+            one_env = re_pid_environ(pid)
+            unsorted_envs_list.append(one_env)
+
+    env = set(unsorted_envs_list)
+    env.discard(None)
+
+    # keep one tuple per user + display pair (extra dbus values are dropped)
+    new_env = []
+    end = []
+    for i in env:
+        key = i[0] + i[1]
+        if key not in end:
+            end.append(key)
+            new_env.append(i)
+        else:
+            continue
+
+    return new_env
+
+
+def pop(cmd, username):
+    """Run cmd (an argument list) with Popen and kill it if it outlives the
+    timeout (2 sec if swap_total == 0, else 20); username is only logged."""
+    if swap_total == 0:
+        wait_time = 2
+    else:
+        wait_time = 20
+
+    t3 = time()
+
+    with Popen(cmd) as proc:
+        try:
+            proc.wait(timeout=wait_time)
+        except TimeoutExpired:
+            proc.kill()
+            if debug_gui_notifications:
+                log('TimeoutExpired: notify user: {}'.format(username))
+
+    t4 = time()
+
+    if debug_gui_notifications:
+        log('Popen time: {} sec; cmd: {}'.format(round(t4 - t3, 3), cmd))
+
+
+def send_notification(title, body):
+    """
+    Show a GUI notification with the given title and body via notify-send.
+
+    If not running as root (self_uid != 0), notify-send is run directly.
+    As root, one notify-send per entry of root_notify_env() is started
+    through sudo, each in its own thread; that list is cached in envd
+    and rebuilt once it is older than env_cache_time seconds.
+    """
+    if self_uid != 0:
+        cmd = ['notify-send', '--icon=dialog-warning', title, body]
+        username = '(UID={})'.format(self_uid)
+        pop(cmd, username)
+        return None
+
+    t1 = time()
+
+    # rescan /proc on first use or when the cached list is too old
+    if envd['t'] is None or time() - envd['t'] > env_cache_time:
+        list_with_envs = root_notify_env()
+        envd['list_with_envs'] = list_with_envs
+        envd['t'] = time()
+    else:
+        list_with_envs = envd['list_with_envs']
+
+    list_len = len(list_with_envs)
+
+    t2 = time()
+    if debug_gui_notifications:
+        log('Find env time: {} ms'.format(round((t2 - t1) * 1000)))
+
+    # if somebody logged in with GUI
+    if list_len > 0:
+
+        for i in list_with_envs:
+            if debug_gui_notifications:
+                log('Send a GUI notification:\n ',
+                    'title: ', [title],
+                    '\n body: ', [body],
+                    '\n user/env:', i
+                    )
+
+        # iterating over logged-in users
+        for i in list_with_envs:
+            # i is (user, 'DISPLAY=...', 'DBUS_SESSION_BUS_ADDRESS=...')
+            username, display_env, dbus_env = i[0], i[1], i[2]
+            display_tuple = display_env.partition('=')
+            dbus_tuple = dbus_env.partition('=')
+            display_value = display_tuple[2]
+            dbus_value = dbus_tuple[2]
+
+            cmd = [
+                'sudo', '-u', username,
+                'env',
+                'DISPLAY=' + display_value,
+                'DBUS_SESSION_BUS_ADDRESS=' + dbus_value,
+                'notify-send',
+                '--icon=dialog-warning',
+                title,
+                body
+            ]
+
+            # run in a thread: pop() blocks until notify-send exits or times out
+            go(pop, cmd, username)
+
+
+def send_notify_warn():
+    """Implement low memory warnings: run warning_exe if check_warning_exe
+    is set, else send a 'Low memory' notification; both in a new thread."""
+    log('Warning threshold exceeded')
+
+    if check_warning_exe:
+        go(exe, warning_exe)
+
+    else:
+
+        title = 'Low memory'
+        # + 0.1 keeps the division defined when swap_total is 0
+        body = 'MemAvail: {}%\nSwapFree: {}%'.format(
+            round(mem_available / mem_total * 100),
+            round(swap_free / (swap_total + 0.1) * 100)
+        )
+
+        go(send_notification, title, body)
+
+
+def send_notify(threshold, name, pid):
+    """
+    Send a 'Freeze prevention' notification in a new thread.
+
+    threshold: key for notify_sig_dict
+    name: str process name
+    pid: str process pid
+    """
+
+    title = 'Freeze prevention'
+    body = '{} [{}] {}'.format(
+        notify_sig_dict[threshold],
+        pid,
+        name.replace(
+            # symbol '&' can break notifications in some themes,
+            # therefore it is replaced by '*'
+            '&', '*'
+        )
+    )
+
+    go(send_notification, title, body)
+
+
+def send_notify_etc(pid, name, command):
+    """
+    Send a 'Freeze prevention' notification (in a new thread) that names
+    the victim and the command; '&' is replaced by '*' in both.
+    command: str command that will be executed
+    name: str process name
+    pid: str process pid
+    """
+    title = 'Freeze prevention'
+    body = 'Victim is [{}] {}\nExecute the co' \
+        'mmand:\n{}'.format(
+            pid, name.replace('&', '*'), command.replace('&', '*'))
+
+    go(send_notification, title, body)
+
+
def check_config():
"""
"""
@@ -181,15 +440,6 @@ def check_config():
exit()
-def encoder(string):
- """
- """
- encoded = ''
- for i in string:
- encoded += str(ord(i)) + ':'
- return encoded[:-1]
-
-
def get_swap_threshold_tuple(string):
# re (Num %, True) or (Num KiB, False)
"""Returns KiB value if abs val was set in config, or tuple with %"""
@@ -292,36 +542,6 @@ def signal_handler_inner(signum, frame):
sig_dict[signum]))
-def exe(cmd):
- """
- """
-
- log('Execute the command: {}'.format(cmd))
- t0 = time()
- write_self_oom_score_adj(self_oom_score_adj_max)
- err = os.system(cmd)
- write_self_oom_score_adj(self_oom_score_adj_min)
- dt = time() - t0
- log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3)))
- return err
-
-
-def go(func, *a):
- """ run func in new thread
- """
- t1 = time()
- try:
- Thread(target=func, args=a).start()
- except RuntimeError:
- print('RuntimeError: cannot spawn a new thread')
- return 1
- t2 = time()
- log('New thread spawned in {} ms'.format(
- round((t2 - t1) * 1000, 1)
- ))
- return 0
-
-
def write(path, string):
"""
"""
@@ -357,15 +577,9 @@ def func_print_proc_table():
def log(*msg):
"""
"""
- try:
- print(*msg)
- except OSError:
- sleep(0.01)
+    print(*msg)
     if separate_log:
-        try:
-            logging.info(*msg)
-        except OSError:
-            sleep(0.01)
+        logging.info(' '.join(map(str, msg)))  # logging %-formats extra args
def print_version():
@@ -1009,80 +1223,6 @@ def zram_stat(zram_id):
return disksize, mem_used_total # BYTES, str
-def send_notify_warn():
- """
- Look for process with maximum 'badness' and warn user with notification.
- (implement Low memory warnings)
- """
- log('Warning threshold exceeded')
-
- if check_warning_exe:
- exe(warning_exe)
-
- else:
-
- title = 'Low memory'
-
- body = 'MemAvail: {}%\nSwapFree: {}%'.format(
- round(mem_available / mem_total * 100),
- round(swap_free / (swap_total + 0.1) * 100)
- )
-
- send_notification(title, body)
-
-
-def send_notify(threshold, name, pid):
- """
- Notificate about OOM Preventing.
-
- threshold: key for notify_sig_dict
- name: str process name
- pid: str process pid
- """
-
- title = 'Freeze prevention'
- body = '{} [{}] {}'.format(
- notify_sig_dict[threshold],
- pid,
- name.replace(
- # symbol '&' can break notifications in some themes,
- # therefore it is replaced by '*'
- '&', '*'
- )
- )
-
- send_notification(title, body)
-
-
-def send_notify_etc(pid, name, command):
- """
- Notificate about OOM Preventing.
-
- command: str command that will be executed
- name: str process name
- pid: str process pid
- """
- title = 'Freeze prevention'
- body = 'Victim is [{}] {}\nExecute the co' \
- 'mmand:\n{}'.format(
- pid, name.replace('&', '*'), command.replace('&', '*'))
-
- send_notification(title, body)
-
-
-def send_notification(title, body):
- """
- """
- cmd = '{} "--euid={}" "--debug={}" "--title={}" "--body={}" &'.format(
- notify_helper_path,
- self_uid,
- debug_gui_notifications,
- title,
- encoder(body))
-
- go(exe, cmd)
-
-
def get_pid_list():
"""
Find pid list expect kthreads and zombies
@@ -2163,11 +2303,7 @@ def sleep_after_check_mem():
log('Sleep {} sec (t_mem={}, t_swap={}{})'.format(round(t, 2), round(
t_mem, 2), round(t_swap, 2), z))
- try:
- stdout.flush()
- except OSError:
- pass
-
+ stdout.flush()
sleep(t)
@@ -2631,10 +2767,19 @@ print_config_at_startup = conf_parse_bool('print_config_at_startup')
print_mem_check_results = conf_parse_bool('print_mem_check_results')
debug_sleep = conf_parse_bool('debug_sleep')
low_memory_warnings_enabled = conf_parse_bool('low_memory_warnings_enabled')
+
+
 post_action_gui_notifications = conf_parse_bool(
     'post_action_gui_notifications')
+debug_threading = conf_parse_bool('debug_threading')
+
+# import only after both flags are parsed; reading one earlier is a NameError
+if low_memory_warnings_enabled or post_action_gui_notifications:
+    from subprocess import Popen, TimeoutExpired
+
+
psi_checking_enabled = conf_parse_bool('psi_checking_enabled')
ignore_psi = not psi_checking_enabled
@@ -2694,6 +2839,20 @@ else:
exit(1)
+if 'env_cache_time' in config_dict:  # seconds the GUI env list is reused
+    env_cache_time = string_to_float_convert_test(
+        config_dict['env_cache_time'])
+    if env_cache_time is None:
+        errprint('Invalid env_cache_time value, not float\nExit')
+        exit(1)
+    if env_cache_time < 0:
+        errprint('env_cache_time MUST be >= 0\nExit')
+        exit(1)
+else:
+    errprint('env_cache_time not in config\nExit')
+    exit(1)
+
+
if 'fill_rate_mem' in config_dict:
fill_rate_mem = string_to_float_convert_test(config_dict['fill_rate_mem'])
if fill_rate_mem is None:
@@ -3201,6 +3360,14 @@ log('Monitoring has started!')
stdout.flush()
+display_env = 'DISPLAY='  # entry prefixes looked up in /proc/[pid]/environ
+dbus_env = 'DBUS_SESSION_BUS_ADDRESS='
+user_env = 'USER='
+# cache used by send_notification(): env list and the time() it was built
+envd = dict()
+envd['list_with_envs'] = envd['t'] = None
+
+
##########################################################################
diff --git a/nohang-desktop.conf b/nohang-desktop.conf
index e6d127c..7144fc1 100644
--- a/nohang-desktop.conf
+++ b/nohang-desktop.conf
@@ -297,6 +297,9 @@ warning_threshold_max_psi = 100
min_post_warning_delay = 30
+env_cache_time = 300
+
+
Ampersands (&) will be replaced with asterisks (*) in process
names and in commands.
@@ -347,6 +350,8 @@ debug_sleep = False
separate_log = False
+debug_threading = False
+
###############################################################################
9. Misc
diff --git a/nohang.conf b/nohang.conf
index 1b13348..b36e0e9 100644
--- a/nohang.conf
+++ b/nohang.conf
@@ -293,6 +293,8 @@ warning_threshold_max_psi = 100
min_post_warning_delay = 20
+env_cache_time = 300
+
Ampersands (&) will be replaced with asterisks (*) in process
names and in commands.
@@ -343,6 +345,8 @@ debug_sleep = False
separate_log = False
+debug_threading = False
+
###############################################################################
9. Misc
diff --git a/old/nohang b/old/nohang
new file mode 100755
index 0000000..da010ea
--- /dev/null
+++ b/old/nohang
@@ -0,0 +1,3360 @@
+#!/usr/bin/env python3
+"""A daemon that prevents OOM in Linux systems."""
+
+import os
+from ctypes import CDLL
+from time import sleep, time
+from operator import itemgetter
+from sys import stdout, stderr, argv, exit
+from re import search
+from sre_constants import error as invalid_re
+from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP
+from threading import Thread
+
+
+##########################################################################
+
+# define functions
+
+
+def check_config():
+ """
+ """
+
+ log('#' * 79)
+
+ log('0. Common zram settings')
+
+ log(' zram_checking_enabled: {}'.format(zram_checking_enabled))
+
+ log('1. Thresholds below which a signal should be sent to the victim')
+
+ log(' soft_threshold_min_mem: {} MiB, {} %'.format(
+ round(soft_threshold_min_mem_mb), round(soft_threshold_min_mem_percent, 1)))
+ log(' hard_threshold_min_mem: {} MiB, {} %'.format(
+ round(hard_threshold_min_mem_mb), round(hard_threshold_min_mem_percent, 1)))
+ log(' soft_threshold_min_swap: {}'.format(soft_threshold_min_swap))
+ log(' hard_threshold_min_swap: {}'.format(hard_threshold_min_swap))
+ log(' soft_threshold_max_zram: {} MiB, {} %'.format(
+ round(soft_threshold_max_zram_mb), round(soft_threshold_max_zram_percent, 1)))
+ log(' hard_threshold_max_zram: {} MiB, {} %'.format(
+ round(hard_threshold_max_zram_mb), round(hard_threshold_max_zram_percent, 1)))
+
+ log('2. Response on PSI memory metrics')
+
+ log(' psi_checking_enabled: {}'.format(psi_checking_enabled))
+ log(' psi_path: {}'.format(psi_path))
+ log(' psi_metrics: {}'.format(psi_metrics))
+ log(' soft_threshold_max_psi: {}'.format(soft_threshold_max_psi))
+ log(' hard_threshold_max_psi: {}'.format(hard_threshold_max_psi))
+ log(' psi_excess_duration: {} sec'.format(psi_excess_duration))
+ log(' psi_post_action_delay: {} sec'.format(psi_post_action_delay))
+
+ log('3. The frequency of checking the amount of available memory')
+
+ log(' fill_rate_mem: {}'.format(fill_rate_mem))
+ log(' fill_rate_swap: {}'.format(fill_rate_swap))
+ log(' fill_rate_zram: {}'.format(fill_rate_zram))
+ log(' max_sleep: {} sec'.format(max_sleep))
+ log(' min_sleep: {} sec'.format(min_sleep))
+ log(' over_sleep: {} sec'.format(over_sleep))
+
+ log('4. The prevention of killing innocent victims')
+
+ log(' min_badness: {}'.format(min_badness))
+ log(' post_soft_action_delay: {} sec'.format(post_soft_action_delay))
+ log(' post_zombie_delay: {} sec'.format(post_zombie_delay))
+ log(' victim_cache_time: {} sec'.format(victim_cache_time))
+ log(' ignore_positive_oom_score_adj: {}'.format(
+ ignore_positive_oom_score_adj))
+
+ log('5. Impact on the badness of processes')
+
+ log('5.1. Matching process names with RE patterns')
+ if len(badness_adj_re_name_list) > 0:
+ log(' regexp: badness_adj:')
+ for i in badness_adj_re_name_list:
+ log(' {} {}'.format(i[1], i[0]))
+ else:
+ log(' (not set)')
+
+ log('5.2. Matching CGroup_v1-line with RE patterns')
+ if len(badness_adj_re_cgroup_v1_list) > 0:
+ log(' regexp: badness_adj:')
+ for i in badness_adj_re_cgroup_v1_list:
+ log(' {} {}'.format(i[1], i[0]))
+ else:
+ log(' (not set)')
+
+ log('5.3. Matching CGroup_v2-line with RE patterns')
+ if len(badness_adj_re_cgroup_v2_list) > 0:
+ log(' regexp: badness_adj:')
+ for i in badness_adj_re_cgroup_v1_list:
+ log(' {} {}'.format(i[1], i[0]))
+ else:
+ log(' (not set)')
+
+ log('5.4. Matching eUIDs with RE patterns')
+ if len(badness_adj_re_cgroup_v2_list) > 0:
+ log(' regexp: badness_adj:')
+ for i in badness_adj_re_uid_list:
+ log(' {} {}'.format(i[1], i[0]))
+ else:
+ log(' (not set)')
+
+ log('5.5. Matching realpath with RE patterns')
+ if len(badness_adj_re_cgroup_v2_list) > 0:
+ log(' regexp: badness_adj:')
+ for i in badness_adj_re_realpath_list:
+ log(' {} {}'.format(i[1], i[0]))
+ else:
+ log(' (not set)')
+
+ log('5.6. Matching cmdlines with RE patterns')
+ if len(badness_adj_re_cgroup_v2_list) > 0:
+ log(' regexp: badness_adj:')
+ for i in badness_adj_re_cmdline_list:
+ log(' {} {}'.format(i[1], i[0]))
+ else:
+ log(' (not set)')
+
+ log('5.7. Matching environ with RE patterns')
+ if len(badness_adj_re_cgroup_v2_list) > 0:
+ log(' regexp: badness_adj:')
+ for i in badness_adj_re_environ_list:
+ log(' {} {}'.format(i[1], i[0]))
+ else:
+ log(' (not set)')
+
+ log('6. Customize corrective actions')
+
+ if len(soft_actions_list) > 0:
+ log(' Match by: regexp: command: ')
+ for i in soft_actions_list:
+ log(' {} {} {}'.format(i[0], i[1], i[2]))
+ else:
+ log(' (not set)')
+
+ log('7. GUI notifications')
+
+ log(' post_action_gui_notifications: {}'.format(
+ post_action_gui_notifications))
+ log(' low_memory_warnings_enabled: {}'.format(
+ low_memory_warnings_enabled))
+ log(' warning_exe: {}'.format(warning_exe))
+ log(' warning_threshold_min_mem: {} MiB, {} %'.format(round(
+ warning_threshold_min_mem_mb), round(warning_threshold_min_mem_percent, 1)))
+ log(' warning_threshold_min_swap: {}'.format(warning_threshold_min_swap))
+ log(' warning_threshold_max_zram: {} MiB, {} %'.format(round(
+ warning_threshold_max_zram_mb), round(warning_threshold_max_zram_percent, 1)))
+ log(' warning_threshold_max_psi: {}'.format(warning_threshold_max_psi))
+ log(' min_post_warning_delay: {} sec'.format(min_post_warning_delay))
+
+ log('8. Verbosity')
+
+ log(' print_config_at_startup: {}'.format(print_config_at_startup))
+ log(' print_mem_check_results: {}'.format(print_mem_check_results))
+ log(' min_mem_report_interval: {} sec'.format(min_mem_report_interval))
+ log(' debug_sleep: {}'.format(debug_sleep))
+ log(' print_statistics: {}'.format(print_statistics))
+ log(' print_proc_table: {}'.format(print_proc_table))
+ log(' extra_table_info: {}'.format(extra_table_info))
+ log(' print_victim_status: {}'.format(print_victim_status))
+ log(' print_victim_cmdline: {}'.format(print_victim_cmdline))
+ log(' max_victim_ancestry_depth: {}'.format(max_victim_ancestry_depth))
+ log(' debug_gui_notifications: {}'.format(debug_gui_notifications))
+ log(' separate_log: {}'.format(separate_log))
+ log(' debug_psi: {}'.format(debug_psi))
+
+ log('9. Misc')
+
+ log(' max_soft_exit_time: {} sec'.format(max_soft_exit_time))
+ log(' post_kill_exe: {}'.format(post_kill_exe))
+ log(' forbid_negative_badness: {}'.format(
+ forbid_negative_badness))
+
+ # log(': {}'.format())
+ log('#' * 79)
+
+ if check_config_flag:
+ log('config is OK')
+ exit()
+
+
+def encoder(string):
+ """
+ """
+ encoded = ''
+ for i in string:
+ encoded += str(ord(i)) + ':'
+ return encoded[:-1]
+
+
+def get_swap_threshold_tuple(string):
+ # re (Num %, True) or (Num KiB, False)
+ """Returns KiB value if abs val was set in config, or tuple with %"""
+ # return tuple with abs and bool: (abs %, True) or (abs MiB, False)
+
+ if string.endswith('%'):
+ valid = string_to_float_convert_test(string[:-1])
+ if valid is None:
+ errprint('somewhere swap unit is not float_%')
+ exit(1)
+
+ value = float(string[:-1].strip())
+ if value < 0 or value > 100:
+ errprint('invalid value, must be from the range[0; 100] %')
+ exit(1)
+
+ return value, True
+
+ elif string.endswith('M'):
+ valid = string_to_float_convert_test(string[:-1])
+ if valid is None:
+ errprint('somewhere swap unit is not float_M')
+ exit(1)
+
+ value = float(string[:-1].strip()) * 1024
+ if value < 0:
+ errprint('invalid unit in config (negative value)')
+ exit(1)
+
+ return value, False
+
+ else:
+ errprint(
+ 'Invalid config file. There are invalid units somewhere\nExit')
+ exit(1)
+
+
+def find_cgroup_indexes():
+ """ Find cgroup-line positions in /proc/*/cgroup file.
+ """
+
+ cgroup_v1_index = cgroup_v2_index = None
+
+ with open('/proc/self/cgroup') as f:
+ for index, line in enumerate(f):
+ if ':name=' in line:
+ cgroup_v1_index = index
+ if line.startswith('0::'):
+ cgroup_v2_index = index
+
+ return cgroup_v1_index, cgroup_v2_index
+
+
+def pid_to_rss(pid):
+ """
+ """
+ try:
+ rss = int(rline1(
+ '/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE
+ except IndexError:
+ rss = None
+ except FileNotFoundError:
+ rss = None
+ except ProcessLookupError:
+ rss = None
+ return rss
+
+
+def pid_to_vm_size(pid):
+ """
+ """
+ try:
+ vm_size = int(rline1(
+ '/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE
+ except IndexError:
+ vm_size = None
+ except FileNotFoundError:
+ vm_size = None
+ except ProcessLookupError:
+ vm_size = None
+ return vm_size
+
+
+def signal_handler(signum, frame):
+ """
+ """
+ for i in sig_list:
+ signal(i, signal_handler_inner)
+ log('Signal handler called with the {} signal '.format(
+ sig_dict[signum]))
+ update_stat_dict_and_print(None)
+ log('Exit')
+ exit()
+
+
+def signal_handler_inner(signum, frame):
+ """
+ """
+ log('Signal handler called with the {} signal (ignored) '.format(
+ sig_dict[signum]))
+
+
+def exe(cmd):
+ """
+ """
+
+ log('Execute the command: {}'.format(cmd))
+ t0 = time()
+ write_self_oom_score_adj(self_oom_score_adj_max)
+ err = os.system(cmd)
+ write_self_oom_score_adj(self_oom_score_adj_min)
+ dt = time() - t0
+ log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3)))
+ return err
+
+
+def go(func, *a):
+ """ run func in new thread
+ """
+ t1 = time()
+ try:
+ Thread(target=func, args=a).start()
+ except RuntimeError:
+ print('RuntimeError: cannot spawn a new thread')
+ return 1
+ t2 = time()
+ log('New thread spawned in {} ms'.format(
+ round((t2 - t1) * 1000, 1)
+ ))
+ return 0
+
+
+def write(path, string):
+ """
+ """
+ with open(path, 'w') as f:
+ f.write(string)
+
+
+def write_self_oom_score_adj(new_value):
+ """
+ """
+ if root:
+ write('/proc/self/oom_score_adj', new_value)
+
+
+def valid_re(reg_exp):
+ """Validate regular expression.
+ """
+ try:
+ search(reg_exp, '')
+ except invalid_re:
+ log('Invalid config: invalid regexp: {}'.format(reg_exp))
+ exit(1)
+
+
+def func_print_proc_table():
+ """
+ """
+ print_proc_table = True
+ find_victim(print_proc_table)
+ exit()
+
+
+def log(*msg):
+ """
+ """
+ try:
+ print(*msg)
+ except OSError:
+ sleep(0.01)
+ if separate_log:
+ try:
+ logging.info(*msg)
+ except OSError:
+ sleep(0.01)
+
+
+def print_version():
+ """
+ """
+ try:
+ v = rline1('/etc/nohang/version')
+ except FileNotFoundError:
+ v = None
+ if v is None:
+ print('nohang unknown version')
+ else:
+ print('nohang ' + v)
+ exit()
+
+
+def pid_to_cgroup_v1(pid):
+ """
+ """
+ cgroup_v1 = ''
+ try:
+ with open('/proc/' + pid + '/cgroup') as f:
+ for index, line in enumerate(f):
+ if index == cgroup_v1_index:
+ cgroup_v1 = '/' + line.partition('/')[2][:-1]
+ return cgroup_v1
+ except FileNotFoundError:
+ return ''
+
+
+def pid_to_cgroup_v2(pid):
+ """
+ """
+ cgroup_v2 = ''
+ try:
+ with open('/proc/' + pid + '/cgroup') as f:
+ for index, line in enumerate(f):
+ if index == cgroup_v2_index:
+ cgroup_v2 = line[3:-1]
+ return cgroup_v2
+ except FileNotFoundError:
+ return ''
+
+
+def pid_to_starttime(pid):
+ """ handle FNF error!
+ """
+ try:
+ starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[
+ 2].split(' ')[20]
+
+ except UnicodeDecodeError:
+ with open('/proc/' + pid + '/stat', 'rb') as f:
+ starttime = f.read().decode('utf-8', 'ignore').rpartition(
+ ')')[2].split(' ')[20]
+
+ return float(starttime) / SC_CLK_TCK
+
+
+def get_victim_id(pid):
+ """victim_id is starttime + pid"""
+ try:
+ return rline1('/proc/' + pid + '/stat').rpartition(
+ ')')[2].split(' ')[20] + '_pid' + pid
+ except FileNotFoundError:
+ return ''
+ except ProcessLookupError:
+ return ''
+
+
+def pid_to_state(pid):
+ """
+ """
+ try:
+ with open('/proc/' + pid + '/stat', 'rb') as f:
+ return f.read(40).decode('utf-8', 'ignore').rpartition(')')[2][1]
+ except FileNotFoundError:
+ return ''
+ except ProcessLookupError:
+ return ''
+ except IndexError:
+ with open('/proc/' + pid + '/stat', 'rb') as f:
+ return f.read().decode('utf-8', 'ignore').rpartition(')')[2][1]
+
+
+def pid_to_name(pid):
+ """
+ """
+ try:
+ with open('/proc/' + pid + '/comm', 'rb') as f:
+ return f.read().decode('utf-8', 'ignore')[:-1]
+ except FileNotFoundError:
+ return ''
+ except ProcessLookupError:
+ return ''
+
+
+def pid_to_ppid(pid):
+ """
+ """
+ try:
+ with open('/proc/' + pid + '/status') as f:
+ for n, line in enumerate(f):
+ if n is ppid_index:
+ return line.split('\t')[1].strip()
+ except FileNotFoundError:
+ return ''
+ except ProcessLookupError:
+ return ''
+ except UnicodeDecodeError:
+ with open('/proc/' + pid + '/status', 'rb') as f:
+ f_list = f.read().decode('utf-8', 'ignore').split('\n')
+ for i in range(len(f_list)):
+ if i is ppid_index:
+ return f_list[i].split('\t')[1]
+
+
+def pid_to_ancestry(pid, max_victim_ancestry_depth=1):
+ """
+ """
+ if max_victim_ancestry_depth == 1:
+ ppid = pid_to_ppid(pid)
+ pname = pid_to_name(ppid)
+ return '\n PPID: {} ({})'.format(ppid, pname)
+ if max_victim_ancestry_depth == 0:
+ return ''
+ anc_list = []
+ for i in range(max_victim_ancestry_depth):
+ ppid = pid_to_ppid(pid)
+ pname = pid_to_name(ppid)
+ anc_list.append((ppid, pname))
+ if ppid == '1':
+ break
+ pid = ppid
+ a = ''
+ for i in anc_list:
+ a = a + ' <= PID {} ({})'.format(i[0], i[1])
+ return '\n Ancestry: ' + a[4:]
+
+
+def pid_to_cmdline(pid):
+ """
+ Get process cmdline by pid.
+
+ pid: str pid of required process
+ returns string cmdline
+ """
+ try:
+ with open('/proc/' + pid + '/cmdline') as f:
+ return f.read().replace('\x00', ' ').rstrip()
+ except FileNotFoundError:
+ return ''
+
+
+def pid_to_environ(pid):
+ """
+ Get process environ by pid.
+
+ pid: str pid of required process
+ returns string environ
+ """
+ try:
+ with open('/proc/' + pid + '/environ') as f:
+ return f.read().replace('\x00', ' ').rstrip()
+ except FileNotFoundError:
+ return ''
+
+
+def pid_to_realpath(pid):
+ """
+ """
+ try:
+ return os.path.realpath('/proc/' + pid + '/exe')
+ except FileNotFoundError:
+ return ''
+
+
+def pid_to_uid(pid):
+ """return euid"""
+ try:
+ with open('/proc/' + pid + '/status') as f:
+ for n, line in enumerate(f):
+ if n is uid_index:
+ return line.split('\t')[2]
+ except UnicodeDecodeError:
+ with open('/proc/' + pid + '/status', 'rb') as f:
+ f_list = f.read().decode('utf-8', 'ignore').split('\n')
+ return f_list[uid_index].split('\t')[2]
+ except FileNotFoundError:
+ return ''
+
+
+def pid_to_badness(pid):
+ """Find and modify badness (if it needs)."""
+
+ try:
+
+ oom_score = int(rline1('/proc/' + pid + '/oom_score'))
+ badness = oom_score
+
+ if ignore_positive_oom_score_adj:
+ oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
+ if oom_score_adj > 0:
+ badness = badness - oom_score_adj
+
+ if regex_matching:
+ name = pid_to_name(pid)
+ for re_tup in badness_adj_re_name_list:
+ if search(re_tup[1], name) is not None:
+ badness += int(re_tup[0])
+
+ if re_match_cgroup_v1:
+ cgroup_v1 = pid_to_cgroup_v1(pid)
+ for re_tup in badness_adj_re_cgroup_v1_list:
+ if search(re_tup[1], cgroup_v1) is not None:
+ badness += int(re_tup[0])
+
+ if re_match_cgroup_v2:
+ cgroup_v2 = pid_to_cgroup_v2(pid)
+ for re_tup in badness_adj_re_cgroup_v2_list:
+ if search(re_tup[1], cgroup_v2) is not None:
+ badness += int(re_tup[0])
+
+ if re_match_realpath:
+ realpath = pid_to_realpath(pid)
+ for re_tup in badness_adj_re_realpath_list:
+ if search(re_tup[1], realpath) is not None:
+ badness += int(re_tup[0])
+
+ if re_match_cmdline:
+ cmdline = pid_to_cmdline(pid)
+ for re_tup in badness_adj_re_cmdline_list:
+ if search(re_tup[1], cmdline) is not None:
+ badness += int(re_tup[0])
+
+ if re_match_environ:
+ environ = pid_to_environ(pid)
+ for re_tup in badness_adj_re_environ_list:
+ if search(re_tup[1], environ) is not None:
+ badness += int(re_tup[0])
+
+ if re_match_uid:
+ uid = pid_to_uid(pid)
+ for re_tup in badness_adj_re_uid_list:
+ if search(re_tup[1], uid) is not None:
+ badness += int(re_tup[0])
+
+ if forbid_negative_badness:
+ if badness < 0:
+ badness = 0
+
+ return badness, oom_score
+
+ except FileNotFoundError:
+ return None, None
+ except ProcessLookupError:
+ return None, None
+
+
+def pid_to_status(pid):
+ """
+ """
+
+ try:
+
+ with open('/proc/' + pid + '/status') as f:
+
+ for n, line in enumerate(f):
+
+ if n == 0:
+ name = line.split('\t')[1][:-1]
+
+ if n is state_index:
+ state = line.split('\t')[1][0]
+ continue
+
+ if n is ppid_index:
+ ppid = line.split('\t')[1][:-1]
+ continue
+
+ if n is uid_index:
+ uid = line.split('\t')[2]
+ continue
+
+ if n is vm_size_index:
+ vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
+ continue
+
+ if n is vm_rss_index:
+ vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
+ continue
+
+ if n is vm_swap_index:
+ vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
+ break
+
+ return name, state, ppid, uid, vm_size, vm_rss, vm_swap
+
+ except UnicodeDecodeError:
+ return pid_to_status_unicode(pid)
+
+ except FileNotFoundError:
+ return None
+
+ except ProcessLookupError:
+ return None
+
+ except ValueError:
+ return None
+
+
+def pid_to_status_unicode(pid):
+ """
+ """
+ try:
+
+ with open('/proc/' + pid + '/status', 'rb') as f:
+ f_list = f.read().decode('utf-8', 'ignore').split('\n')
+
+ for i in range(len(f_list)):
+
+ if i == 0:
+ name = f_list[i].split('\t')[1]
+
+ if i is state_index:
+ state = f_list[i].split('\t')[1][0]
+
+ if i is ppid_index:
+ ppid = f_list[i].split('\t')[1]
+
+ if i is uid_index:
+ uid = f_list[i].split('\t')[2]
+
+ if i is vm_size_index:
+ vm_size = kib_to_mib(
+ int(f_list[i].split('\t')[1][:-3]))
+
+ if i is vm_rss_index:
+ vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
+
+ if i is vm_swap_index:
+ vm_swap = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
+
+ return name, state, ppid, uid, vm_size, vm_rss, vm_swap
+
+ except FileNotFoundError:
+ return None
+
+ except ProcessLookupError:
+ return None
+
+ except ValueError:
+ return None
+
+
+def uptime():
+ """
+ """
+ return float(rline1('/proc/uptime').split(' ')[0])
+
+
+def errprint(*text):
+ """
+ """
+ print(*text, file=stderr, flush=True)
+
+
+def mlockall():
+ """Lock all memory to prevent swapping nohang process."""
+
+ MCL_CURRENT = 1
+ MCL_FUTURE = 2
+ MCL_ONFAULT = 4
+
+ libc = CDLL('libc.so.6', use_errno=True)
+
+ result = libc.mlockall(
+ MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
+ )
+ if result != 0:
+ result = libc.mlockall(
+ MCL_CURRENT | MCL_FUTURE
+ )
+ if result != 0:
+ log('WARNING: cannot lock all memory')
+ else:
+ pass
+ # log('All memory locked with MCL_CURRENT | MCL_FUTURE')
+ else:
+ pass
+ # log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
+
+
+def update_stat_dict_and_print(key):
+ """
+ """
+
+ if key is not None:
+
+ if key not in stat_dict:
+
+ stat_dict.update({key: 1})
+
+ else:
+
+ new_value = stat_dict[key] + 1
+ stat_dict.update({key: new_value})
+
+ if print_statistics:
+
+ stats_msg = 'Total stat (what happened in the last {}):'.format(
+ format_time(time() - start_time))
+
+ for i in stat_dict:
+ stats_msg += '\n {}: {}'.format(i, stat_dict[i])
+
+ log(stats_msg)
+
+
+def find_psi_metrics_value(psi_path, psi_metrics):
+    """Read one averaged value from a PSI pressure file.
+
+    psi_path: path of the pressure file to read
+    psi_metrics: one of 'some_avg10', 'some_avg60', 'some_avg300',
+                 'full_avg10', 'full_avg60', 'full_avg300'
+    returns the value as float; None when psi_support is false or
+    psi_metrics is none of the names above
+    """
+    if not psi_support:
+        return None
+
+    # metric name -> (line of the file, index of the "avgN=value" token
+    # within that space-separated line)
+    layout = {
+        'some_avg10': (0, 1),
+        'some_avg60': (0, 2),
+        'some_avg300': (0, 3),
+        'full_avg10': (1, 1),
+        'full_avg60': (1, 2),
+        'full_avg300': (1, 3),
+    }
+
+    if psi_metrics not in layout:
+        return None
+
+    line_no, token_no = layout[psi_metrics]
+
+    if line_no == 0:
+        psi_line = rline1(psi_path)
+    else:
+        with open(psi_path) as f:
+            psi_line = f.readlines()[1]
+
+    return float(psi_line.split(' ')[token_no].split('=')[1])
+
+
+def check_mem_and_swap():
+    """Read MemAvailable, SwapTotal and SwapFree from /proc/meminfo.
+
+    MemAvailable is taken from the third line of the file; the positions of
+    the swap lines come from the module-level swap_total_index and
+    swap_free_index.
+
+    returns (mem_available, swap_total, swap_free) as ints, in the unit
+    used by /proc/meminfo (presumably kB - the 4 trailing characters of
+    each line are dropped before conversion)
+    """
+    with open('/proc/meminfo') as f:
+        for n, line in enumerate(f):
+            if n == 2:
+                mem_available = int(line.split(':')[1][:-4])
+                continue
+            # Line numbers are compared by value: identity ("is") only
+            # happens to work for small ints cached by the interpreter
+            if n == swap_total_index:
+                swap_total = int(line.split(':')[1][:-4])
+                continue
+            if n == swap_free_index:
+                swap_free = int(line.split(':')[1][:-4])
+                # nothing of interest is read after this line
+                break
+    return mem_available, swap_total, swap_free
+
+
+def check_zram():
+    """Estimate the memory used by all zram devices (MemUsedZram).
+
+    Sums disksize and mem_used_total, as reported by zram_stat(), over
+    every /sys/block entry whose name starts with 'zram'.
+
+    returns float:
+    (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024
+    """
+    # ZRAM_DISKSIZE_FACTOR = deltaMemAvailable / disksize
+    # It means that setting zram disksize = 1 GiB decreases the available
+    # memory by 0.0042 GiB.
+    # Found experimentally, requires clarification with different kernels
+    # and architectures.
+    # On small disk drives (up to gigabyte) it can be more, up to 0.0045.
+    # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should
+    # be 0.001:
+    # ("zram uses about 0.1% of the size of the disk"
+    # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt),
+    # but this statement contradicts the experimental data.
+    ZRAM_DISKSIZE_FACTOR = 0.0042
+
+    disksize_sum = 0
+    mem_used_total_sum = 0
+
+    for dev in os.listdir('/sys/block'):
+        if not dev.startswith('zram'):
+            continue
+        disksize, mem_used_total = zram_stat(dev)
+        disksize_sum += int(disksize)
+        mem_used_total_sum += int(mem_used_total)
+
+    overhead = disksize_sum * ZRAM_DISKSIZE_FACTOR
+    return (mem_used_total_sum + overhead) / 1024.0
+
+
+def format_time(t):
+    """Format a duration given in seconds as a human-readable string.
+
+    t: number of seconds (truncated with int())
+    returns 'S sec' below one minute, 'M min S sec' below one hour and
+    'H h M min S sec' from one hour upwards
+    """
+    t = int(t)
+
+    if t < 60:
+        return '{} sec'.format(t)
+
+    m, s = divmod(t, 60)
+
+    if t < 3600:
+        return '{} min {} sec'.format(m, s)
+
+    # Exactly 3600 seconds is already one hour, not "60 min"
+    h, m = divmod(m, 60)
+    return '{} h {} min {} sec'.format(h, m, s)
+
+
+def string_to_float_convert_test(string):
+    """Convert `string` with float(); return None if that raises ValueError."""
+    try:
+        value = float(string)
+    except ValueError:
+        value = None
+    return value
+
+
+def string_to_int_convert_test(string):
+    """Convert `string` with int(); return None if that raises ValueError."""
+    try:
+        value = int(string)
+    except ValueError:
+        value = None
+    return value
+
+
+def conf_parse_string(param):
+    """
+    Fetch a string parameter from config_dict.
+
+    param: config_dict key
+    returns config_dict[param] with surrounding whitespace stripped
+
+    If the key is missing, two messages are printed with errprint() and
+    exit(1) is called.
+    """
+    if param not in config_dict:
+        errprint('All the necessary parameters must be in the config')
+        errprint('There is no "{}" parameter in the config'.format(param))
+        exit(1)
+
+    return config_dict[param].strip()
+
+
+def conf_parse_bool(param):
+    """
+    Fetch a boolean parameter from config_dict.
+
+    param: config_dict key
+    returns True for the value 'True' and False for the value 'False'
+
+    A missing key or any other value is reported with errprint() and
+    followed by exit(1).
+    """
+    if param not in config_dict:
+        errprint('All the necessary parameters must be in the config')
+        errprint('There is no "{}" parameter in the config'.format(param))
+        exit(1)
+
+    param_str = config_dict[param]
+
+    if param_str == 'True':
+        return True
+
+    if param_str == 'False':
+        return False
+
+    errprint('Invalid value of the "{}" parameter.'.format(param))
+    errprint('Valid values are True and False.')
+    errprint('Exit')
+    exit(1)
+
+
+def rline1(path):
+    """Read the first line of a file, without its line terminator.
+
+    path: path of the file to read
+    returns the first line as str; None if the file is empty when read as
+    text
+
+    A file that cannot be decoded as text is re-read as bytes: at most 999
+    bytes are read and decoded as UTF-8 with undecodable bytes ignored.
+    """
+    try:
+        with open(path) as f:
+            for line in f:
+                # Strip only a real newline, so the last character is kept
+                # when the file does not end with one
+                return line[:-1] if line.endswith('\n') else line
+    except UnicodeDecodeError:
+        with open(path, 'rb') as f:
+            return f.read(999).decode(
+                'utf-8', 'ignore').partition('\n')[0]
+    return None
+
+
+def kib_to_mib(num):
+    """Convert a KiB value to MiB, rounded with round()."""
+    mib = num / 1024.0
+    return round(mib)
+
+
+def percent(num):
+    """Interpret num as a fraction and return it as a percentage
+    rounded to one decimal place."""
+    scaled = num * 100
+    return round(scaled, 1)
+
+
+def just_percent_mem(num):
+    """Convert the fraction num to a percentage with one decimal place and
+    right-justify the text to a width of 4."""
+    text = str(round(num * 100, 1))
+    return text.rjust(4)
+
+
+def just_percent_swap(num):
+    """Convert the fraction num to a percentage with one decimal place and
+    right-justify the text to a width of 5."""
+    text = str(round(num * 100, 1))
+    return text.rjust(5)
+
+
+def human(num, lenth):
+    """Convert a KiB value to whole MiB and right-align the text.
+
+    num: value in KiB
+    lenth: minimal width of the returned string
+    """
+    mib_text = str(round(num / 1024))
+    return mib_text.rjust(lenth)
+
+
+def zram_stat(zram_id):
+    """
+    Get zram state.
+
+    zram_id: str name of the zram block device under /sys/block
+    returns (disksize, mem_used_total), both str; ('0', '0') when the
+    device has no disksize file or its disksize is 0
+    """
+    sysfs_dir = '/sys/block/' + zram_id
+
+    try:
+        disksize = rline1(sysfs_dir + '/disksize')
+    except FileNotFoundError:
+        return '0', '0'
+
+    # rline1() returns the line as a str without the newline, so the
+    # value must be compared with '0' (a comparison with a list never
+    # matches)
+    if disksize == '0':
+        return '0', '0'
+
+    try:
+        # the third whitespace-separated field of mm_stat is used as
+        # mem_used_total
+        mem_used_total = rline1(sysfs_dir + '/mm_stat').split()[2]
+    except FileNotFoundError:
+        # no mm_stat file: fall back to the separate mem_used_total file
+        mem_used_total = rline1(sysfs_dir + '/mem_used_total')
+
+    return disksize, mem_used_total  # BYTES, str
+
+
+def send_notify_warn():
+    """
+    Warn the user that the warning threshold is exceeded
+    (implements low memory warnings).
+
+    Runs warning_exe when check_warning_exe is set; otherwise sends a
+    'Low memory' notification with the MemAvail and SwapFree percentages
+    computed from the module-level memory values.
+    """
+    log('Warning threshold exceeded')
+
+    if check_warning_exe:
+        exe(warning_exe)
+        return
+
+    mem_avail_pc = round(mem_available / mem_total * 100)
+    # + 0.1 keeps the division valid when swap_total is 0
+    swap_free_pc = round(swap_free / (swap_total + 0.1) * 100)
+
+    send_notification(
+        'Low memory',
+        'MemAvail: {}%\nSwapFree: {}%'.format(mem_avail_pc, swap_free_pc))
+
+
+def send_notify(threshold, name, pid):
+    """
+    Notify the user about a signal sent to a victim.
+
+    threshold: key for notify_sig_dict
+    name: str process name
+    pid: str process pid
+    """
+    action = notify_sig_dict[threshold]
+
+    # symbol '&' can break notifications in some themes,
+    # therefore it is replaced by '*'
+    safe_name = name.replace('&', '*')
+
+    send_notification(
+        'Freeze prevention',
+        '{} [{}] {}'.format(action, pid, safe_name))
+
+
+def send_notify_etc(pid, name, command):
+    """
+    Notify the user about a command executed instead of sending a signal.
+
+    pid: str process pid
+    name: str process name
+    command: str command that will be executed
+
+    Every '&' in name and command is replaced by '*' in the notification.
+    """
+    template = 'Victim is [{}] {}\nExecute the command:\n{}'
+
+    safe_name = name.replace('&', '*')
+    safe_command = command.replace('&', '*')
+
+    send_notification(
+        'Freeze prevention',
+        template.format(pid, safe_name, safe_command))
+
+
+def send_notification(title, body):
+    """
+    Build the notify helper command line and run it in a new thread.
+
+    title: str notification title
+    body: str notification body; it is passed through encoder() first
+
+    The command ends with '&' and is executed with exe() via go().
+    """
+    template = '{} "--euid={}" "--debug={}" "--title={}" "--body={}" &'
+
+    cmd = template.format(
+        notify_helper_path,
+        self_uid,
+        debug_gui_notifications,
+        title,
+        encoder(body))
+
+    go(exe, cmd)
+
+
+def get_pid_list():
+    """
+    Return the names of /proc entries that have an existing 'exe' path
+    (the pid list except kthreads and zombies).
+    """
+    return [
+        entry
+        for entry in os.listdir('/proc')
+        if os.path.exists('/proc/' + entry + '/exe')
+    ]
+
+
+def get_non_decimal_pids(pids=None):
+    """
+    Select the entries whose first character is not a decimal digit.
+
+    pids: iterable of str /proc entry names; when omitted, the module-level
+          pid_list is used (the behaviour before this parameter existed)
+    returns list of str
+    """
+    if pids is None:
+        pids = pid_list
+    return [pid for pid in pids if not pid[0].isdecimal()]
+
+
+def find_victim(_print_proc_table):
+    """
+    Find the process with the highest badness.
+
+    _print_proc_table: when true, one table row per examined process is
+                       logged (plus an extra column selected by
+                       extra_table_info)
+    returns (pid, victim_badness, victim_name, victim_id)
+    """
+
+    ft1 = time()
+
+    pid_list = get_pid_list()
+
+    # nohang itself and pid 1 are never candidates
+    pid_list.remove(self_pid)
+
+    if '1' in pid_list:
+        pid_list.remove('1')
+
+    non_decimal_list = get_non_decimal_pids()
+
+    for i in non_decimal_list:
+        if i in pid_list:
+            pid_list.remove(i)
+
+    pid_badness_list = []
+
+    if _print_proc_table:
+
+        # extra_table_info value -> column title; unknown values
+        # (including 'None') give an empty title
+        extra_table_titles = {
+            'cgroup_v1': 'CGroup_v1',
+            'cgroup_v2': 'CGroup_v2',
+            'cmdline': 'cmdline',
+            'environ': 'environ',
+            'realpath': 'realpath',
+        }
+
+        # extra_table_info value -> function producing the column value
+        extra_table_readers = {
+            'cgroup_v1': pid_to_cgroup_v1,
+            'cgroup_v2': pid_to_cgroup_v2,
+            'cmdline': pid_to_cmdline,
+            'environ': pid_to_environ,
+            'realpath': pid_to_realpath,
+        }
+
+        extra_table_title = extra_table_titles.get(extra_table_info, '')
+        extra_table_reader = extra_table_readers.get(extra_table_info)
+
+        hr = '#' * 107
+
+        log(hr)
+        log('#    PID    PPID badness oom_score oom_score_adj        e'
+            'UID S VmSize VmRSS VmSwap Name            {}'.format(
+                extra_table_title))
+        log('#------- ------- ------- --------- ------------- -------'
+            '--- - ------ ----- ------ ---------------')
+
+    for pid in pid_list:
+
+        badness = pid_to_badness(pid)[0]
+
+        if badness is None:
+            continue
+
+        if _print_proc_table:
+
+            try:
+                oom_score = rline1('/proc/' + pid + '/oom_score')
+                oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
+            except FileNotFoundError:
+                continue
+
+            # Read the status exactly once: the process may disappear at
+            # any moment, so a second read could return None after the
+            # first one succeeded
+            status = pid_to_status(pid)
+
+            if status is None:
+                continue
+
+            name, state, ppid, uid, vm_size, vm_rss, vm_swap = status
+
+            if extra_table_reader is None:
+                extra_table_line = ''
+            else:
+                extra_table_line = extra_table_reader(pid)
+
+            log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format(
+                pid.rjust(7),
+                ppid.rjust(7),
+                str(badness).rjust(7),
+                oom_score.rjust(9),
+                oom_score_adj.rjust(13),
+                uid.rjust(10),
+                state,
+                str(vm_size).rjust(6),
+                str(vm_rss).rjust(5),
+                str(vm_swap).rjust(6),
+                name.ljust(15),
+                extra_table_line
+            )
+            )
+
+        pid_badness_list.append((pid, badness))
+
+    real_proc_num = len(pid_badness_list)
+
+    # Take the (pid, badness) tuple with the highest 'badness' value
+    pid_tuple_list = sorted(
+        pid_badness_list,
+        key=itemgetter(1),
+        reverse=True
+    )[0]
+
+    pid = pid_tuple_list[0]
+    victim_id = get_victim_id(pid)
+
+    # Get maximum 'badness' value
+    victim_badness = pid_tuple_list[1]
+    victim_name = pid_to_name(pid)
+
+    if _print_proc_table:
+        log(hr)
+
+    log('Found {} processes with existing /proc/[pid]/exe realpath'.format(
+        real_proc_num))
+
+    log(
+        'Process with highest badness (found in {} ms):\n  PID: {}, Na'
+        'me: {}, badness: {}'.format(
+            round((time() - ft1) * 1000),
+            pid,
+            victim_name,
+            victim_badness
+        )
+    )
+
+    return pid, victim_badness, victim_name, victim_id
+
+
+def find_victim_info(pid, victim_badness, name):
+    """
+    Collect details about the victim and format them as a multi-line report.
+
+    pid: str victim pid
+    victim_badness: badness value shown in the report
+    name: str victim name shown in the report
+    returns str report, or None when the victim's /proc data could not be
+    read or parsed (the reason is logged and counted in stat_dict)
+    """
+    status0 = time()
+
+    def victim_died(reason):
+        # Log and count a failed lookup; the caller then returns None
+        message = 'The victim died in the search process: ' + reason
+        log(message)
+        update_stat_dict_and_print(message)
+
+    def mib(status_line):
+        # Value of a status line without its 3 trailing characters
+        # (the ' kB' suffix), converted from KiB to MiB
+        return kib_to_mib(int(status_line.split('\t')[1][:-3]))
+
+    status_path = '/proc/' + pid + '/status'
+
+    try:
+
+        # Both read paths yield the same list of lines without '\n', so
+        # a single parser below serves the text and the bytes fallback
+        try:
+            with open(status_path) as f:
+                status_lines = f.read().split('\n')
+        except UnicodeDecodeError:
+            with open(status_path, 'rb') as f:
+                status_lines = f.read().decode(
+                    'utf-8', 'ignore').split('\n')
+
+        # The *_index names are module-level line numbers of the fields
+        state = status_lines[state_index].split('\t')[1].rstrip()
+        uid = status_lines[uid_index].split('\t')[2]
+        vm_size = mib(status_lines[vm_size_index])
+        vm_rss = mib(status_lines[vm_rss_index])
+
+        if detailed_rss:
+            anon_rss = mib(status_lines[anon_index])
+            file_rss = mib(status_lines[file_index])
+            shmem_rss = mib(status_lines[shmem_index])
+
+        vm_swap = mib(status_lines[vm_swap_index])
+
+        if print_victim_cmdline:
+            cmdline = pid_to_cmdline(pid)
+        oom_score = rline1('/proc/' + pid + '/oom_score')
+        oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
+
+    except FileNotFoundError:
+        victim_died('FileNotFoundError')
+        return None
+    except ProcessLookupError:
+        victim_died('ProcessLookupError')
+        return None
+    except IndexError:
+        # the status file is shorter than expected
+        victim_died('IndexError')
+        return None
+    except ValueError:
+        # a status line does not hold the expected number
+        victim_died('ValueError')
+        return None
+
+    len_vm = len(str(vm_size))
+
+    try:
+        realpath = os.path.realpath('/proc/' + pid + '/exe')
+        victim_lifetime = format_time(uptime() - pid_to_starttime(pid))
+        victim_cgroup_v1 = pid_to_cgroup_v1(pid)
+        victim_cgroup_v2 = pid_to_cgroup_v2(pid)
+
+    except FileNotFoundError:
+        victim_died('FileNotFoundError')
+        return None
+
+    ancestry = pid_to_ancestry(pid, max_victim_ancestry_depth)
+
+    if print_victim_cmdline is False:
+        cmdline = ''
+        c1 = ''
+    else:
+        c1 = '\n  Cmdline:   '
+
+    if detailed_rss:
+        detailed_rss_info = ' (' \
+            'Anon: {} MiB, ' \
+            'File: {} MiB, ' \
+            'Shmem: {} MiB)'.format(
+                anon_rss,
+                file_rss,
+                shmem_rss)
+    else:
+        detailed_rss_info = ''
+
+    victim_info = 'Victim status (found in {} ms):' \
+        '\n  Name:      {}' \
+        '\n  State:     {}' \
+        '\n  PID:       {}' \
+        '{}' \
+        '\n  EUID:      {}' \
+        '\n  badness:   {}, ' \
+        'oom_score: {}, ' \
+        'oom_score_adj: {}' \
+        '\n  VmSize:    {} MiB' \
+        '\n  VmRSS:     {} MiB  {}' \
+        '\n  VmSwap:    {} MiB' \
+        '\n  CGroup_v1: {}' \
+        '\n  CGroup_v2: {}' \
+        '\n  Realpath:  {}' \
+        '{}{}' \
+        '\n  Lifetime:  {}'.format(
+            round((time() - status0) * 1000),
+            name,
+            state,
+            pid,
+            ancestry,
+            uid,
+            victim_badness,
+            oom_score,
+            oom_score_adj,
+            vm_size,
+            # right-align VmRSS and VmSwap under VmSize
+            str(vm_rss).rjust(len_vm),
+            detailed_rss_info,
+            str(vm_swap).rjust(len_vm),
+            victim_cgroup_v1,
+            victim_cgroup_v2,
+            realpath,
+            c1, cmdline,
+            victim_lifetime)
+
+    return victim_info
+
+
+def check_mem_swap_ex():
+    """
+    Check whether the MemAvailable and SwapFree thresholds are exceeded.
+
+    returns a 7-tuple
+    (threshold, mem_info, mem_available, hard_threshold_min_swap_kb,
+    soft_threshold_min_swap_kb, swap_free, swap_total)
+    where threshold is SIGKILL, SIGTERM, 'WARN' or None and mem_info is a
+    log message for SIGKILL/SIGTERM and None otherwise
+    """
+
+    mem_available, swap_total, swap_free = check_mem_and_swap()
+
+    # Each swap threshold is either a percentage of SwapTotal or a fixed
+    # value stored in swap_kb_dict
+    hard_swap_kb = (
+        swap_total * hard_threshold_min_swap_percent / 100.0
+        if swap_kill_is_percent
+        else swap_kb_dict['hard_threshold_min_swap_kb'])
+
+    soft_swap_kb = (
+        swap_total * soft_threshold_min_swap_percent / 100.0
+        if swap_term_is_percent
+        else swap_kb_dict['soft_threshold_min_swap_kb'])
+
+    warn_swap_kb = (
+        swap_total * warning_threshold_min_swap_percent / 100.0
+        if swap_warn_is_percent
+        else swap_kb_dict['warning_threshold_min_swap_kb'])
+
+    # Thresholds as a percentage of SwapTotal, for the log messages;
+    # '-' when the threshold is not below SwapTotal
+    swap_sigkill_pc = (
+        percent(hard_swap_kb / (swap_total + 0.1))
+        if swap_total > hard_swap_kb
+        else '-')
+
+    swap_sigterm_pc = (
+        percent(soft_swap_kb / (swap_total + 0.1))
+        if swap_total > soft_swap_kb
+        else '-')
+
+    # The last five elements are the same for every result
+    tail = (mem_available, hard_swap_kb, soft_swap_kb, swap_free, swap_total)
+
+    hard_exceeded = (mem_available <= hard_threshold_min_mem_kb and
+                     swap_free <= hard_swap_kb)
+
+    if hard_exceeded:
+
+        mem_info = 'Memory status that requ' \
+            'ires corrective actions (hard threshold exceeded):' \
+            '\n  MemAvailable [{} MiB, {} %] <= mem_min_sig' \
+            'kill [{} MiB, {} %]\n  SwapFree [{} MiB, {} %] <= swa' \
+            'p_min_sigkill [{} MiB, {} %]'.format(
+                kib_to_mib(mem_available),
+                percent(mem_available / mem_total),
+                kib_to_mib(hard_threshold_min_mem_kb),
+                percent(hard_threshold_min_mem_kb / mem_total),
+                kib_to_mib(swap_free),
+                percent(swap_free / (swap_total + 0.1)),
+                kib_to_mib(hard_swap_kb),
+                swap_sigkill_pc)
+
+        return (SIGKILL, mem_info) + tail
+
+    soft_exceeded = (mem_available <= soft_threshold_min_mem_kb and
+                     swap_free <= soft_swap_kb)
+
+    if soft_exceeded:
+
+        mem_info = 'Memory status that requi' \
+            'res corrective actions (soft threshold exceeded):' \
+            '\n  MemAvailable [{} MiB, {} %] <= mem_min_sig' \
+            'term [{} MiB, {} %]\n  SwapFree [{} MiB, {} %] <= swa' \
+            'p_min_sigterm [{} MiB, {} %]'.format(
+                kib_to_mib(mem_available),
+                percent(mem_available / mem_total),
+                kib_to_mib(soft_threshold_min_mem_kb),
+                round(soft_threshold_min_mem_percent, 1),
+                kib_to_mib(swap_free),
+                percent(swap_free / (swap_total + 0.1)),
+                kib_to_mib(soft_swap_kb),
+                swap_sigterm_pc)
+
+        return (SIGTERM, mem_info) + tail
+
+    if low_memory_warnings_enabled:
+
+        warn_exceeded = (mem_available <= warning_threshold_min_mem_kb and
+                         swap_free <= warn_swap_kb + 0.1)
+
+        if warn_exceeded:
+            return ('WARN', None) + tail
+
+    return (None, None) + tail
+
+
+def check_zram_ex():
+    """
+    Check whether the zram usage thresholds are exceeded.
+
+    returns (threshold, mem_info, mem_used_zram) where threshold is
+    SIGKILL, SIGTERM, 'WARN' or None and mem_info is a log message for
+    SIGKILL/SIGTERM and None otherwise
+    """
+    mem_used_zram = check_zram()
+
+    if mem_used_zram >= hard_threshold_max_zram_kb:
+
+        mem_info = 'Memory status that requir' \
+            'es corrective actions (hard threshold exceeded):' \
+            '\n  MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
+            'kill [{} MiB, {} %]'.format(
+                kib_to_mib(mem_used_zram),
+                percent(mem_used_zram / mem_total),
+                kib_to_mib(hard_threshold_max_zram_kb),
+                percent(hard_threshold_max_zram_kb / mem_total))
+
+        return SIGKILL, mem_info, mem_used_zram
+
+    if mem_used_zram >= soft_threshold_max_zram_kb:
+
+        # The unit is spelled 'MiB' here as in every other memory message
+        mem_info = 'Memory status that requires corrective actions (soft th' \
+            'reshold exceeded):\n  MemUsedZram [{} MiB, {} %] >= zram_max_s' \
+            'igterm [{} MiB, {} %]'.format(
+                kib_to_mib(mem_used_zram),
+                percent(mem_used_zram / mem_total),
+                kib_to_mib(soft_threshold_max_zram_kb),
+                percent(soft_threshold_max_zram_kb / mem_total))
+
+        return SIGTERM, mem_info, mem_used_zram
+
+    if low_memory_warnings_enabled:
+        if mem_used_zram >= warning_threshold_max_zram_kb:
+            return 'WARN', None, mem_used_zram
+
+    return None, None, mem_used_zram
+
+
+def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0):
+    """
+    Check whether the PSI thresholds have been exceeded for long enough.
+
+    psi_t0: passed through to the result unchanged
+    psi_kill_exceeded_timer: accumulated time with the PSI value at or
+                             above hard_threshold_max_psi
+    psi_term_exceeded_timer: the same for soft_threshold_max_psi
+    x0: timestamp of the previous check
+
+    returns (threshold, mem_info, psi_t0, psi_kill_exceeded_timer,
+    psi_term_exceeded_timer, x0) with updated timers and a fresh x0;
+    threshold is SIGKILL, SIGTERM, 'WARN' or None and mem_info is a log
+    message for SIGKILL/SIGTERM and None otherwise
+    """
+
+    # Time elapsed since the previous check
+    delta0 = time() - x0
+    x0 = time()
+
+    psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
+
+    # Time elapsed since the last corrective action
+    psi_post_action_delay_timer = time() - last_action_dict['t']
+
+    psi_post_action_delay_exceeded = (
+        psi_post_action_delay_timer >= psi_post_action_delay)
+
+    # The timer grows while the threshold is exceeded and restarts from
+    # zero as soon as it is not
+    sigkill_psi_exceeded = psi_avg_value >= hard_threshold_max_psi
+
+    if sigkill_psi_exceeded:
+        psi_kill_exceeded_timer += delta0
+    else:
+        psi_kill_exceeded_timer = 0
+
+    if debug_psi:
+
+        log('psi_post_action_delay_timer: {}'.format(
+            round(psi_post_action_delay_timer, 3)))
+
+        log('psi_post_action_delay_exceeded: {}\nsigkill_psi_exceeded'
+            ': {}\npsi_kill_exceeded_timer: {}'.format(
+                psi_post_action_delay_exceeded,
+                sigkill_psi_exceeded,
+                round(psi_kill_exceeded_timer, 1)
+            )
+            )
+
+    kill_now = (psi_kill_exceeded_timer >= psi_excess_duration and
+                psi_post_action_delay_exceeded)
+
+    if kill_now:
+
+        mem_info = 'PSI avg ({}) > hard_threshold_max_psi ({})\n' \
+            'PSI avg exceeded psi_excess_duration (value' \
+            ' = {} sec) for {} seconds'.format(
+                psi_avg_value,
+                hard_threshold_max_psi,
+                psi_excess_duration,
+                round(psi_kill_exceeded_timer, 1)
+            )
+
+        # psi_term_exceeded_timer is returned as received on this path
+        return (SIGKILL, mem_info, psi_t0, psi_kill_exceeded_timer,
+                psi_term_exceeded_timer, x0)
+
+    sigterm_psi_exceeded = psi_avg_value >= soft_threshold_max_psi
+
+    if sigterm_psi_exceeded:
+        psi_term_exceeded_timer += delta0
+    else:
+        psi_term_exceeded_timer = 0
+
+    if debug_psi:
+
+        log('sigterm_psi_exceeded: {}\n'
+            'psi_term_exceeded_timer: {}\n'.format(
+                sigterm_psi_exceeded,
+                round(psi_term_exceeded_timer, 1)
+            )
+            )
+
+    term_now = (psi_term_exceeded_timer >= psi_excess_duration and
+                psi_post_action_delay_exceeded)
+
+    if term_now:
+
+        mem_info = 'PSI avg ({}) > soft_threshold_max_psi ({})\n' \
+            'PSI avg exceeded psi_excess_duration (value' \
+            ' = {} sec) for {} seconds'.format(
+                psi_avg_value,
+                soft_threshold_max_psi,
+                psi_excess_duration,
+                round(psi_term_exceeded_timer, 1)
+            )
+
+        return (SIGTERM, mem_info, psi_t0, psi_kill_exceeded_timer,
+                psi_term_exceeded_timer, x0)
+
+    if low_memory_warnings_enabled:
+
+        if psi_avg_value >= warning_threshold_max_psi:
+            return ('WARN', None, psi_t0, psi_kill_exceeded_timer,
+                    psi_term_exceeded_timer, x0)
+
+    return (None, None, psi_t0, psi_kill_exceeded_timer,
+            psi_term_exceeded_timer, x0)
+
+
+def is_victim_alive(victim_id):
+    """
+    We do not have a reliable sign of the end of the release of memory:
+    https://github.com/rfjakob/earlyoom/issues/128#issuecomment-507023717
+
+    victim_id: str of the form '<starttime>_pid<pid>'
+
+    Return values:
+    0  state X, nonexistent, or another process now owns the pid
+       (everything is finished, the post-SIGKILL delay can be skipped)
+    1  /proc/<pid>/exe still exists
+    2  state R: the victim is releasing memory; wait for its death
+    3  state Z: the memory has possibly been released; end of tracking
+    """
+
+    # Verify the identity of the victim: the id rebuilt from the pid must
+    # match the one we were given
+    _, pid = victim_id.split('_pid')
+    if get_victim_id(pid) != victim_id:
+        return 0
+
+    # Is the victim alive?
+    if os.path.exists('/proc/{}/exe'.format(pid)):
+        return 1
+
+    # From here on the victim is mortally wounded; tell the cases apart
+    # by State. Anything other than R or Z (X, empty, ...) counts as gone.
+    state = pid_to_state(pid)
+
+    return 2 if state == 'R' else 3 if state == 'Z' else 0
+
+
+def implement_corrective_action(
+        threshold,
+        mem_info_list,
+        psi_t0,
+        psi_kill_exceeded_timer,
+        psi_term_exceeded_timer,
+        x0,
+        psi_threshold,
+        zram_threshold,
+        zram_info,
+        psi_info):
+    """
+    Choose a victim, re-check the thresholds and apply a corrective action.
+
+    The victim is either a recently handled one still kept in v_dict or
+    the result of find_victim(). After the memory levels are re-checked,
+    the victim gets a signal (or a matching soft action command is run)
+    and is then polled until it dies, becomes a zombie or stops
+    responding.
+
+    threshold: replaced by the re-checked threshold before it is used
+    mem_info_list: list of messages that are logged before the re-check
+    psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0:
+        state handed to check_psi_ex() when CHECK_PSI is set
+    psi_threshold, psi_info: used as given unless CHECK_PSI is set
+    zram_threshold, zram_info: used as given unless CHECK_ZRAM is set
+
+    returns psi_t0
+    """
+
+    log('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
+
+    # NOTE(review): hard-coded, so the debug messages below are always logged
+    debug_corrective_action = True
+
+    time0 = time()
+
+    # Original plan of the author:
+    # 1. Purge the dead from the dictionary. Iterate over the dictionary,
+    #    tracking the dying ones.
+    # 2. Iterate over the remaining dictionary and look at the time deltas.
+    #    If the delta has NOT expired for at least one victim - WAIT,
+    #    leave the function.
+
+    # print(v_dict)
+    # victim ids to drop from v_dict (collected first, because the
+    # dictionary cannot be changed while it is being iterated)
+    nu = []
+
+    for victim_id in v_dict:
+        iva = is_victim_alive(victim_id)
+        #print(iva, victim_id)
+        # 0: gone, 3: zombie
+        if iva == 0 or iva == 3:
+            nu.append(victim_id)
+        """
+            continue
+        if iva == 1:
+            continue
+        if iva == 2:
+            pass # быстро отследить умирающего
+        """
+
+    for i in nu:
+        if debug_corrective_action:
+            log('Remove {} from v_dict'.format(i))
+        v_dict.pop(i)
+
+    # x becomes True when a previous victim is still within
+    # victim_cache_time
+    x = False
+    cache_list = []
+    #cache_list.append(('foo', 0.01))
+    #cache_list.append(('boo', 1111.01))
+    # 2
+    # print(v_dict)
+
+    for victim_id in v_dict:
+        tx = v_dict[victim_id]['time']
+        ddt = time() - tx
+        if ddt < victim_cache_time:
+
+            if debug_corrective_action:
+                log(
+                    'victim_cache_time is not exceeded for {} ({} < {})'.format(
+                        victim_id, round(ddt, 3), victim_cache_time
+                    )
+                )
+            x = True
+            cache_list.append((victim_id, ddt))
+            # the search stops at the first cached victim found
+            break
+
+    if x:
+        # print(cache_list)
+        # pick the entry with the smallest elapsed time
+        e = sorted(cache_list, key=itemgetter(1), reverse=False)
+        cached_victim_id = e[0][0]
+
+    for i in mem_info_list:
+        log(i)
+
+    if x:
+        victim_id = cached_victim_id
+        # the pid is the part of the id after '_pid'
+        pid = victim_id.partition('_pid')[2]
+        victim_badness = pid_to_badness(pid)[0]
+        name = v_dict[victim_id]['name']
+        log('New victim is cached victim {} ({})'.format(pid, name))
+    else:
+        pid, victim_badness, name, victim_id = find_victim(print_proc_table)
+
+    log('Recheck memory levels...')
+
+    (masf_threshold, masf_info, mem_available, hard_threshold_min_swap_kb,
+     soft_threshold_min_swap_kb, swap_free, swap_total) = check_mem_swap_ex()
+
+    if CHECK_ZRAM:
+        zram_threshold, zram_info, mem_used_zram = check_zram_ex()
+
+    if CHECK_PSI:
+        (psi_threshold, psi_info, psi_t0, psi_kill_exceeded_timer,
+         psi_term_exceeded_timer, x0) = check_psi_ex(
+            psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0)
+
+    # The strongest threshold reported by any of the three checks wins;
+    # mem_info_list is rebuilt from the checks that report SIGKILL/SIGTERM
+    if (masf_threshold is SIGKILL or zram_threshold is SIGKILL or
+            psi_threshold is SIGKILL):
+
+        new_threshold = SIGKILL
+        mem_info_list = []
+
+        if masf_threshold is SIGKILL or masf_threshold is SIGTERM:
+            mem_info_list.append(masf_info)
+
+        if zram_threshold is SIGKILL or zram_threshold is SIGTERM:
+            mem_info_list.append(zram_info)
+
+        if psi_threshold is SIGKILL or psi_threshold is SIGTERM:
+            mem_info_list.append(psi_info)
+
+    elif (masf_threshold is SIGTERM or zram_threshold is SIGTERM or
+            psi_threshold is SIGTERM):
+
+        new_threshold = SIGTERM
+        mem_info_list = []
+
+        if masf_threshold is SIGKILL or masf_threshold is SIGTERM:
+            mem_info_list.append(masf_info)
+
+        if zram_threshold is SIGKILL or zram_threshold is SIGTERM:
+            mem_info_list.append(zram_info)
+
+        if psi_threshold is SIGKILL or psi_threshold is SIGTERM:
+            mem_info_list.append(psi_info)
+
+    else:
+        # nothing is exceeded any more: no action is taken
+        log('Thresholds is not exceeded now')
+        return psi_t0
+
+    for i in mem_info_list:
+        log(i)
+
+    # NOTE(review): new_threshold is always SIGKILL or SIGTERM here (the
+    # branch above returns otherwise), so this check looks unreachable
+    if new_threshold is None or new_threshold == 'WARN':
+        log('Thresholds is not exceeded now')
+        return psi_t0
+
+    threshold = new_threshold
+
+    vwd = None  # Victim Will Die
+
+    if victim_badness >= min_badness:
+
+        if threshold is SIGTERM:
+            if victim_id in v_dict:
+                # the victim has already been handled: escalate to
+                # SIGKILL once it had max_soft_exit_time to exit,
+                # otherwise keep waiting
+                dt = time() - v_dict[victim_id]['time']
+                if dt > max_soft_exit_time:
+                    log('max_soft_exit_time is exceeded: the '
+                        'victim will get SIGKILL')
+                    threshold = SIGKILL
+                else:
+                    log('max_soft_exit_time is not exceeded ('
+                        '{} < {}) for the victim'.format(round(
+                            dt, 1), max_soft_exit_time))
+
+                    if debug_sleep:
+                        log('Sleep {} sec (over_sleep)'.format(over_sleep))
+                    sleep(over_sleep)
+
+                    log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
+
+                    return psi_t0
+
+        # log('Try to implement a corrective action...')
+
+        if print_victim_status:
+            # TODO (from the original author): look up the victim badness
+            # again, do not rely on the old value
+            # NOTE(review): find_victim_info() can return None, which is
+            # then passed to log() as is
+            victim_info = find_victim_info(pid, victim_badness, name)
+            log(victim_info)
+
+        # soft_match becomes True when a soft action regexp matches the
+        # victim's name or cgroup
+        soft_match = False
+        if soft_actions and threshold is SIGTERM:
+            name = pid_to_name(pid)
+            cgroup_v1 = pid_to_cgroup_v1(pid)
+            service = ''
+            # last path component of the cgroup; kept only when it ends
+            # with '.service'
+            cgroup_v1_tail = cgroup_v1.rpartition('/')[2]
+            if cgroup_v1_tail.endswith('.service'):
+                service = cgroup_v1_tail
+            for i in soft_actions_list:
+                # each entry is indexed as (unit, regexp, command)
+                unit = i[0]
+                if unit == 'name':
+                    u = name
+                else:
+                    u = cgroup_v1
+                regexp = i[1]
+                command = i[2]
+
+                if search(regexp, u) is not None:
+                    log("Regexp '{}' matches with {} '{}'".format(
+                        regexp, unit, u))
+                    soft_match = True
+                    break
+
+        if soft_match:
+
+            # substitute the placeholders and run the command in a thread
+            cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name(
+                pid)).replace('$SERVICE', service)
+            go(exe, cmd)
+
+            """
+            if exit_status == 0:
+                success = True
+            else:
+                success = False
+            """
+
+            response_time = time() - time0
+
+            # the command runs asynchronously, so no exit status is known
+            exit_status = None
+
+            preventing_oom_message = 'Implement a corrective act' \
+                'ion:\n  Run the command: {}' \
+                '\n  Exit status: {}; total response ' \
+                'time: {} ms'.format(
+                    cmd,
+                    exit_status,
+                    round(response_time * 1000))
+
+        else:
+
+            try:
+                os.kill(int(pid), threshold)
+
+                response_time = time() - time0
+
+                send_result = 'total response time: {} ms'.format(
+                    round(response_time * 1000))
+
+                preventing_oom_message = 'Implement a corrective action:' \
+                    '\n  Send {} to the victim; {}'.format(
+                        sig_dict[threshold], send_result)
+
+                # success = True
+
+                if threshold is SIGKILL:
+                    vwd = True
+
+            except FileNotFoundError:
+                vwd = True
+                # success = False
+                # response_time = time() - time0
+                # send_result = 'no such process; response time: {} ms'.format(round(response_time * 1000))
+                key = 'The victim died in the search process: ' \
+                    'FileNotFoundError'
+            except ProcessLookupError:
+                vwd = True
+                # success = False
+                # response_time = time() - time0
+                # send_result = 'no such process; response time: {} ms'.format(round(response_time * 1000))
+                key = 'The victim died in the search process: ' \
+                    'ProcessLookupError'
+
+        # preventing_oom_message is unbound when os.kill() raised above;
+        # in that case nothing is logged
+        try:
+            log(preventing_oom_message)
+        except UnboundLocalError:
+            pass
+            # preventing_oom_message = key
+
+        # A victim that is not expected to die is remembered in v_dict
+        # together with the time of its first handling
+        if not vwd:
+            if victim_id not in v_dict:
+                v_dict[victim_id] = dict()
+                v_dict[victim_id]['time'] = time()
+                v_dict[victim_id]['name'] = name
+        else:
+            pass
+
+        last_action_dict['t'] = kill_timestamp = time()
+
+        # print(v_dict)
+
+        # response_time = time() - time0
+
+        # log('success: ' + str(success))
+        # log('victim will die: ' + str(vwd))
+        # log('response_time: ' + str(response_time) + ' sec')
+
+        # START OF TRACKING THE VICTIM'S STATE. Could be moved to a
+        # separate function that takes the id, logs and returns something.
+
+        # Notes of the original author: next, work on the dictionaries.
+        # The victim died here - reset the timer. All the old victims died
+        # within 3 seconds in the following cycles - reset the timer.
+        # After that everything should work really well.
+
+        # Poll the victim every 5 ms until one of the branches breaks
+        while True:
+            sleep(0.005)
+            d = time() - kill_timestamp
+            # print('Time elapsed:', d)
+            iva = is_victim_alive(victim_id)
+
+            if iva == 0:
+
+                log('The victim died in {} sec'.format(round(d, 3)))
+
+                if victim_id in v_dict:
+                    v_dict.pop(victim_id)
+                break
+
+            elif iva == 1:
+                # Alive and still holding memory
+                # NOTE(review): when vwd is true this branch never breaks,
+                # so the loop runs until the victim's state changes
+                if not vwd and d > sensitivity_test_time:
+
+                    log("The victim doesn't respond on corrective action in {} sec".format(
+                        round(d, 3)))
+
+                    break
+
+            elif iva == 2:
+                pass
+                # Mortally wounded and releasing memory: wait until the
+                # memory has been released
+
+            else:  # 3
+                # Zombie: the memory is released quickly, if not already.
+                # Sleep a little and leave the loop.
+
+                log('The victim became a zombie in {} sec'.format(round(d, 3)))
+
+                if victim_id in v_dict:
+                    v_dict.pop(victim_id)
+                sleep(post_zombie_delay)
+                break
+
+        mem_available, swap_total, swap_free = check_mem_and_swap()
+        ma_mib = int(mem_available) / 1024.0
+        sf_mib = int(swap_free) / 1024.0
+        log('Memory status after implementing a corrective act'
+            'ion:\n  MemAvailable'
+            ': {} MiB, SwapFree: {} MiB'.format(
+                round(ma_mib, 1), round(sf_mib, 1)))
+
+        # count the action in the statistics
+        if soft_match is False:
+            key = 'Send {} to {}'.format(sig_dict[threshold], name)
+            update_stat_dict_and_print(key)
+        else:
+            key = "Run the command '{}'".format(command)
+            update_stat_dict_and_print(key)
+
+        if threshold is SIGKILL and post_kill_exe != '':
+
+            cmd = post_kill_exe.replace('$PID', pid).replace(
+                '$NAME', pid_to_name(pid))
+
+            log('Execute post_kill_exe')
+
+            go(exe, cmd)
+
+        if post_action_gui_notifications:
+            if soft_match:
+                send_notify_etc(pid, name, cmd)
+            else:
+                send_notify(threshold, name, pid)
+
+    else:
+
+        # the best candidate is not bad enough: nothing is done to it
+        response_time = time() - time0
+        victim_badness_is_too_small = 'victim badness ({}) < min_b' \
+            'adness ({}); nothing to do; response time: {} ms'.format(
+                victim_badness,
+                min_badness,
+                round(response_time * 1000))
+
+        log(victim_badness_is_too_small)
+
+        # update stat_dict
+        key = 'victim badness < min_badness'
+        update_stat_dict_and_print(key)
+
+    # vwd is still None when no action was taken or the action is not
+    # expected to end in the victim's death
+    if vwd is None:
+
+        if debug_sleep:
+            log('Sleep {} sec (over_sleep)'.format(over_sleep))
+        sleep(over_sleep)
+
+    log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
+
+    return psi_t0
+
+
+def sleep_after_check_mem():
+    """Sleep until the next memory check.
+
+    With stable_sleep the pause is always min_sleep. Otherwise it is
+    estimated from the distance between the current levels (mem_available,
+    swap_free and, with CHECK_ZRAM, mem_used_zram) and the thresholds,
+    divided by the fill rates, and limited by max_sleep and min_sleep.
+
+    returns None
+    """
+
+    def flush_stdout():
+        # A failing flush (OSError) must not interrupt the monitoring,
+        # whichever sleep mode is configured
+        try:
+            stdout.flush()
+        except OSError:
+            pass
+
+    if stable_sleep:
+
+        if debug_sleep:
+            log('Sleep {} sec'.format(min_sleep))
+        flush_stdout()
+        sleep(min_sleep)
+        return None
+
+    # Distance to the nearer (i.e. larger) of the two thresholds,
+    # never negative
+    mem_point = mem_available - max(
+        hard_threshold_min_mem_kb, soft_threshold_min_mem_kb)
+    swap_point = swap_free - max(
+        hard_threshold_min_swap_kb, soft_threshold_min_swap_kb)
+
+    if swap_point < 0:
+        swap_point = 0
+
+    if mem_point < 0:
+        mem_point = 0
+
+    # Estimated time until the thresholds are reached
+    t_mem = mem_point / fill_rate_mem
+    t_swap = swap_point / fill_rate_swap
+
+    t = t_mem + t_swap
+
+    if CHECK_ZRAM:
+        t_zram = (mem_total * 0.8 - mem_used_zram) / fill_rate_zram
+        if t_zram < 0:
+            t_zram = 0
+        # the shorter of the two estimates wins
+        t = min(t, t_mem + t_zram)
+        z = ', t_zram={}'.format(round(t_zram, 2))
+    else:
+        z = ''
+
+    if t > max_sleep:
+        t = max_sleep
+    elif t < min_sleep:
+        t = min_sleep
+
+    if debug_sleep:
+        log('Sleep {} sec (t_mem={}, t_swap={}{})'.format(round(t, 2), round(
+            t_mem, 2), round(t_swap, 2), z))
+
+    flush_stdout()
+
+    sleep(t)
+
+
+def calculate_percent(arg_key):
+    """
+    Parse a memory threshold from config_dict and express it in all units.
+
+    The config value must end with '%' (share of mem_total) or with 'M'
+    (MiB). A missing key, a non-numeric value, unknown units or an
+    out-of-range value is reported and the program exits with status 1.
+
+    arg_key: str key for config_dict
+    returns a tuple (mem_min_kb, mem_min_mb, mem_min_percent); mem_min_mb
+    is rounded to an integer when the value was given in percent
+    """
+
+    if arg_key not in config_dict:
+        log('{} not in config\nExit'.format(arg_key))
+        exit(1)
+
+    mem_min = config_dict[arg_key]
+
+    if mem_min.endswith('%'):
+        # truncate percents, so we have a number, then 'float test'
+        mem_min_percent = string_to_float_convert_test(
+            mem_min[:-1].strip())
+        if mem_min_percent is None:
+            errprint('Invalid {} value, not float\nExit'.format(arg_key))
+            exit(1)
+        # Final validations...
+        if mem_min_percent < 0 or mem_min_percent > 100:
+            errprint(
+                '{}, as percents value, out of ran'
+                'ge [0; 100]\nExit'.format(arg_key))
+            exit(1)
+
+        # The percentage is a valid float, translate it into KiB and MiB
+        mem_min_kb = mem_min_percent / 100 * mem_total
+        mem_min_mb = round(mem_min_kb / 1024)
+
+    elif mem_min.endswith('M'):
+        mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip())
+        if mem_min_mb is None:
+            errprint('Invalid {} value, not float\nExit'.format(arg_key))
+            exit(1)
+        # the '%' form rejects negative values, the 'M' form must too
+        if mem_min_mb < 0:
+            errprint('{} value can not be negative\nExit'.format(arg_key))
+            exit(1)
+        mem_min_kb = mem_min_mb * 1024
+        if mem_min_kb > mem_total:
+            errprint(
+                '{} value can not be greater than MemT'
+                'otal ({} MiB)\nExit'.format(
+                    arg_key, round(
+                        mem_total / 1024)))
+            exit(1)
+        mem_min_percent = mem_min_kb / mem_total * 100
+
+    else:
+        log('Invalid {} units in config.\n Exit'.format(arg_key))
+        exit(1)
+
+    return mem_min_kb, mem_min_mb, mem_min_percent
+
+
+##########################################################################
+
+
+# {victim_id : {'time': timestamp, 'name': name}
+v_dict = dict()
+
+
+start_time = time()
+
+
+help_mess = """usage: nohang [-h] [-v] [-p] [-c CONFIG] [-cc CONFIG]
+
+optional arguments:
+ -h, --help show this help message and exit
+ -v, --version print version
+ -p, --print-proc-table
+ print table of processes with their badness values
+ -c CONFIG, --config CONFIG
+ path to the config file, default values:
+ ./nohang.conf, /etc/nohang/nohang.conf
+ -cc CONFIG, --check-config CONFIG
+ check and print config"""
+
+
+SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
+
+SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE'])
+
+conf_err_mess = 'Invalid config. Exit.'
+
+sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP]
+
+sig_dict = {
+ SIGKILL: 'SIGKILL',
+ SIGINT: 'SIGINT',
+ SIGQUIT: 'SIGQUIT',
+ SIGHUP: 'SIGHUP',
+ SIGTERM: 'SIGTERM'
+}
+
+self_pid = str(os.getpid())
+
+self_uid = os.geteuid()
+
+# True when the effective UID is 0
+root = (self_uid == 0)
+
+
+if os.path.exists('./nohang_notify_helper'):
+ notify_helper_path = './nohang_notify_helper'
+else:
+ notify_helper_path = 'nohang_notify_helper'
+
+
+last_action_dict = dict()
+
+last_action_dict['t'] = time()
+
+
+# will store corrective actions stat
+stat_dict = dict()
+
+
+separate_log = False # will be overwritten after parse config
+
+
+cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes()
+
+
+self_oom_score_adj_min = '-600'
+self_oom_score_adj_max = '-6'
+
+
+write_self_oom_score_adj(self_oom_score_adj_min)
+
+
+pid_list = get_pid_list()
+
+
+print_proc_table_flag = False
+
+check_config_flag = False
+
+
+if os.path.exists('./nohang.conf'):
+ config = os.getcwd() + '/nohang.conf'
+else:
+ config = '/etc/nohang/nohang.conf'
+
+
+# Parse the command line: at most one option, optionally followed by the
+# path to a config file.
+if len(argv) == 1:
+    pass
+elif len(argv) == 2:
+    if argv[1] in ('--help', '-h'):
+        print(help_mess)
+        exit()
+    elif argv[1] in ('--check-config', '-cc'):
+        check_config_flag = True
+    elif argv[1] in ('--version', '-v'):
+        print_version()
+    elif argv[1] in ('--print-proc-table', '-p'):
+        # config already holds the default path chosen before this chain
+        print_proc_table_flag = True
+    elif argv[1] in ('--config', '-c'):
+        # a known option without its argument is not an "unknown option"
+        errprint('Option {} requires a path to the config file'.format(
+            argv[1]))
+        exit(1)
+    else:
+        errprint('Unknown option: {}'.format(argv[1]))
+        exit(1)
+elif len(argv) == 3:
+    if argv[1] in ('--config', '-c'):
+        config = argv[2]
+    elif argv[1] in ('--check-config', '-cc'):
+        config = argv[2]
+        check_config_flag = True
+    else:
+        errprint('Unknown option: {}'.format(argv[1]))
+        exit(1)
+else:
+    errprint('Invalid CLI input: too many options')
+    exit(1)
+
+
+# find mem_total
+# find positions of SwapFree and SwapTotal in /proc/meminfo
+
+with open('/proc/meminfo') as f:
+    mem_list = f.readlines()
+
+# field names in file order, e.g. 'MemTotal', 'MemFree', ...
+mem_list_names = [s.split(':')[0] for s in mem_list]
+
+# MemAvailable is expected on the third line; a shorter file or another
+# field there means the kernel does not provide it.
+if len(mem_list_names) < 3 or mem_list_names[2] != 'MemAvailable':
+    errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ required')
+    exit(1)
+
+swap_total_index = mem_list_names.index('SwapTotal')
+# SwapFree is taken to be the line right after SwapTotal
+swap_free_index = swap_total_index + 1
+
+# first line is 'MemTotal: <number> kB'; take the number regardless of
+# the exact spacing or unit suffix length
+mem_total = int(mem_list[0].split(':')[1].split()[0])
+
+# Read /proc/self/status once to learn on which line each field lives,
+# so that values such as VmRSS and VmSwap can later be fetched by index.
+
+with open('/proc/self/status') as file:
+    status_list = file.readlines()
+
+status_names = [entry.split(':')[0] for entry in status_list]
+
+(ppid_index, vm_size_index, vm_rss_index, vm_swap_index, uid_index,
+ state_index) = map(status_names.index, (
+     'PPid', 'VmSize', 'VmRSS', 'VmSwap', 'Uid', 'State'))
+
+
+# The RssAnon/RssFile/RssShmem breakdown is optional: when any of the
+# three fields is missing, detailed_rss is switched off.
+try:
+    anon_index = status_names.index('RssAnon')
+    file_index = status_names.index('RssFile')
+    shmem_index = status_names.index('RssShmem')
+except ValueError:
+    detailed_rss = False
+else:
+    detailed_rss = True
+
+
+log('config: ' + config)
+
+
+##########################################################################
+
+# parsing the config with obtaining the parameters dictionary
+
+# conf_parameters_dict
+# conf_restart_dict
+
+# dictionary with config options
+config_dict = dict()
+
+badness_adj_re_name_list = []
+badness_adj_re_cmdline_list = []
+badness_adj_re_environ_list = []
+badness_adj_re_uid_list = []
+badness_adj_re_cgroup_v1_list = []
+badness_adj_re_cgroup_v2_list = []
+badness_adj_re_realpath_list = []
+
+soft_actions_list = []
+
+# separator for optional parameters (that starts with @)
+opt_separator = '///'
+
+# stupid conf parsing, need refactoring
+
+# '@' directive prefix -> list collecting its (badness_adj, reg_exp) pairs
+badness_adj_directives = (
+    ('@BADNESS_ADJ_RE_NAME', badness_adj_re_name_list),
+    ('@BADNESS_ADJ_RE_CMDLINE', badness_adj_re_cmdline_list),
+    ('@BADNESS_ADJ_RE_UID', badness_adj_re_uid_list),
+    ('@BADNESS_ADJ_RE_CGROUP_V1', badness_adj_re_cgroup_v1_list),
+    ('@BADNESS_ADJ_RE_CGROUP_V2', badness_adj_re_cgroup_v2_list),
+    ('@BADNESS_ADJ_RE_REALPATH', badness_adj_re_realpath_list),
+    ('@BADNESS_ADJ_RE_ENVIRON', badness_adj_re_environ_list),
+)
+
+# '@' directive prefix -> kind tag stored as the first item of each
+# soft_actions_list entry
+soft_action_directives = (
+    ('@SOFT_ACTION_RE_NAME', 'name'),
+    ('@SOFT_ACTION_RE_CGROUP_V1', 'cgroup_v1'),
+)
+
+try:
+    with open(config) as f:
+
+        for line in f:
+
+            if line.startswith('@'):
+                # Optional parameters: '<prefix> <left> /// <right>'.
+                # They are never stored in config_dict, so repeating an
+                # identical directive line is not reported as a
+                # duplicated key.
+
+                for prefix, kind in soft_action_directives:
+                    if line.startswith(prefix):
+                        parts = line.partition(prefix)[
+                            2].partition(opt_separator)
+                        reg_exp = parts[0].strip()
+                        valid_re(reg_exp)
+                        soft_actions_list.append(
+                            (kind, reg_exp, parts[2].strip()))
+
+                for prefix, adj_list in badness_adj_directives:
+                    if line.startswith(prefix):
+                        parts = line.partition(prefix)[2].strip(
+                            ' \n').partition(opt_separator)
+                        badness_adj = parts[0].strip(' ')
+                        reg_exp = parts[2].strip(' ')
+                        valid_re(reg_exp)
+                        adj_list.append((badness_adj, reg_exp))
+
+                continue
+
+            # comments and empty lines
+            if line.startswith(('#', '\n', '\t', ' ')):
+                continue
+
+            # everything else is 'key = value'
+            parts = line.partition('=')
+
+            key = parts[0].strip()
+            value = parts[2].strip()
+
+            if key not in config_dict:
+                config_dict[key] = value
+            else:
+                log('ERROR: config key duplication: {}'.format(key))
+                exit(1)
+
+except (PermissionError, UnicodeDecodeError, IsADirectoryError,
+        IndexError, FileNotFoundError) as conf_exc:
+    # report the exception class name followed by the common message
+    errprint(type(conf_exc).__name__, conf_err_mess)
+    exit(1)
+
+
+# Each kind of matching is enabled only when the config supplied at
+# least one entry for it.
+regex_matching = badness_adj_re_name_list != []
+
+re_match_cmdline = badness_adj_re_cmdline_list != []
+
+re_match_uid = badness_adj_re_uid_list != []
+
+re_match_environ = badness_adj_re_environ_list != []
+
+re_match_realpath = badness_adj_re_realpath_list != []
+
+re_match_cgroup_v1 = badness_adj_re_cgroup_v1_list != []
+
+re_match_cgroup_v2 = badness_adj_re_cgroup_v2_list != []
+
+soft_actions = soft_actions_list != []
+
+
+##########################################################################
+
+
+# post_zombie_delay = 0.1
+
+# victim_cache_time = 50
+
+
+# extracting parameters from the dictionary
+# check for all necessary parameters
+# validation of all parameters
+debug_psi = conf_parse_bool('debug_psi')
+print_statistics = conf_parse_bool('print_statistics')
+print_proc_table = conf_parse_bool('print_proc_table')
+forbid_negative_badness = conf_parse_bool('forbid_negative_badness')
+print_victim_status = conf_parse_bool('print_victim_status')
+print_victim_cmdline = conf_parse_bool('print_victim_cmdline')
+print_config_at_startup = conf_parse_bool('print_config_at_startup')
+print_mem_check_results = conf_parse_bool('print_mem_check_results')
+debug_sleep = conf_parse_bool('debug_sleep')
+low_memory_warnings_enabled = conf_parse_bool('low_memory_warnings_enabled')
+post_action_gui_notifications = conf_parse_bool(
+ 'post_action_gui_notifications')
+
+
+psi_checking_enabled = conf_parse_bool('psi_checking_enabled')
+ignore_psi = not psi_checking_enabled
+
+zram_checking_enabled = conf_parse_bool('zram_checking_enabled')
+ignore_zram = not zram_checking_enabled
+
+
+debug_gui_notifications = conf_parse_bool('debug_gui_notifications')
+ignore_positive_oom_score_adj = conf_parse_bool(
+ 'ignore_positive_oom_score_adj')
+
+
+(soft_threshold_min_mem_kb, soft_threshold_min_mem_mb,
+ soft_threshold_min_mem_percent) = calculate_percent('soft_threshold_min_mem')
+
+(hard_threshold_min_mem_kb, hard_threshold_min_mem_mb,
+ hard_threshold_min_mem_percent) = calculate_percent('hard_threshold_min_mem')
+
+(soft_threshold_max_zram_kb, soft_threshold_max_zram_mb,
+ soft_threshold_max_zram_percent) = calculate_percent('soft_threshold_max_zram')
+
+(hard_threshold_max_zram_kb, hard_threshold_max_zram_mb,
+ hard_threshold_max_zram_percent) = calculate_percent('hard_threshold_max_zram')
+
+(warning_threshold_min_mem_kb, warning_threshold_min_mem_mb,
+ warning_threshold_min_mem_percent) = calculate_percent('warning_threshold_min_mem')
+
+(warning_threshold_max_zram_kb, warning_threshold_max_zram_mb,
+ warning_threshold_max_zram_percent) = calculate_percent('warning_threshold_max_zram')
+
+
+if 'post_zombie_delay' in config_dict:
+ post_zombie_delay = string_to_float_convert_test(
+ config_dict['post_zombie_delay'])
+ if post_zombie_delay is None:
+ errprint('Invalid post_zombie_delay, not float\nExit')
+ exit(1)
+ if post_zombie_delay < 0:
+ errprint('post_zombie_delay MUST be >= 0\nExit')
+ exit(1)
+else:
+ errprint('post_zombie_delay not in config\nExit')
+ exit(1)
+
+
+if 'victim_cache_time' in config_dict:
+ victim_cache_time = string_to_float_convert_test(
+ config_dict['victim_cache_time'])
+ if victim_cache_time is None:
+ errprint('Invalid victim_cache_time, not float\nExit')
+ exit(1)
+ if victim_cache_time < 0:
+ errprint('victim_cache_time MUST be >= 0\nExit')
+ exit(1)
+else:
+ errprint('victim_cache_time not in config\nExit')
+ exit(1)
+
+
+if 'fill_rate_mem' in config_dict:
+ fill_rate_mem = string_to_float_convert_test(config_dict['fill_rate_mem'])
+ if fill_rate_mem is None:
+ errprint('Invalid fill_rate_mem value, not float\nExit')
+ exit(1)
+ if fill_rate_mem <= 0:
+ errprint('fill_rate_mem MUST be > 0\nExit')
+ exit(1)
+else:
+ errprint('fill_rate_mem not in config\nExit')
+ exit(1)
+
+
+if 'fill_rate_swap' in config_dict:
+ fill_rate_swap = string_to_float_convert_test(
+ config_dict['fill_rate_swap'])
+ if fill_rate_swap is None:
+ errprint('Invalid fill_rate_swap value, not float\nExit')
+ exit(1)
+ if fill_rate_swap <= 0:
+ errprint('fill_rate_swap MUST be > 0\nExit')
+ exit(1)
+else:
+ errprint('fill_rate_swap not in config\nExit')
+ exit(1)
+
+
+if 'fill_rate_zram' in config_dict:
+ fill_rate_zram = string_to_float_convert_test(
+ config_dict['fill_rate_zram'])
+ if fill_rate_zram is None:
+ errprint('Invalid fill_rate_zram value, not float\nExit')
+ exit(1)
+ if fill_rate_zram <= 0:
+ errprint('fill_rate_zram MUST be > 0\nExit')
+ exit(1)
+else:
+ errprint('fill_rate_zram not in config\nExit')
+ exit(1)
+
+
+if 'soft_threshold_min_swap' in config_dict:
+ soft_threshold_min_swap = config_dict['soft_threshold_min_swap']
+else:
+ errprint('soft_threshold_min_swap not in config\nExit')
+ exit(1)
+
+
+if 'hard_threshold_min_swap' in config_dict:
+ hard_threshold_min_swap = config_dict['hard_threshold_min_swap']
+else:
+ errprint('hard_threshold_min_swap not in config\nExit')
+ exit(1)
+
+
+# post_soft_action_delay: required float, 0 is allowed
+if 'post_soft_action_delay' in config_dict:
+    post_soft_action_delay = string_to_float_convert_test(
+        config_dict['post_soft_action_delay'])
+    if post_soft_action_delay is None:
+        errprint('Invalid post_soft_action_delay value, not float\nExit')
+        exit(1)
+    if post_soft_action_delay < 0:
+        # only negative values are rejected, so say "non-negative"
+        errprint('post_soft_action_delay must be non-negative number\nExit')
+        exit(1)
+else:
+    errprint('post_soft_action_delay not in config\nExit')
+    exit(1)
+
+
+# psi_post_action_delay: required float, 0 is allowed
+if 'psi_post_action_delay' in config_dict:
+    psi_post_action_delay = string_to_float_convert_test(
+        config_dict['psi_post_action_delay'])
+    if psi_post_action_delay is None:
+        errprint('Invalid psi_post_action_delay value, not float\nExit')
+        exit(1)
+    if psi_post_action_delay < 0:
+        # only negative values are rejected, so say "non-negative"
+        errprint('psi_post_action_delay must be non-negative number\nExit')
+        exit(1)
+else:
+    errprint('psi_post_action_delay not in config\nExit')
+    exit(1)
+
+
+if 'hard_threshold_max_psi' in config_dict:
+ hard_threshold_max_psi = string_to_float_convert_test(
+ config_dict['hard_threshold_max_psi'])
+ if hard_threshold_max_psi is None:
+ errprint('Invalid hard_threshold_max_psi value, not float\nExit')
+ exit(1)
+ if hard_threshold_max_psi < 0 or hard_threshold_max_psi > 100:
+ errprint('hard_threshold_max_psi must be in the range [0; 100]\nExit')
+ exit(1)
+else:
+ errprint('hard_threshold_max_psi not in config\nExit')
+ exit(1)
+
+
+if 'soft_threshold_max_psi' in config_dict:
+ soft_threshold_max_psi = string_to_float_convert_test(
+ config_dict['soft_threshold_max_psi'])
+ if soft_threshold_max_psi is None:
+ errprint('Invalid soft_threshold_max_psi value, not float\nExit')
+ exit(1)
+ if soft_threshold_max_psi < 0 or soft_threshold_max_psi > 100:
+ errprint('soft_threshold_max_psi must be in the range [0; 100]\nExit')
+ exit(1)
+else:
+ errprint('soft_threshold_max_psi not in config\nExit')
+ exit(1)
+
+
+if 'warning_threshold_max_psi' in config_dict:
+ warning_threshold_max_psi = string_to_float_convert_test(
+ config_dict['warning_threshold_max_psi'])
+ if warning_threshold_max_psi is None:
+ errprint('Invalid warning_threshold_max_psi value, not float\nExit')
+ exit(1)
+ if warning_threshold_max_psi < 0 or warning_threshold_max_psi > 100:
+ errprint(
+ 'warning_threshold_max_psi must be in the range [0; 100]\nExit')
+ exit(1)
+else:
+ errprint('warning_threshold_max_psi not in config\nExit')
+ exit(1)
+
+
+# min_badness: required integer in the range [0; 1000]
+if 'min_badness' in config_dict:
+    min_badness = string_to_int_convert_test(
+        config_dict['min_badness'])
+    if min_badness is None:
+        errprint('Invalid min_badness value, not integer\nExit')
+        exit(1)
+    if min_badness < 0 or min_badness > 1000:
+        errprint('Invalid min_badness value, out of range [0; 1000]\nExit')
+        exit(1)
+else:
+    errprint('min_badness not in config\nExit')
+    exit(1)
+
+
+if 'min_post_warning_delay' in config_dict:
+ min_post_warning_delay = string_to_float_convert_test(
+ config_dict['min_post_warning_delay'])
+ if min_post_warning_delay is None:
+ errprint('Invalid min_post_warning_delay value, not float\nExit')
+ exit(1)
+ if min_post_warning_delay < 1 or min_post_warning_delay > 300:
+ errprint('min_post_warning_delay value out of range [1; 300]\nExit')
+ exit(1)
+else:
+ errprint('min_post_warning_delay not in config\nExit')
+ exit(1)
+
+
+if 'warning_threshold_min_swap' in config_dict:
+ warning_threshold_min_swap = config_dict['warning_threshold_min_swap']
+else:
+ errprint('warning_threshold_min_swap not in config\nExit')
+ exit(1)
+
+
+# max_victim_ancestry_depth: required integer >= 1
+if 'max_victim_ancestry_depth' in config_dict:
+    max_victim_ancestry_depth = string_to_int_convert_test(
+        config_dict['max_victim_ancestry_depth'])
+    # check the value just parsed (not min_badness); otherwise a
+    # non-integer value reaches the comparison below as None
+    if max_victim_ancestry_depth is None:
+        errprint('Invalid max_victim_ancestry_depth value, not integer\nExit')
+        exit(1)
+    if max_victim_ancestry_depth < 1:
+        errprint('Invalid max_victim_ancestry_depth value, must be >= 1\nExit')
+        exit(1)
+else:
+    errprint('max_victim_ancestry_depth is not in config\nExit')
+    exit(1)
+
+
+if 'max_soft_exit_time' in config_dict:
+ max_soft_exit_time = string_to_float_convert_test(
+ config_dict['max_soft_exit_time'])
+ if max_soft_exit_time is None:
+ errprint('Invalid max_soft_exit_time val'
+ 'ue, not float\nExit')
+ exit(1)
+ if max_soft_exit_time < 0:
+ errprint('max_soft_exit_time must be non-n'
+ 'egative number\nExit')
+ exit(1)
+else:
+ errprint('max_soft_exit_time is not in config\nExit')
+ exit(1)
+
+
+if 'post_kill_exe' in config_dict:
+ post_kill_exe = config_dict['post_kill_exe']
+else:
+ errprint('post_kill_exe is not in config\nExit')
+ exit(1)
+
+
+if 'psi_path' in config_dict:
+ psi_path = config_dict['psi_path']
+else:
+ errprint('psi_path is not in config\nExit')
+ exit(1)
+
+
+if 'psi_metrics' in config_dict:
+ psi_metrics = config_dict['psi_metrics']
+else:
+ errprint('psi_metrics is not in config\nExit')
+ exit(1)
+
+
+if 'warning_exe' in config_dict:
+ warning_exe = config_dict['warning_exe']
+ if warning_exe != '':
+ check_warning_exe = True
+ else:
+ check_warning_exe = False
+else:
+ errprint('warning_exe is not in config\nExit')
+ exit(1)
+
+
+if 'extra_table_info' in config_dict:
+ extra_table_info = config_dict['extra_table_info']
+ if (extra_table_info != 'None' and
+ extra_table_info != 'cgroup_v1' and
+ extra_table_info != 'cgroup_v2' and
+ extra_table_info != 'cmdline' and
+ extra_table_info != 'environ' and
+ extra_table_info != 'realpath'):
+
+ errprint('Invalid config: invalid extra_table_info value\nExit')
+ exit(1)
+else:
+ errprint('Invalid config: extra_table_info is not in config\nExit')
+ exit(1)
+
+
+separate_log = conf_parse_bool('separate_log')
+
+# Optional logging to a separate file. Every step is best-effort: a
+# failure is reported and start-up continues.
+if separate_log:
+
+    import logging
+
+    log_dir = '/var/log/nohang'
+
+    try:
+        os.mkdir(log_dir)
+    except FileExistsError:
+        pass
+    except (PermissionError, FileNotFoundError):
+        # FileNotFoundError: the parent directory does not exist
+        errprint('ERROR: can not create log dir')
+
+    logfile = log_dir + '/nohang.log'
+
+    # probe that the log file can be opened for appending
+    try:
+        with open(logfile, 'a') as f:
+            pass
+    except FileNotFoundError:
+        errprint('ERROR: log FileNotFoundError')
+    except PermissionError:
+        errprint('ERROR: log PermissionError')
+
+    try:
+        logging.basicConfig(
+            filename=logfile,
+            level=logging.INFO,
+            format="%(asctime)s: %(message)s")
+    except PermissionError:
+        errprint('ERROR: Permission denied: {}'.format(logfile))
+    except FileNotFoundError:
+        errprint('ERROR: FileNotFoundError: {}'.format(logfile))
+
+
+if 'min_mem_report_interval' in config_dict:
+ min_mem_report_interval = string_to_float_convert_test(
+ config_dict['min_mem_report_interval'])
+ if min_mem_report_interval is None:
+ errprint('Invalid min_mem_report_interval value, not float\nExit')
+ exit(1)
+ if min_mem_report_interval < 0:
+ errprint('min_mem_report_interval must be non-negative number\nExit')
+ exit(1)
+else:
+ errprint('min_mem_report_interval is not in config\nExit')
+ exit(1)
+
+
+if 'psi_excess_duration' in config_dict:
+ psi_excess_duration = string_to_float_convert_test(
+ config_dict['psi_excess_duration'])
+ if psi_excess_duration is None:
+ errprint('Invalid psi_excess_duration value, not float\nExit')
+ exit(1)
+ if psi_excess_duration < 0:
+ errprint('psi_excess_duration must be non-negative number\nExit')
+ exit(1)
+else:
+ errprint('psi_excess_duration is not in config\nExit')
+ exit(1)
+
+
+if 'max_sleep' in config_dict:
+ max_sleep = string_to_float_convert_test(
+ config_dict['max_sleep'])
+ if max_sleep is None:
+ errprint('Invalid max_sleep value, not float\nExit')
+ exit(1)
+ if max_sleep <= 0:
+ errprint('max_sleep must be positive number\nExit')
+ exit(1)
+else:
+ errprint('max_sleep is not in config\nExit')
+ exit(1)
+
+
+if 'min_sleep' in config_dict:
+ min_sleep = string_to_float_convert_test(
+ config_dict['min_sleep'])
+ if min_sleep is None:
+ errprint('Invalid min_sleep value, not float\nExit')
+ exit(1)
+ if min_sleep <= 0:
+ errprint('min_sleep must be positive number\nExit')
+ exit(1)
+else:
+ errprint('min_sleep is not in config\nExit')
+ exit(1)
+
+
+if 'over_sleep' in config_dict:
+ over_sleep = string_to_float_convert_test(
+ config_dict['over_sleep'])
+ if over_sleep is None:
+ errprint('Invalid over_sleep value, not float\nExit')
+ exit(1)
+ if over_sleep <= 0:
+ errprint('over_sleep must be positive number\nExit')
+ exit(1)
+else:
+ errprint('over_sleep is not in config\nExit')
+ exit(1)
+
+
+sensitivity_test_time = over_sleep / 2
+
+
+# The sleep settings must satisfy over_sleep <= min_sleep <= max_sleep.
+if min_sleep > max_sleep:
+    errprint('min_sleep value must not exceed max_sleep value.\nExit')
+    exit(1)
+
+
+if over_sleep > min_sleep:
+    errprint('over_sleep value must not exceed min_sleep value.\nExit')
+    exit(1)
+
+
+# equal bounds leave nothing to compute: always sleep the same time
+stable_sleep = (max_sleep == min_sleep)
+
+
+if print_proc_table_flag:
+
+ if not root:
+ log('WARNING: effective UID != 0; euid={}; processes with other e'
+ 'uids will be invisible for nohang'.format(self_uid))
+
+ func_print_proc_table()
+
+
+##########################################################################
+
+
+psi_support = os.path.exists(psi_path)
+
+
+##########################################################################
+
+# Get KiB levels if it's possible.
+
+soft_threshold_min_swap_tuple = get_swap_threshold_tuple(
+ soft_threshold_min_swap)
+hard_threshold_min_swap_tuple = get_swap_threshold_tuple(
+ hard_threshold_min_swap)
+warning_threshold_min_swap_tuple = get_swap_threshold_tuple(
+ warning_threshold_min_swap)
+
+
+swap_kb_dict = dict()
+
+swap_term_is_percent = soft_threshold_min_swap_tuple[1]
+if swap_term_is_percent:
+ soft_threshold_min_swap_percent = soft_threshold_min_swap_tuple[0]
+else:
+ soft_threshold_min_swap_kb = soft_threshold_min_swap_tuple[0]
+ swap_kb_dict['soft_threshold_min_swap_kb'] = soft_threshold_min_swap_kb
+
+swap_kill_is_percent = hard_threshold_min_swap_tuple[1]
+if swap_kill_is_percent:
+ hard_threshold_min_swap_percent = hard_threshold_min_swap_tuple[0]
+else:
+ hard_threshold_min_swap_kb = hard_threshold_min_swap_tuple[0]
+ swap_kb_dict['hard_threshold_min_swap_kb'] = hard_threshold_min_swap_kb
+
+
+swap_warn_is_percent = warning_threshold_min_swap_tuple[1]
+if swap_warn_is_percent:
+ warning_threshold_min_swap_percent = warning_threshold_min_swap_tuple[0]
+else:
+ warning_threshold_min_swap_kb = warning_threshold_min_swap_tuple[0]
+ swap_kb_dict['warning_threshold_min_swap_kb'] = warning_threshold_min_swap_kb
+
+
+##########################################################################
+
+
+if print_config_at_startup or check_config_flag:
+ check_config()
+
+
+##########################################################################
+
+
+# for calculating the column width when printing mem and zram
+mem_len = len(str(round(mem_total / 1024.0)))
+
+if post_action_gui_notifications:
+ notify_sig_dict = {SIGKILL: 'Killing',
+ SIGTERM: 'Terminating'}
+
+
+# convert rates from MiB/s to KiB/s
+fill_rate_mem = fill_rate_mem * 1024
+fill_rate_swap = fill_rate_swap * 1024
+fill_rate_zram = fill_rate_zram * 1024
+
+
+warn_time_now = 0
+warn_time_delta = 1000
+warn_timer = 0
+
+
+##########################################################################
+
+
+if not root:
+ log('WARNING: effective UID != 0; euid={}; processes with other e'
+ 'uids will be invisible for nohang'.format(self_uid))
+
+
+# Try to lock all memory
+
+mlockall()
+
+##########################################################################
+
+
+# print_self_rss()
+
+psi_avg_string = '' # will be overwritten if PSI monitoring enabled
+
+mem_used_zram = 0
+
+
+if print_mem_check_results:
+
+ # to find delta mem
+ wt2 = 0
+ new_mem = 0
+
+ # init mem report interval
+ report0 = 0
+
+
+# handle signals
+for i in sig_list:
+ signal(i, signal_handler)
+
+
+x0 = time()
+delta0 = 0
+
+
+threshold = None
+mem_info = None
+
+
+CHECK_PSI = False
+if psi_support and not ignore_psi:
+ CHECK_PSI = True
+
+psi_kill_exceeded_timer = 0
+psi_term_exceeded_timer = 0
+psi_t0 = time()
+psi_threshold = zram_threshold = zram_info = psi_info = None
+
+
+CHECK_ZRAM = not ignore_zram
+
+log('Monitoring has started!')
+
+stdout.flush()
+
+
+##########################################################################
+
+
+while True:
+
+ (masf_threshold, masf_info, mem_available, hard_threshold_min_swap_kb,
+ soft_threshold_min_swap_kb, swap_free, swap_total) = check_mem_swap_ex()
+
+ if CHECK_ZRAM:
+ zram_threshold, zram_info, mem_used_zram = check_zram_ex()
+
+ if CHECK_PSI:
+ (psi_threshold, psi_info, psi_t0, psi_kill_exceeded_timer,
+ psi_term_exceeded_timer, x0) = check_psi_ex(
+ psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0)
+
+ if print_mem_check_results:
+
+ if CHECK_PSI:
+ psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
+ if time() - psi_t0 >= psi_post_action_delay:
+ psi_post_action_delay_exceeded = True
+ else:
+ psi_post_action_delay_exceeded = False
+
+ if print_mem_check_results:
+ psi_avg_string = 'PSI avg: {} | '.format(
+ str(psi_avg_value).rjust(6))
+
+ wt1 = time()
+
+ delta = (mem_available + swap_free) - new_mem
+
+ t_cycle = wt1 - wt2
+
+ report_delta = wt1 - report0
+
+ if report_delta >= min_mem_report_interval:
+
+ mem_report = True
+ new_mem = mem_available + swap_free
+
+ report0 = wt1
+
+ else:
+ mem_report = False
+
+ wt2 = time()
+
+ if mem_report:
+
+ speed = delta / 1024.0 / report_delta
+ speed_info = ' | dMem: {} M/s'.format(
+ str(round(speed)).rjust(5)
+ )
+
+ # Calculate 'swap-column' width
+ swap_len = len(str(round(swap_total / 1024.0)))
+
+ # Output available mem sizes
+ if swap_total == 0 and mem_used_zram == 0:
+ log('{}MemAvail: {} M, {} %{}'.format(
+ psi_avg_string,
+ human(mem_available, mem_len),
+ just_percent_mem(mem_available / mem_total),
+ speed_info
+ )
+ )
+
+ elif swap_total > 0 and mem_used_zram == 0:
+ log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format(
+ psi_avg_string,
+ human(mem_available, mem_len),
+ just_percent_mem(mem_available / mem_total),
+ human(swap_free, swap_len),
+ just_percent_swap(swap_free / (swap_total + 0.1)),
+ speed_info
+ )
+ )
+
+ else:
+ log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem'
+ 'UsedZram: {} M, {} %{}'.format(
+ psi_avg_string,
+ human(mem_available, mem_len),
+ just_percent_mem(mem_available / mem_total),
+ human(swap_free, swap_len),
+ just_percent_swap(swap_free / (swap_total + 0.1)),
+ human(mem_used_zram, mem_len),
+ just_percent_mem(mem_used_zram / mem_total),
+ speed_info
+ )
+ )
+
+ if (masf_threshold is SIGKILL or zram_threshold is SIGKILL or
+ psi_threshold is SIGKILL):
+
+ threshold = SIGKILL
+ mem_info_list = []
+
+ if masf_info is not None:
+ mem_info_list.append(masf_info)
+
+ if zram_info is not None:
+ mem_info_list.append(zram_info)
+
+ if psi_info is not None:
+ mem_info_list.append(psi_info)
+
+ psi_t0 = implement_corrective_action(
+ threshold,
+ mem_info_list,
+ psi_t0,
+ psi_kill_exceeded_timer,
+ psi_term_exceeded_timer,
+ x0, psi_threshold, zram_threshold, zram_info, psi_info)
+ continue
+
+ if (masf_threshold is SIGTERM or zram_threshold is SIGTERM or
+ psi_threshold is SIGTERM):
+
+ threshold = SIGTERM
+ mem_info_list = []
+
+ if masf_info is not None:
+ mem_info_list.append(masf_info)
+
+ if zram_info is not None:
+ mem_info_list.append(zram_info)
+
+ if psi_info is not None:
+ mem_info_list.append(psi_info)
+
+ psi_t0 = implement_corrective_action(
+ threshold,
+ mem_info_list,
+ psi_t0,
+ psi_kill_exceeded_timer,
+ psi_term_exceeded_timer,
+ x0, psi_threshold, zram_threshold, zram_info, psi_info)
+ continue
+
+ if low_memory_warnings_enabled:
+
+ if (masf_threshold == 'WARN' or zram_threshold == 'WARN' or
+ psi_threshold == 'WARN'):
+
+ warn_time_delta = time() - warn_time_now
+ warn_time_now = time()
+ warn_timer += warn_time_delta
+ if warn_timer > min_post_warning_delay:
+
+ send_notify_warn()
+
+ warn_timer = 0
+
+ sleep_after_check_mem()
diff --git a/old/nohang.conf b/old/nohang.conf
new file mode 100644
index 0000000..1b13348
--- /dev/null
+++ b/old/nohang.conf
@@ -0,0 +1,359 @@
+ This is the nohang config file.
+ Lines starting with #, tabs and spaces are comments.
+ Lines starting with @ contain optional parameters.
+ All values are case sensitive.
+ Be careful: nohang doesn't forbid you to shoot yourself in the foot.
+
+ The configuration includes the following sections:
+
+ 0. Common zram settings
+ 1. Memory levels to respond to as an OOM threat
+ 2. Response on PSI memory metrics
+ 3. The frequency of checking the level of available memory
+ (and CPU usage)
+ 4. The prevention of killing innocent victims
+ 5. Impact on the badness of processes via matching their names, cgroups and
+ cmdlines with specified regular expressions
+ 6. Customize corrective actions: the execution of a specific command
+ instead of sending the SIGTERM signal
+ 7. GUI notifications:
+ - low memory warnings
+ - OOM prevention results
+ 8. Output verbosity
+ 9. Misc
+
+ Just read the description of the parameters and edit the values.
+ Please restart the program after editing the config.
+
+ More docs will be written later.
+
+###############################################################################
+
+ 0. Common zram settings
+
+ See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
+ You may need to set `zram_checking_enabled = True` if you have a big zram disksize.
+
+zram_checking_enabled = False
+
+###############################################################################
+
+ 1. Thresholds below which a signal should be sent to the victim
+
+ Sets the available memory levels at or below which SIGTERM or SIGKILL
+ signals are sent. The signal is sent only when MemAvailable and
+ SwapFree (in /proc/meminfo) both drop below the corresponding
+ values at the same time. Can be specified in % (percent) or M (MiB).
+ Valid values are floating-point numbers from the range [0; 100] %.
+
+ MemAvailable levels.
+
+soft_threshold_min_mem = 8 %
+hard_threshold_min_mem = 4 %
+
+ SwapFree levels.
+
+soft_threshold_min_swap = 10 %
+hard_threshold_min_swap = 5 %
+
+ Specifies the share of memory used by zram above which the
+ corresponding signals are sent. As the share of zram in memory
+ increases, system responsiveness may drop. 90 % is a typical
+ hang level, so setting very high values is not recommended.
+
+ Can be specified in % and M. Valid values are floating-point
+ numbers from the range [0; 90] %.
+
+soft_threshold_max_zram = 60 %
+hard_threshold_max_zram = 65 %
+
+
+###############################################################################
+
+ 2. Response on PSI memory metrics (it needs Linux 4.20 and up)
+
+ About PSI:
+ https://facebookmicrosites.github.io/psi/
+
+ Disabled by default (psi_checking_enabled = False).
+
+psi_checking_enabled = False
+
+ Choose a path to PSI file.
+ By default it monitors system-wide file: /proc/pressure/memory
+ You can also set a file to monitor a single cgroup slice.
+ For example:
+ psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
+ psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
+ psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
+
+ Execute the command
+ find /sys/fs/cgroup -name memory.pressure
+ to find available memory.pressure files (except /proc/pressure/memory).
+ (relevant for cgroup2)
+
+psi_path = /proc/pressure/memory
+
+ Valid psi_metrics are:
+ some_avg10
+ some_avg60
+ some_avg300
+ full_avg10
+ full_avg60
+ full_avg300
+
+ some_avg10 is most sensitive.
+
+psi_metrics = some_avg10
+
+soft_threshold_max_psi = 60
+
+hard_threshold_max_psi = 90
+
+ >= 0, float
+psi_excess_duration = 60
+
+psi_post_action_delay = 60
+
+
+###############################################################################
+
+ 3. The frequency of checking the amount of available memory
+ (and CPU usage)
+
+ Coefficients that affect the intensity of monitoring. Reducing
+ the coefficients can reduce CPU usage and increase the periods
+ between memory checks.
+
+ Why three coefficients instead of one? Because the swap fill rate
+ is usually lower than the RAM fill rate.
+
+ It is possible to set a lower intensity of monitoring for swap
+ without compromising to prevent OOM and thus reduce the CPU load.
+
+ Default values work well for desktops. On servers without rapid
+ fluctuations in memory levels the values can be reduced.
+
+ Valid values are positive floating-point numbers.
+
+fill_rate_mem = 4000
+fill_rate_swap = 1500
+fill_rate_zram = 6000
+
+ See also https://github.com/rfjakob/earlyoom/issues/61
+
+max_sleep = 3
+min_sleep = 0.1
+
+ Sleep time if soft threshold exceeded.
+
+over_sleep = 0.05
+
+###############################################################################
+
+ 4. The prevention of killing innocent victims
+
+ Valid values are integers from the range [0; 1000].
+
+min_badness = 10
+
+ Valid values are non-negative floating-point numbers.
+ Min delay if a victim doesn't respond to SIGTERM in 10 ms.
+
+post_soft_action_delay = 3
+
+post_zombie_delay = 0.1
+
+victim_cache_time = 10
+
+ Valid values are True and False.
+
+ignore_positive_oom_score_adj = False
+
+###############################################################################
+
+ 5. Impact on the badness of processes via matching their names,
+ cmdlines or UIDs with regular expressions using re.search().
+
+ See https://en.wikipedia.org/wiki/Regular_expression and
+ https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions
+
+ Enabling these options slows down the search for the victim
+ because the names, cmdlines or UIDs of all processes
+ (except init and kthreads) are compared with the
+ specified regex patterns (in fact slowing down is caused by
+ reading all /proc/*/cmdline and /proc/*/status files).
+
+ Use script `oom-sort` from nohang package to view
+ names, cmdlines and UIDs of processes.
+
+ 5.1. Matching process names with RE patterns
+
+ Syntax:
+
+ @BADNESS_ADJ_RE_NAME badness_adj /// RE_pattern
+
+ New badness value will be += badness_adj
+
+ It is possible to compare multiple patterns
+ with different badness_adj values.
+
+ Example:
+ @BADNESS_ADJ_RE_NAME -500 /// ^sshd$
+
+ 5.2. Matching CGroup_v1-line with RE patterns
+
+ @BADNESS_ADJ_RE_CGROUP_V1 -100 /// ^/system\.slice/
+
+ @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
+
+ @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
+
+ 5.3. Matching CGroup_v2-line with RE patterns
+
+ @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
+
+ 5.4. Matching eUIDs with RE patterns
+
+ @BADNESS_ADJ_RE_UID -100 /// ^0$
+
+ 5.5. Matching realpath with RE patterns
+
+ @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
+
+ 5.6. Matching cmdlines with RE patterns
+
+ A good option that allows fine adjustment.
+
+ Prefer chromium tabs and electron-based apps
+ @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
+
+ Prefer firefox tabs (Web Content and WebExtensions)
+ @BADNESS_ADJ_RE_CMDLINE 300 /// -appomni
+
+ @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
+
+ 5.7. Matching environ with RE patterns
+
+ @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
+
+ Note that you can control badness also via systemd units via
+ OOMScoreAdjust, see
+ www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
+
+###############################################################################
+
+ 6. Customize corrective actions.
+
+ TODO: docs
+
+ Syntax:
+ KEY REGEXP SEPARATOR COMMAND
+
+ @SOFT_ACTION_RE_NAME ^foo$ /// kill -SEGV $PID
+ @SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID
+
+ @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
+ @SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
+
+ $PID will be replaced by process PID.
+ $NAME will be replaced by process name.
+ $SERVICE will be replaced by .service if it exists (otherwise it will be
+ replaced by an empty line)
+
+###############################################################################
+
+ 7. GUI notifications & low memory warnings
+
+post_action_gui_notifications = False
+
+ Enable GUI notifications about the low level of available memory.
+ Valid values are True and False.
+
+low_memory_warnings_enabled = False
+
+ Execute the command instead of sending GUI notifications if the value is
+ not an empty line. For example:
+ warning_exe = cat /proc/meminfo &
+
+warning_exe =
+
+ Can be specified in % (percent) and M (MiB).
+ Valid values are floating-point numbers from the range [0; 100] %.
+
+warning_threshold_min_mem = 20 %
+
+warning_threshold_min_swap = 25 %
+
+warning_threshold_max_zram = 50 %
+
+warning_threshold_max_psi = 100
+
+ Valid values are floating-point numbers from the range [1; 300].
+
+min_post_warning_delay = 20
+
+ Ampersands (&) will be replaced with asterisks (*) in process
+ names and in commands.
+
+###############################################################################
+
+ 8. Verbosity
+
+ Display the configuration when the program starts.
+ Valid values are True and False.
+
+print_config_at_startup = False
+
+ Print memory check results.
+ Valid values are True and False.
+
+print_mem_check_results = False
+
+min_mem_report_interval = 60
+
+print_proc_table = False
+
+ Valid values:
+ None
+ cgroup_v1
+ cgroup_v2
+ realpath
+ cmdline
+ environ
+
+extra_table_info = None
+
+print_victim_status = True
+
+max_victim_ancestry_depth = 3
+
+print_victim_cmdline = False
+
+print_statistics = True
+
+ Print sleep periods between memory checks.
+ Valid values are True and False.
+
+debug_psi = False
+
+debug_gui_notifications = False
+
+debug_sleep = False
+
+separate_log = False
+
+###############################################################################
+
+ 9. Misc
+
+max_soft_exit_time = 10
+
+post_kill_exe =
+
+forbid_negative_badness = True
+
+###############################################################################
+
+ Use cases, feature requests and any questions are welcome:
+ https://github.com/hakavlad/nohang/issues
diff --git a/nohang_notify_helper b/old/nohang_notify_helper
similarity index 100%
rename from nohang_notify_helper
rename to old/nohang_notify_helper
diff --git a/test.conf b/test.conf
index 28f837f..dc85b2b 100644
--- a/test.conf
+++ b/test.conf
@@ -291,6 +291,8 @@ warning_threshold_max_psi = 100
min_post_warning_delay = 20
+env_cache_time = 300
+
Ampersands (&) will be replaced with asterisks (*) in process
names and in commands.
@@ -341,6 +343,9 @@ debug_sleep = True
separate_log = True
+debug_threading = True
+
+
###############################################################################
9. Misc