diff --git a/README.md b/README.md
index 30415d1..36da9c0 100644
--- a/README.md
+++ b/README.md
@@ -66,7 +66,7 @@ Of course, you can also [download more RAM](https://downloadmoreram.com/), tune
For basic usage:
- `Linux` 3.14+ (since `MemAvailable` appeared in `/proc/meminfo`)
-- `Python` 3.3+ (not tested with previous)
+- `Python` 3.3+
To show GUI notifications:
- [notification server](https://wiki.archlinux.org/index.php/Desktop_notifications#Notification_servers) (most of desktop environments use their own implementations)
diff --git a/old/nohang b/old/nohang
deleted file mode 100755
index da010ea..0000000
--- a/old/nohang
+++ /dev/null
@@ -1,3360 +0,0 @@
-#!/usr/bin/env python3
-"""A daemon that prevents OOM in Linux systems."""
-
-import os
-from ctypes import CDLL
-from time import sleep, time
-from operator import itemgetter
-from sys import stdout, stderr, argv, exit
-from re import search
-from sre_constants import error as invalid_re
-from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP
-from threading import Thread
-
-
-##########################################################################
-
-# define functions
-
-
-def check_config():
- """
- """
-
- log('#' * 79)
-
- log('0. Common zram settings')
-
- log(' zram_checking_enabled: {}'.format(zram_checking_enabled))
-
- log('1. Thresholds below which a signal should be sent to the victim')
-
- log(' soft_threshold_min_mem: {} MiB, {} %'.format(
- round(soft_threshold_min_mem_mb), round(soft_threshold_min_mem_percent, 1)))
- log(' hard_threshold_min_mem: {} MiB, {} %'.format(
- round(hard_threshold_min_mem_mb), round(hard_threshold_min_mem_percent, 1)))
- log(' soft_threshold_min_swap: {}'.format(soft_threshold_min_swap))
- log(' hard_threshold_min_swap: {}'.format(hard_threshold_min_swap))
- log(' soft_threshold_max_zram: {} MiB, {} %'.format(
- round(soft_threshold_max_zram_mb), round(soft_threshold_max_zram_percent, 1)))
- log(' hard_threshold_max_zram: {} MiB, {} %'.format(
- round(hard_threshold_max_zram_mb), round(hard_threshold_max_zram_percent, 1)))
-
- log('2. Response on PSI memory metrics')
-
- log(' psi_checking_enabled: {}'.format(psi_checking_enabled))
- log(' psi_path: {}'.format(psi_path))
- log(' psi_metrics: {}'.format(psi_metrics))
- log(' soft_threshold_max_psi: {}'.format(soft_threshold_max_psi))
- log(' hard_threshold_max_psi: {}'.format(hard_threshold_max_psi))
- log(' psi_excess_duration: {} sec'.format(psi_excess_duration))
- log(' psi_post_action_delay: {} sec'.format(psi_post_action_delay))
-
- log('3. The frequency of checking the amount of available memory')
-
- log(' fill_rate_mem: {}'.format(fill_rate_mem))
- log(' fill_rate_swap: {}'.format(fill_rate_swap))
- log(' fill_rate_zram: {}'.format(fill_rate_zram))
- log(' max_sleep: {} sec'.format(max_sleep))
- log(' min_sleep: {} sec'.format(min_sleep))
- log(' over_sleep: {} sec'.format(over_sleep))
-
- log('4. The prevention of killing innocent victims')
-
- log(' min_badness: {}'.format(min_badness))
- log(' post_soft_action_delay: {} sec'.format(post_soft_action_delay))
- log(' post_zombie_delay: {} sec'.format(post_zombie_delay))
- log(' victim_cache_time: {} sec'.format(victim_cache_time))
- log(' ignore_positive_oom_score_adj: {}'.format(
- ignore_positive_oom_score_adj))
-
- log('5. Impact on the badness of processes')
-
- log('5.1. Matching process names with RE patterns')
- if len(badness_adj_re_name_list) > 0:
- log(' regexp: badness_adj:')
- for i in badness_adj_re_name_list:
- log(' {} {}'.format(i[1], i[0]))
- else:
- log(' (not set)')
-
- log('5.2. Matching CGroup_v1-line with RE patterns')
- if len(badness_adj_re_cgroup_v1_list) > 0:
- log(' regexp: badness_adj:')
- for i in badness_adj_re_cgroup_v1_list:
- log(' {} {}'.format(i[1], i[0]))
- else:
- log(' (not set)')
-
- log('5.3. Matching CGroup_v2-line with RE patterns')
- if len(badness_adj_re_cgroup_v2_list) > 0:
- log(' regexp: badness_adj:')
- for i in badness_adj_re_cgroup_v1_list:
- log(' {} {}'.format(i[1], i[0]))
- else:
- log(' (not set)')
-
- log('5.4. Matching eUIDs with RE patterns')
- if len(badness_adj_re_cgroup_v2_list) > 0:
- log(' regexp: badness_adj:')
- for i in badness_adj_re_uid_list:
- log(' {} {}'.format(i[1], i[0]))
- else:
- log(' (not set)')
-
- log('5.5. Matching realpath with RE patterns')
- if len(badness_adj_re_cgroup_v2_list) > 0:
- log(' regexp: badness_adj:')
- for i in badness_adj_re_realpath_list:
- log(' {} {}'.format(i[1], i[0]))
- else:
- log(' (not set)')
-
- log('5.6. Matching cmdlines with RE patterns')
- if len(badness_adj_re_cgroup_v2_list) > 0:
- log(' regexp: badness_adj:')
- for i in badness_adj_re_cmdline_list:
- log(' {} {}'.format(i[1], i[0]))
- else:
- log(' (not set)')
-
- log('5.7. Matching environ with RE patterns')
- if len(badness_adj_re_cgroup_v2_list) > 0:
- log(' regexp: badness_adj:')
- for i in badness_adj_re_environ_list:
- log(' {} {}'.format(i[1], i[0]))
- else:
- log(' (not set)')
-
- log('6. Customize corrective actions')
-
- if len(soft_actions_list) > 0:
- log(' Match by: regexp: command: ')
- for i in soft_actions_list:
- log(' {} {} {}'.format(i[0], i[1], i[2]))
- else:
- log(' (not set)')
-
- log('7. GUI notifications')
-
- log(' post_action_gui_notifications: {}'.format(
- post_action_gui_notifications))
- log(' low_memory_warnings_enabled: {}'.format(
- low_memory_warnings_enabled))
- log(' warning_exe: {}'.format(warning_exe))
- log(' warning_threshold_min_mem: {} MiB, {} %'.format(round(
- warning_threshold_min_mem_mb), round(warning_threshold_min_mem_percent, 1)))
- log(' warning_threshold_min_swap: {}'.format(warning_threshold_min_swap))
- log(' warning_threshold_max_zram: {} MiB, {} %'.format(round(
- warning_threshold_max_zram_mb), round(warning_threshold_max_zram_percent, 1)))
- log(' warning_threshold_max_psi: {}'.format(warning_threshold_max_psi))
- log(' min_post_warning_delay: {} sec'.format(min_post_warning_delay))
-
- log('8. Verbosity')
-
- log(' print_config_at_startup: {}'.format(print_config_at_startup))
- log(' print_mem_check_results: {}'.format(print_mem_check_results))
- log(' min_mem_report_interval: {} sec'.format(min_mem_report_interval))
- log(' debug_sleep: {}'.format(debug_sleep))
- log(' print_statistics: {}'.format(print_statistics))
- log(' print_proc_table: {}'.format(print_proc_table))
- log(' extra_table_info: {}'.format(extra_table_info))
- log(' print_victim_status: {}'.format(print_victim_status))
- log(' print_victim_cmdline: {}'.format(print_victim_cmdline))
- log(' max_victim_ancestry_depth: {}'.format(max_victim_ancestry_depth))
- log(' debug_gui_notifications: {}'.format(debug_gui_notifications))
- log(' separate_log: {}'.format(separate_log))
- log(' debug_psi: {}'.format(debug_psi))
-
- log('9. Misc')
-
- log(' max_soft_exit_time: {} sec'.format(max_soft_exit_time))
- log(' post_kill_exe: {}'.format(post_kill_exe))
- log(' forbid_negative_badness: {}'.format(
- forbid_negative_badness))
-
- # log(': {}'.format())
- log('#' * 79)
-
- if check_config_flag:
- log('config is OK')
- exit()
-
-
-def encoder(string):
- """
- """
- encoded = ''
- for i in string:
- encoded += str(ord(i)) + ':'
- return encoded[:-1]
-
-
-def get_swap_threshold_tuple(string):
- # re (Num %, True) or (Num KiB, False)
- """Returns KiB value if abs val was set in config, or tuple with %"""
- # return tuple with abs and bool: (abs %, True) or (abs MiB, False)
-
- if string.endswith('%'):
- valid = string_to_float_convert_test(string[:-1])
- if valid is None:
- errprint('somewhere swap unit is not float_%')
- exit(1)
-
- value = float(string[:-1].strip())
- if value < 0 or value > 100:
- errprint('invalid value, must be from the range[0; 100] %')
- exit(1)
-
- return value, True
-
- elif string.endswith('M'):
- valid = string_to_float_convert_test(string[:-1])
- if valid is None:
- errprint('somewhere swap unit is not float_M')
- exit(1)
-
- value = float(string[:-1].strip()) * 1024
- if value < 0:
- errprint('invalid unit in config (negative value)')
- exit(1)
-
- return value, False
-
- else:
- errprint(
- 'Invalid config file. There are invalid units somewhere\nExit')
- exit(1)
-
-
-def find_cgroup_indexes():
- """ Find cgroup-line positions in /proc/*/cgroup file.
- """
-
- cgroup_v1_index = cgroup_v2_index = None
-
- with open('/proc/self/cgroup') as f:
- for index, line in enumerate(f):
- if ':name=' in line:
- cgroup_v1_index = index
- if line.startswith('0::'):
- cgroup_v2_index = index
-
- return cgroup_v1_index, cgroup_v2_index
-
-
-def pid_to_rss(pid):
- """
- """
- try:
- rss = int(rline1(
- '/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE
- except IndexError:
- rss = None
- except FileNotFoundError:
- rss = None
- except ProcessLookupError:
- rss = None
- return rss
-
-
-def pid_to_vm_size(pid):
- """
- """
- try:
- vm_size = int(rline1(
- '/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE
- except IndexError:
- vm_size = None
- except FileNotFoundError:
- vm_size = None
- except ProcessLookupError:
- vm_size = None
- return vm_size
-
-
-def signal_handler(signum, frame):
- """
- """
- for i in sig_list:
- signal(i, signal_handler_inner)
- log('Signal handler called with the {} signal '.format(
- sig_dict[signum]))
- update_stat_dict_and_print(None)
- log('Exit')
- exit()
-
-
-def signal_handler_inner(signum, frame):
- """
- """
- log('Signal handler called with the {} signal (ignored) '.format(
- sig_dict[signum]))
-
-
-def exe(cmd):
- """
- """
-
- log('Execute the command: {}'.format(cmd))
- t0 = time()
- write_self_oom_score_adj(self_oom_score_adj_max)
- err = os.system(cmd)
- write_self_oom_score_adj(self_oom_score_adj_min)
- dt = time() - t0
- log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3)))
- return err
-
-
-def go(func, *a):
- """ run func in new thread
- """
- t1 = time()
- try:
- Thread(target=func, args=a).start()
- except RuntimeError:
- print('RuntimeError: cannot spawn a new thread')
- return 1
- t2 = time()
- log('New thread spawned in {} ms'.format(
- round((t2 - t1) * 1000, 1)
- ))
- return 0
-
-
-def write(path, string):
- """
- """
- with open(path, 'w') as f:
- f.write(string)
-
-
-def write_self_oom_score_adj(new_value):
- """
- """
- if root:
- write('/proc/self/oom_score_adj', new_value)
-
-
-def valid_re(reg_exp):
- """Validate regular expression.
- """
- try:
- search(reg_exp, '')
- except invalid_re:
- log('Invalid config: invalid regexp: {}'.format(reg_exp))
- exit(1)
-
-
-def func_print_proc_table():
- """
- """
- print_proc_table = True
- find_victim(print_proc_table)
- exit()
-
-
-def log(*msg):
- """
- """
- try:
- print(*msg)
- except OSError:
- sleep(0.01)
- if separate_log:
- try:
- logging.info(*msg)
- except OSError:
- sleep(0.01)
-
-
-def print_version():
- """
- """
- try:
- v = rline1('/etc/nohang/version')
- except FileNotFoundError:
- v = None
- if v is None:
- print('nohang unknown version')
- else:
- print('nohang ' + v)
- exit()
-
-
-def pid_to_cgroup_v1(pid):
- """
- """
- cgroup_v1 = ''
- try:
- with open('/proc/' + pid + '/cgroup') as f:
- for index, line in enumerate(f):
- if index == cgroup_v1_index:
- cgroup_v1 = '/' + line.partition('/')[2][:-1]
- return cgroup_v1
- except FileNotFoundError:
- return ''
-
-
-def pid_to_cgroup_v2(pid):
- """
- """
- cgroup_v2 = ''
- try:
- with open('/proc/' + pid + '/cgroup') as f:
- for index, line in enumerate(f):
- if index == cgroup_v2_index:
- cgroup_v2 = line[3:-1]
- return cgroup_v2
- except FileNotFoundError:
- return ''
-
-
-def pid_to_starttime(pid):
- """ handle FNF error!
- """
- try:
- starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[
- 2].split(' ')[20]
-
- except UnicodeDecodeError:
- with open('/proc/' + pid + '/stat', 'rb') as f:
- starttime = f.read().decode('utf-8', 'ignore').rpartition(
- ')')[2].split(' ')[20]
-
- return float(starttime) / SC_CLK_TCK
-
-
-def get_victim_id(pid):
- """victim_id is starttime + pid"""
- try:
- return rline1('/proc/' + pid + '/stat').rpartition(
- ')')[2].split(' ')[20] + '_pid' + pid
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
-
-
-def pid_to_state(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/stat', 'rb') as f:
- return f.read(40).decode('utf-8', 'ignore').rpartition(')')[2][1]
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
- except IndexError:
- with open('/proc/' + pid + '/stat', 'rb') as f:
- return f.read().decode('utf-8', 'ignore').rpartition(')')[2][1]
-
-
-def pid_to_name(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/comm', 'rb') as f:
- return f.read().decode('utf-8', 'ignore')[:-1]
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
-
-
-def pid_to_ppid(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/status') as f:
- for n, line in enumerate(f):
- if n is ppid_index:
- return line.split('\t')[1].strip()
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
- except UnicodeDecodeError:
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
- for i in range(len(f_list)):
- if i is ppid_index:
- return f_list[i].split('\t')[1]
-
-
-def pid_to_ancestry(pid, max_victim_ancestry_depth=1):
- """
- """
- if max_victim_ancestry_depth == 1:
- ppid = pid_to_ppid(pid)
- pname = pid_to_name(ppid)
- return '\n PPID: {} ({})'.format(ppid, pname)
- if max_victim_ancestry_depth == 0:
- return ''
- anc_list = []
- for i in range(max_victim_ancestry_depth):
- ppid = pid_to_ppid(pid)
- pname = pid_to_name(ppid)
- anc_list.append((ppid, pname))
- if ppid == '1':
- break
- pid = ppid
- a = ''
- for i in anc_list:
- a = a + ' <= PID {} ({})'.format(i[0], i[1])
- return '\n Ancestry: ' + a[4:]
-
-
-def pid_to_cmdline(pid):
- """
- Get process cmdline by pid.
-
- pid: str pid of required process
- returns string cmdline
- """
- try:
- with open('/proc/' + pid + '/cmdline') as f:
- return f.read().replace('\x00', ' ').rstrip()
- except FileNotFoundError:
- return ''
-
-
-def pid_to_environ(pid):
- """
- Get process environ by pid.
-
- pid: str pid of required process
- returns string environ
- """
- try:
- with open('/proc/' + pid + '/environ') as f:
- return f.read().replace('\x00', ' ').rstrip()
- except FileNotFoundError:
- return ''
-
-
-def pid_to_realpath(pid):
- """
- """
- try:
- return os.path.realpath('/proc/' + pid + '/exe')
- except FileNotFoundError:
- return ''
-
-
-def pid_to_uid(pid):
- """return euid"""
- try:
- with open('/proc/' + pid + '/status') as f:
- for n, line in enumerate(f):
- if n is uid_index:
- return line.split('\t')[2]
- except UnicodeDecodeError:
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
- return f_list[uid_index].split('\t')[2]
- except FileNotFoundError:
- return ''
-
-
-def pid_to_badness(pid):
- """Find and modify badness (if it needs)."""
-
- try:
-
- oom_score = int(rline1('/proc/' + pid + '/oom_score'))
- badness = oom_score
-
- if ignore_positive_oom_score_adj:
- oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
- if oom_score_adj > 0:
- badness = badness - oom_score_adj
-
- if regex_matching:
- name = pid_to_name(pid)
- for re_tup in badness_adj_re_name_list:
- if search(re_tup[1], name) is not None:
- badness += int(re_tup[0])
-
- if re_match_cgroup_v1:
- cgroup_v1 = pid_to_cgroup_v1(pid)
- for re_tup in badness_adj_re_cgroup_v1_list:
- if search(re_tup[1], cgroup_v1) is not None:
- badness += int(re_tup[0])
-
- if re_match_cgroup_v2:
- cgroup_v2 = pid_to_cgroup_v2(pid)
- for re_tup in badness_adj_re_cgroup_v2_list:
- if search(re_tup[1], cgroup_v2) is not None:
- badness += int(re_tup[0])
-
- if re_match_realpath:
- realpath = pid_to_realpath(pid)
- for re_tup in badness_adj_re_realpath_list:
- if search(re_tup[1], realpath) is not None:
- badness += int(re_tup[0])
-
- if re_match_cmdline:
- cmdline = pid_to_cmdline(pid)
- for re_tup in badness_adj_re_cmdline_list:
- if search(re_tup[1], cmdline) is not None:
- badness += int(re_tup[0])
-
- if re_match_environ:
- environ = pid_to_environ(pid)
- for re_tup in badness_adj_re_environ_list:
- if search(re_tup[1], environ) is not None:
- badness += int(re_tup[0])
-
- if re_match_uid:
- uid = pid_to_uid(pid)
- for re_tup in badness_adj_re_uid_list:
- if search(re_tup[1], uid) is not None:
- badness += int(re_tup[0])
-
- if forbid_negative_badness:
- if badness < 0:
- badness = 0
-
- return badness, oom_score
-
- except FileNotFoundError:
- return None, None
- except ProcessLookupError:
- return None, None
-
-
-def pid_to_status(pid):
- """
- """
-
- try:
-
- with open('/proc/' + pid + '/status') as f:
-
- for n, line in enumerate(f):
-
- if n == 0:
- name = line.split('\t')[1][:-1]
-
- if n is state_index:
- state = line.split('\t')[1][0]
- continue
-
- if n is ppid_index:
- ppid = line.split('\t')[1][:-1]
- continue
-
- if n is uid_index:
- uid = line.split('\t')[2]
- continue
-
- if n is vm_size_index:
- vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_rss_index:
- vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_swap_index:
- vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
- break
-
- return name, state, ppid, uid, vm_size, vm_rss, vm_swap
-
- except UnicodeDecodeError:
- return pid_to_status_unicode(pid)
-
- except FileNotFoundError:
- return None
-
- except ProcessLookupError:
- return None
-
- except ValueError:
- return None
-
-
-def pid_to_status_unicode(pid):
- """
- """
- try:
-
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
-
- for i in range(len(f_list)):
-
- if i == 0:
- name = f_list[i].split('\t')[1]
-
- if i is state_index:
- state = f_list[i].split('\t')[1][0]
-
- if i is ppid_index:
- ppid = f_list[i].split('\t')[1]
-
- if i is uid_index:
- uid = f_list[i].split('\t')[2]
-
- if i is vm_size_index:
- vm_size = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_rss_index:
- vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_swap_index:
- vm_swap = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- return name, state, ppid, uid, vm_size, vm_rss, vm_swap
-
- except FileNotFoundError:
- return None
-
- except ProcessLookupError:
- return None
-
- except ValueError:
- return None
-
-
-def uptime():
- """
- """
- return float(rline1('/proc/uptime').split(' ')[0])
-
-
-def errprint(*text):
- """
- """
- print(*text, file=stderr, flush=True)
-
-
-def mlockall():
- """Lock all memory to prevent swapping nohang process."""
-
- MCL_CURRENT = 1
- MCL_FUTURE = 2
- MCL_ONFAULT = 4
-
- libc = CDLL('libc.so.6', use_errno=True)
-
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
- )
- if result != 0:
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE
- )
- if result != 0:
- log('WARNING: cannot lock all memory')
- else:
- pass
- # log('All memory locked with MCL_CURRENT | MCL_FUTURE')
- else:
- pass
- # log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
-
-
-def update_stat_dict_and_print(key):
- """
- """
-
- if key is not None:
-
- if key not in stat_dict:
-
- stat_dict.update({key: 1})
-
- else:
-
- new_value = stat_dict[key] + 1
- stat_dict.update({key: new_value})
-
- if print_statistics:
-
- stats_msg = 'Total stat (what happened in the last {}):'.format(
- format_time(time() - start_time))
-
- for i in stat_dict:
- stats_msg += '\n {}: {}'.format(i, stat_dict[i])
-
- log(stats_msg)
-
-
-def find_psi_metrics_value(psi_path, psi_metrics):
- """
- """
-
- if psi_support:
-
- if psi_metrics == 'some_avg10':
- return float(rline1(psi_path).split(' ')[1].split('=')[1])
- if psi_metrics == 'some_avg60':
- return float(rline1(psi_path).split(' ')[2].split('=')[1])
- if psi_metrics == 'some_avg300':
- return float(rline1(psi_path).split(' ')[3].split('=')[1])
-
- if psi_metrics == 'full_avg10':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[1].split('=')[1])
- if psi_metrics == 'full_avg60':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[2].split('=')[1])
- if psi_metrics == 'full_avg300':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[3].split('=')[1])
-
-
-def check_mem_and_swap():
- """find mem_available, swap_total, swap_free"""
- with open('/proc/meminfo') as f:
- for n, line in enumerate(f):
- if n == 2:
- mem_available = int(line.split(':')[1][:-4])
- continue
- if n is swap_total_index:
- swap_total = int(line.split(':')[1][:-4])
- continue
- if n is swap_free_index:
- swap_free = int(line.split(':')[1][:-4])
- break
- return mem_available, swap_total, swap_free
-
-
-def check_zram():
- """find MemUsedZram"""
- disksize_sum = 0
- mem_used_total_sum = 0
-
- for dev in os.listdir('/sys/block'):
- if dev.startswith('zram'):
- stat = zram_stat(dev)
- disksize_sum += int(stat[0])
- mem_used_total_sum += int(stat[1])
-
- # Means that when setting zram disksize = 1 GiB available memory
- # decrease by 0.0042 GiB.
- # Found experimentally, requires clarification with different kernaels and
- # architectures.
- # On small disk drives (up to gigabyte) it can be more, up to 0.0045.
- # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should
- # be 0.001:
- # ("zram uses about 0.1% of the size of the disk"
- # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt),
- # but this statement contradicts the experimental data.
- # ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize
- # Found experimentally.
- ZRAM_DISKSIZE_FACTOR = 0.0042
-
- return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
-
-
-'''
-def format_time(t):
- t = int(t)
- if t < 60:
- return '{} sec'.format(t)
- if t >= 60 and t < 3600:
- m = t // 60
- s = t % 60
- return '{} min {} sec'.format(m, s)
- h = t // 3600
- s0 = t - h * 3600
- m = s0 // 60
- s = s0 % 60
- return '{} h {} min {} sec'.format(h, m, s)
-'''
-
-
-def format_time(t):
- t = int(t)
-
- if t < 60:
- return '{} sec'.format(t)
-
- if t > 3600:
- h = t // 3600
- s0 = t - h * 3600
- m = s0 // 60
- s = s0 % 60
- return '{} h {} min {} sec'.format(h, m, s)
-
- m = t // 60
- s = t % 60
- return '{} min {} sec'.format(m, s)
-
-
-def string_to_float_convert_test(string):
- """Try to interprete string values as floats."""
- try:
- return float(string)
- except ValueError:
- return None
-
-
-def string_to_int_convert_test(string):
- """Try to interpret string values as integers."""
- try:
- return int(string)
- except ValueError:
- return None
-
-
-def conf_parse_string(param):
- """
- Get string parameters from the config dict.
-
- param: config_dict key
- returns config_dict[param].strip()
- """
- if param in config_dict:
- return config_dict[param].strip()
- else:
- errprint('All the necessary parameters must be in the config')
- errprint('There is no "{}" parameter in the config'.format(param))
- exit(1)
-
-
-def conf_parse_bool(param):
- """
- Get bool parameters from the config_dict.
-
- param: config_dicst key
- returns bool
- """
- if param in config_dict:
- param_str = config_dict[param]
- if param_str == 'True':
- return True
- elif param_str == 'False':
- return False
- else:
- errprint('Invalid value of the "{}" parameter.'.format(param))
- errprint('Valid values are True and False.')
- errprint('Exit')
- exit(1)
- else:
- errprint('All the necessary parameters must be in the config')
- errprint('There is no "{}" parameter in the config'.format(param))
- exit(1)
-
-
-def rline1(path):
- """read 1st line from path."""
- try:
- with open(path) as f:
- for line in f:
- return line[:-1]
- except UnicodeDecodeError:
- with open(path, 'rb') as f:
- return f.read(999).decode(
- 'utf-8', 'ignore').split('\n')[0] # use partition()!
-
-
-def kib_to_mib(num):
- """Convert KiB values to MiB values."""
- return round(num / 1024.0)
-
-
-def percent(num):
- """Interprete num as percentage."""
- return round(num * 100, 1)
-
-
-def just_percent_mem(num):
- """convert num to percent and justify"""
- return str(round(num * 100, 1)).rjust(4, ' ')
-
-
-def just_percent_swap(num):
- """
- """
- return str(round(num * 100, 1)).rjust(5, ' ')
-
-
-def human(num, lenth):
- """Convert KiB values to MiB values with right alignment"""
- return str(round(num / 1024)).rjust(lenth, ' ')
-
-
-def zram_stat(zram_id):
- """
- Get zram state.
-
- zram_id: str zram block-device id
- returns bytes disksize, str mem_used_total
- """
- try:
- disksize = rline1('/sys/block/' + zram_id + '/disksize')
- except FileNotFoundError:
- return '0', '0'
- if disksize == ['0\n']:
- return '0', '0'
- try:
- mm_stat = rline1('/sys/block/' + zram_id + '/mm_stat').split(' ')
- mm_stat_list = []
- for i in mm_stat:
- if i != '':
- mm_stat_list.append(i)
- mem_used_total = mm_stat_list[2]
- except FileNotFoundError:
- mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total')
- return disksize, mem_used_total # BYTES, str
-
-
-def send_notify_warn():
- """
- Look for process with maximum 'badness' and warn user with notification.
- (implement Low memory warnings)
- """
- log('Warning threshold exceeded')
-
- if check_warning_exe:
- exe(warning_exe)
-
- else:
-
- title = 'Low memory'
-
- body = 'MemAvail: {}%\nSwapFree: {}%'.format(
- round(mem_available / mem_total * 100),
- round(swap_free / (swap_total + 0.1) * 100)
- )
-
- send_notification(title, body)
-
-
-def send_notify(threshold, name, pid):
- """
- Notificate about OOM Preventing.
-
- threshold: key for notify_sig_dict
- name: str process name
- pid: str process pid
- """
-
- title = 'Freeze prevention'
- body = '{} [{}] {}'.format(
- notify_sig_dict[threshold],
- pid,
- name.replace(
- # symbol '&' can break notifications in some themes,
- # therefore it is replaced by '*'
- '&', '*'
- )
- )
-
- send_notification(title, body)
-
-
-def send_notify_etc(pid, name, command):
- """
- Notificate about OOM Preventing.
-
- command: str command that will be executed
- name: str process name
- pid: str process pid
- """
- title = 'Freeze prevention'
- body = 'Victim is [{}] {}\nExecute the co' \
- 'mmand:\n{}'.format(
- pid, name.replace('&', '*'), command.replace('&', '*'))
-
- send_notification(title, body)
-
-
-def send_notification(title, body):
- """
- """
- cmd = '{} "--euid={}" "--debug={}" "--title={}" "--body={}" &'.format(
- notify_helper_path,
- self_uid,
- debug_gui_notifications,
- title,
- encoder(body))
-
- go(exe, cmd)
-
-
-def get_pid_list():
- """
- Find pid list expect kthreads and zombies
- """
- pid_list = []
- for pid in os.listdir('/proc'):
- if os.path.exists('/proc/' + pid + '/exe'):
- pid_list.append(pid)
- return pid_list
-
-
-def get_non_decimal_pids():
- """
- """
- non_decimal_list = []
- for pid in pid_list:
- if pid[0].isdecimal() is False:
- non_decimal_list.append(pid)
- return non_decimal_list
-
-
-def find_victim(_print_proc_table):
- """
- Find the process with highest badness and its badness adjustment
- Return pid and badness
- """
-
- ft1 = time()
-
- pid_list = get_pid_list()
-
- pid_list.remove(self_pid)
-
- if '1' in pid_list:
- pid_list.remove('1')
-
- non_decimal_list = get_non_decimal_pids()
-
- for i in non_decimal_list:
- if i in pid_list:
- pid_list.remove(i)
-
- pid_badness_list = []
-
- if _print_proc_table:
-
- if extra_table_info == 'None':
- extra_table_title = ''
-
- elif extra_table_info == 'cgroup_v1':
- extra_table_title = 'CGroup_v1'
-
- elif extra_table_info == 'cgroup_v2':
- extra_table_title = 'CGroup_v2'
-
- elif extra_table_info == 'cmdline':
- extra_table_title = 'cmdline'
-
- elif extra_table_info == 'environ':
- extra_table_title = 'environ'
-
- elif extra_table_info == 'realpath':
- extra_table_title = 'realpath'
-
- else:
- extra_table_title = ''
-
- hr = '#' * 107
-
- log(hr)
- log('# PID PPID badness oom_score oom_score_adj e'
- 'UID S VmSize VmRSS VmSwap Name {}'.format(
- extra_table_title))
- log('#------- ------- ------- --------- ------------- -------'
- '--- - ------ ----- ------ ---------------')
-
- for pid in pid_list:
-
- badness = pid_to_badness(pid)[0]
-
- if badness is None:
- continue
-
- if _print_proc_table:
-
- try:
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
- except FileNotFoundError:
- continue
-
- if pid_to_status(pid) is None:
- continue
- else:
- (name, state, ppid, uid, vm_size, vm_rss,
- vm_swap) = pid_to_status(pid)
-
- if extra_table_info == 'None':
- extra_table_line = ''
-
- elif extra_table_info == 'cgroup_v1':
- extra_table_line = pid_to_cgroup_v1(pid)
-
- elif extra_table_info == 'cgroup_v2':
- extra_table_line = pid_to_cgroup_v2(pid)
-
- elif extra_table_info == 'cmdline':
- extra_table_line = pid_to_cmdline(pid)
-
- elif extra_table_info == 'environ':
- extra_table_line = pid_to_environ(pid)
-
- elif extra_table_info == 'realpath':
- extra_table_line = pid_to_realpath(pid)
-
- else:
- extra_table_line = ''
-
- log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format(
- pid.rjust(7),
- ppid.rjust(7),
- str(badness).rjust(7),
- oom_score.rjust(9),
- oom_score_adj.rjust(13),
- uid.rjust(10),
- state,
- str(vm_size).rjust(6),
- str(vm_rss).rjust(5),
- str(vm_swap).rjust(6),
- name.ljust(15),
- extra_table_line
- )
- )
-
- pid_badness_list.append((pid, badness))
-
- real_proc_num = len(pid_badness_list)
-
- # Make list of (pid, badness) tuples, sorted by 'badness' values
- # print(pid_badness_list)
- pid_tuple_list = sorted(
- pid_badness_list,
- key=itemgetter(1),
- reverse=True
- )[0]
-
- pid = pid_tuple_list[0]
- victim_id = get_victim_id(pid)
-
- # Get maximum 'badness' value
- victim_badness = pid_tuple_list[1]
- victim_name = pid_to_name(pid)
-
- if _print_proc_table:
- log(hr)
-
- log('Found {} processes with existing /proc/[pid]/exe realpath'.format(
- real_proc_num))
-
- log(
- 'Process with highest badness (found in {} ms):\n PID: {}, Na'
- 'me: {}, badness: {}'.format(
- round((time() - ft1) * 1000),
- pid,
- victim_name,
- victim_badness
- )
- )
-
- return pid, victim_badness, victim_name, victim_id
-
-
-def find_victim_info(pid, victim_badness, name):
- """
- """
- status0 = time()
-
- try:
-
- with open('/proc/' + pid + '/status') as f:
-
- for n, line in enumerate(f):
-
- if n is state_index:
- state = line.split('\t')[1].rstrip()
- continue
-
- """
- if n is ppid_index:
- # ppid = line.split('\t')[1]
- continue
- """
-
- if n is uid_index:
- uid = line.split('\t')[2]
- continue
-
- if n is vm_size_index:
- vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_rss_index:
- vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if detailed_rss:
-
- if n is anon_index:
- anon_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is file_index:
- file_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is shmem_index:
- shmem_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_swap_index:
- vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
- break
-
- if print_victim_cmdline:
- cmdline = pid_to_cmdline(pid)
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
-
- except FileNotFoundError:
- log('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
- except ProcessLookupError:
- log('The victim died in the search process: ProcessLookupError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ProcessLookupError')
- return None
- except UnicodeDecodeError:
-
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
-
- for i in range(len(f_list)):
-
- if i is state_index:
- state = f_list[i].split('\t')[1].rstrip()
-
- """
- if i is ppid_index:
- pass
- # ppid = f_list[i].split('\t')[1]
- """
-
- if i is uid_index:
- uid = f_list[i].split('\t')[2]
-
- if i is vm_size_index:
- vm_size = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_rss_index:
- vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- if detailed_rss:
-
- if i is anon_index:
- anon_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is file_index:
- file_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is shmem_index:
- shmem_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_swap_index:
- vm_swap = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if print_victim_cmdline:
- cmdline = pid_to_cmdline(pid)
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
-
- except IndexError:
- log('The victim died in the search process: IndexError')
- update_stat_dict_and_print(
- 'The victim died in the search process: IndexError')
- return None
- except ValueError:
- log('The victim died in the search process: ValueError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ValueError')
- return None
- except FileNotFoundError:
- log('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
- except ProcessLookupError:
- log('The victim died in the search process: ProcessLookupError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ProcessLookupError')
- return None
-
- len_vm = len(str(vm_size))
-
- try:
- realpath = os.path.realpath('/proc/' + pid + '/exe')
- victim_lifetime = format_time(uptime() - pid_to_starttime(pid))
- victim_cgroup_v1 = pid_to_cgroup_v1(pid)
- victim_cgroup_v2 = pid_to_cgroup_v2(pid)
-
- except FileNotFoundError:
- log('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
-
- ancestry = pid_to_ancestry(pid, max_victim_ancestry_depth)
-
- if print_victim_cmdline is False:
- cmdline = ''
- c1 = ''
- else:
- c1 = '\n Cmdline: '
-
- if detailed_rss:
- detailed_rss_info = ' (' \
- 'Anon: {} MiB, ' \
- 'File: {} MiB, ' \
- 'Shmem: {} MiB)'.format(
- anon_rss,
- file_rss,
- shmem_rss)
- else:
- detailed_rss_info = ''
-
- victim_info = 'Victim status (found in {} ms):' \
- '\n Name: {}' \
- '\n State: {}' \
- '\n PID: {}' \
- '{}' \
- '\n EUID: {}' \
- '\n badness: {}, ' \
- 'oom_score: {}, ' \
- 'oom_score_adj: {}' \
- '\n VmSize: {} MiB' \
- '\n VmRSS: {} MiB {}' \
- '\n VmSwap: {} MiB' \
- '\n CGroup_v1: {}' \
- '\n CGroup_v2: {}' \
- '\n Realpath: {}' \
- '{}{}' \
- '\n Lifetime: {}'.format(
- round((time() - status0) * 1000),
- name,
- state,
- pid,
- ancestry,
- uid,
- victim_badness,
- oom_score,
- oom_score_adj,
- vm_size,
- str(vm_rss).rjust(len_vm),
- detailed_rss_info,
- str(vm_swap).rjust(len_vm),
- victim_cgroup_v1,
- victim_cgroup_v2,
- realpath,
- c1, cmdline,
- victim_lifetime)
-
- return victim_info
-
-
-def check_mem_swap_ex():
- """
- Check: is mem and swap threshold exceeded?
- Return: None, (SIGTERM, meminfo), (SIGKILL, meminfo)
- """
-
- mem_available, swap_total, swap_free = check_mem_and_swap()
-
- # if hard_threshold_min_swap is set in percent
- if swap_kill_is_percent:
- hard_threshold_min_swap_kb = swap_total * \
- hard_threshold_min_swap_percent / 100.0
- else:
- hard_threshold_min_swap_kb = swap_kb_dict['hard_threshold_min_swap_kb']
-
- if swap_term_is_percent:
- soft_threshold_min_swap_kb = swap_total * \
- soft_threshold_min_swap_percent / 100.0
- else:
- soft_threshold_min_swap_kb = swap_kb_dict['soft_threshold_min_swap_kb']
-
- if swap_warn_is_percent:
- warning_threshold_min_swap_kb = swap_total * \
- warning_threshold_min_swap_percent / 100.0
- else:
- warning_threshold_min_swap_kb = swap_kb_dict['warning_threshold_min_swap_kb']
-
- if swap_total > hard_threshold_min_swap_kb:
- swap_sigkill_pc = percent(
- hard_threshold_min_swap_kb / (swap_total + 0.1))
- else:
- swap_sigkill_pc = '-'
-
- if swap_total > soft_threshold_min_swap_kb:
- swap_sigterm_pc = percent(
- soft_threshold_min_swap_kb / (swap_total + 0.1))
- else:
- swap_sigterm_pc = '-'
-
- if (mem_available <= hard_threshold_min_mem_kb and
- swap_free <= hard_threshold_min_swap_kb):
-
- mem_info = 'Memory status that requ' \
- 'ires corrective actions (hard threshold exceeded):' \
- '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
- 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
- 'p_min_sigkill [{} MiB, {} %]'.format(
- kib_to_mib(mem_available),
- percent(mem_available / mem_total),
- kib_to_mib(hard_threshold_min_mem_kb),
- percent(hard_threshold_min_mem_kb / mem_total),
- kib_to_mib(swap_free),
- percent(swap_free / (swap_total + 0.1)),
- kib_to_mib(hard_threshold_min_swap_kb),
- swap_sigkill_pc)
-
- return (SIGKILL, mem_info, mem_available, hard_threshold_min_swap_kb,
- soft_threshold_min_swap_kb, swap_free, swap_total)
-
- if (mem_available <= soft_threshold_min_mem_kb and
- swap_free <= soft_threshold_min_swap_kb):
-
- mem_info = 'Memory status that requi' \
- 'res corrective actions (soft threshold exceeded):' \
- '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
- 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
- 'p_min_sigterm [{} MiB, {} %]'.format(
- kib_to_mib(mem_available),
- percent(mem_available / mem_total),
- kib_to_mib(soft_threshold_min_mem_kb),
- round(soft_threshold_min_mem_percent, 1),
- kib_to_mib(swap_free),
- percent(swap_free / (swap_total + 0.1)),
- kib_to_mib(soft_threshold_min_swap_kb),
- swap_sigterm_pc)
-
- return (SIGTERM, mem_info, mem_available, hard_threshold_min_swap_kb,
- soft_threshold_min_swap_kb, swap_free, swap_total)
-
- if low_memory_warnings_enabled:
-
- if (mem_available <= warning_threshold_min_mem_kb and swap_free <=
- warning_threshold_min_swap_kb + 0.1):
- return ('WARN', None, mem_available, hard_threshold_min_swap_kb,
- soft_threshold_min_swap_kb, swap_free, swap_total)
-
- return (None, None, mem_available, hard_threshold_min_swap_kb,
- soft_threshold_min_swap_kb, swap_free, swap_total)
-
-
-def check_zram_ex():
- """
- """
- mem_used_zram = check_zram()
-
- if mem_used_zram >= hard_threshold_max_zram_kb:
-
- mem_info = 'Memory status that requir' \
- 'es corrective actions (hard threshold exceeded):' \
- '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
- 'kill [{} MiB, {} %]'.format(
- kib_to_mib(mem_used_zram),
- percent(mem_used_zram / mem_total),
- kib_to_mib(hard_threshold_max_zram_kb),
- percent(hard_threshold_max_zram_kb / mem_total))
-
- return SIGKILL, mem_info, mem_used_zram
-
- if mem_used_zram >= soft_threshold_max_zram_kb:
-
- mem_info = 'Memory status that requires corrective actions (soft th' \
- 'reshold exceeded):\n MemUsedZram [{} MiB, {} %] >= zram_max_s' \
- 'igterm [{} M, {} %]'.format(
- kib_to_mib(mem_used_zram),
- percent(mem_used_zram / mem_total),
- kib_to_mib(soft_threshold_max_zram_kb),
- percent(soft_threshold_max_zram_kb / mem_total))
-
- return SIGTERM, mem_info, mem_used_zram
-
- if low_memory_warnings_enabled:
- if mem_used_zram >= warning_threshold_max_zram_kb:
- return 'WARN', None, mem_used_zram
-
- return None, None, mem_used_zram
-
-
-def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0):
- """
- """
-
- delta0 = time() - x0
- x0 = time()
-
- psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
- # print(psi_avg_value)
-
- psi_post_action_delay_timer = time() - last_action_dict['t'] # psi_t0
-
- if psi_post_action_delay_timer >= psi_post_action_delay:
- psi_post_action_delay_exceeded = True
- else:
- psi_post_action_delay_exceeded = False
-
- if psi_avg_value >= hard_threshold_max_psi:
- sigkill_psi_exceeded = True
- psi_kill_exceeded_timer += delta0
- else:
- sigkill_psi_exceeded = False
- psi_kill_exceeded_timer = 0
-
- if debug_psi:
-
- log('psi_post_action_delay_timer: {}'.format(
- round(psi_post_action_delay_timer, 3)))
-
- log('psi_post_action_delay_exceeded: {}\nsigkill_psi_exceeded'
- ': {}\npsi_kill_exceeded_timer: {}'.format(
- psi_post_action_delay_exceeded,
- sigkill_psi_exceeded,
- round(psi_kill_exceeded_timer, 1)
- )
- )
-
- if (psi_kill_exceeded_timer >= psi_excess_duration and
- psi_post_action_delay_exceeded):
-
- mem_info = 'PSI avg ({}) > hard_threshold_max_psi ({})\n' \
- 'PSI avg exceeded psi_excess_duration (value' \
- ' = {} sec) for {} seconds'.format(
- psi_avg_value,
- hard_threshold_max_psi,
- psi_excess_duration,
- round(psi_kill_exceeded_timer, 1)
- )
-
- return (SIGKILL, mem_info, psi_t0, psi_kill_exceeded_timer,
- psi_term_exceeded_timer, x0)
-
- if psi_avg_value >= soft_threshold_max_psi:
- sigterm_psi_exceeded = True
- psi_term_exceeded_timer += delta0
- else:
- sigterm_psi_exceeded = False
- psi_term_exceeded_timer = 0
-
- if debug_psi:
-
- log('sigterm_psi_exceeded: {}\n'
- 'psi_term_exceeded_timer: {}\n'.format(
- sigterm_psi_exceeded,
- round(psi_term_exceeded_timer, 1)
- )
- )
-
- if (psi_term_exceeded_timer >= psi_excess_duration and
- psi_post_action_delay_exceeded):
-
- mem_info = 'PSI avg ({}) > soft_threshold_max_psi ({})\n' \
- 'PSI avg exceeded psi_excess_duration (value' \
- ' = {} sec) for {} seconds'.format(
- psi_avg_value,
- soft_threshold_max_psi,
- psi_excess_duration,
- round(psi_term_exceeded_timer, 1)
- )
-
- return (SIGTERM, mem_info, psi_t0, psi_kill_exceeded_timer,
- psi_term_exceeded_timer, x0)
-
- if low_memory_warnings_enabled:
-
- if psi_avg_value >= warning_threshold_max_psi:
- return ('WARN', None, psi_t0, psi_kill_exceeded_timer,
- psi_term_exceeded_timer, x0)
-
- return (None, None, psi_t0, psi_kill_exceeded_timer,
- psi_term_exceeded_timer, x0)
-
-
-def is_victim_alive(victim_id):
- """
- We do not have a reliable sign of the end of the release of memory:
- https://github.com/rfjakob/earlyoom/issues/128#issuecomment-507023717
-
- Варианты возврата:
- 0 X, nonexist, другой процесс (полн конец имплементации, можно не делать POST SIGKILL DELAY)
- 1 rp true
- 2 R освобождает память. Ждем смерти.
- 3 Z возможно уже освободил память. Конец отслеживания
- """
-
- # Проверка целостности жертвы
- starttime, pid = victim_id.split('_pid')
- new_victim_id = get_victim_id(pid)
- if victim_id != new_victim_id:
- return 0
-
- # Жива ли жертва?
- exe_exists = os.path.exists('/proc/{}/exe'.format(pid))
- if exe_exists:
- return 1
-
- # далее жертва смертельно ранена. Дифференцируемся по State.
- # R -> 2 # отслеживать жертву дальше
- # X, FNFE, PLE -> 0
-
- state = pid_to_state(pid)
-
- if state == 'R':
- return 2
-
- if state == 'Z':
- return 3
-
- if state == 'X' or state == '':
- return 0
-
- return 0
-
-
-def implement_corrective_action(
- threshold,
- mem_info_list,
- psi_t0,
- psi_kill_exceeded_timer,
- psi_term_exceeded_timer,
- x0,
- psi_threshold,
- zram_threshold,
- zram_info,
- psi_info):
-
- log('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
-
- debug_corrective_action = True
-
- time0 = time()
-
- # 1. Очистка словаря от мертвых. Итерация по словарю, отслеживание умирающих.
- # 2. Итерация по оставшемуся словарю. Поиск дельт. Если хоть у одного
- # дельта НЕ истекла - ЖДЕМ, выход из фции.
-
- # print(v_dict)
- nu = []
-
- for victim_id in v_dict:
- iva = is_victim_alive(victim_id)
- #print(iva, victim_id)
- if iva == 0 or iva == 3:
- nu.append(victim_id)
- """
- continue
- if iva == 1:
- continue
- if iva == 2:
- pass # быстро отследить умирающего
- """
-
- for i in nu:
- if debug_corrective_action:
- log('Remove {} from v_dict'.format(i))
- v_dict.pop(i)
-
- x = False
- cache_list = []
- #cache_list.append(('foo', 0.01))
- #cache_list.append(('boo', 1111.01))
- # 2
- # print(v_dict)
-
- for victim_id in v_dict:
- tx = v_dict[victim_id]['time']
- ddt = time() - tx
- if ddt < victim_cache_time:
-
- if debug_corrective_action:
- log(
- 'victim_cache_time is not exceeded for {} ({} < {})'.format(
- victim_id, round(ddt, 3), victim_cache_time
- )
- )
- x = True
- cache_list.append((victim_id, ddt))
- break
-
- if x:
- # print(cache_list)
- e = sorted(cache_list, key=itemgetter(1), reverse=False)
- cached_victim_id = e[0][0]
-
- for i in mem_info_list:
- log(i)
-
- if x:
- victim_id = cached_victim_id
- pid = victim_id.partition('_pid')[2]
- victim_badness = pid_to_badness(pid)[0]
- name = v_dict[victim_id]['name']
- log('New victim is cached victim {} ({})'.format(pid, name))
- else:
- pid, victim_badness, name, victim_id = find_victim(print_proc_table)
-
- log('Recheck memory levels...')
-
- (masf_threshold, masf_info, mem_available, hard_threshold_min_swap_kb,
- soft_threshold_min_swap_kb, swap_free, swap_total) = check_mem_swap_ex()
-
- if CHECK_ZRAM:
- zram_threshold, zram_info, mem_used_zram = check_zram_ex()
-
- if CHECK_PSI:
- (psi_threshold, psi_info, psi_t0, psi_kill_exceeded_timer,
- psi_term_exceeded_timer, x0) = check_psi_ex(
- psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0)
-
- if (masf_threshold is SIGKILL or zram_threshold is SIGKILL or
- psi_threshold is SIGKILL):
-
- new_threshold = SIGKILL
- mem_info_list = []
-
- if masf_threshold is SIGKILL or masf_threshold is SIGTERM:
- mem_info_list.append(masf_info)
-
- if zram_threshold is SIGKILL or zram_threshold is SIGTERM:
- mem_info_list.append(zram_info)
-
- if psi_threshold is SIGKILL or psi_threshold is SIGTERM:
- mem_info_list.append(psi_info)
-
- elif (masf_threshold is SIGTERM or zram_threshold is SIGTERM or
- psi_threshold is SIGTERM):
-
- new_threshold = SIGTERM
- mem_info_list = []
-
- if masf_threshold is SIGKILL or masf_threshold is SIGTERM:
- mem_info_list.append(masf_info)
-
- if zram_threshold is SIGKILL or zram_threshold is SIGTERM:
- mem_info_list.append(zram_info)
-
- if psi_threshold is SIGKILL or psi_threshold is SIGTERM:
- mem_info_list.append(psi_info)
-
- else:
- log('Thresholds is not exceeded now')
- return psi_t0
-
- for i in mem_info_list:
- log(i)
-
- if new_threshold is None or new_threshold == 'WARN':
- log('Thresholds is not exceeded now')
- return psi_t0
-
- threshold = new_threshold
-
- vwd = None # Victim Will Die
-
- if victim_badness >= min_badness:
-
- if threshold is SIGTERM:
- if victim_id in v_dict:
- dt = time() - v_dict[victim_id]['time']
- if dt > max_soft_exit_time:
- log('max_soft_exit_time is exceeded: the '
- 'victim will get SIGKILL')
- threshold = SIGKILL
- else:
- log('max_soft_exit_time is not exceeded ('
- '{} < {}) for the victim'.format(round(
- dt, 1), max_soft_exit_time))
-
- if debug_sleep:
- log('Sleep {} sec (over_sleep)'.format(over_sleep))
- sleep(over_sleep)
-
- log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
-
- return psi_t0
-
- # log('Try to implement a corrective action...')
-
- if print_victim_status:
- # victim badness ищи снова, не полагайся на старое
- victim_info = find_victim_info(pid, victim_badness, name)
- log(victim_info)
-
- soft_match = False
- if soft_actions and threshold is SIGTERM:
- name = pid_to_name(pid)
- cgroup_v1 = pid_to_cgroup_v1(pid)
- service = ''
- cgroup_v1_tail = cgroup_v1.rpartition('/')[2]
- if cgroup_v1_tail.endswith('.service'):
- service = cgroup_v1_tail
- for i in soft_actions_list:
- unit = i[0]
- if unit == 'name':
- u = name
- else:
- u = cgroup_v1
- regexp = i[1]
- command = i[2]
-
- if search(regexp, u) is not None:
- log("Regexp '{}' matches with {} '{}'".format(
- regexp, unit, u))
- soft_match = True
- break
-
- if soft_match:
-
- cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name(
- pid)).replace('$SERVICE', service)
- go(exe, cmd)
-
- """
- if exit_status == 0:
- success = True
- else:
- success = False
- """
-
- response_time = time() - time0
-
- exit_status = None
-
- preventing_oom_message = 'Implement a corrective act' \
- 'ion:\n Run the command: {}' \
- '\n Exit status: {}; total response ' \
- 'time: {} ms'.format(
- cmd,
- exit_status,
- round(response_time * 1000))
-
- else:
-
- try:
- os.kill(int(pid), threshold)
-
- response_time = time() - time0
-
- send_result = 'total response time: {} ms'.format(
- round(response_time * 1000))
-
- preventing_oom_message = 'Implement a corrective action:' \
- '\n Send {} to the victim; {}'.format(
- sig_dict[threshold], send_result)
-
- # success = True
-
- if threshold is SIGKILL:
- vwd = True
-
- except FileNotFoundError:
- vwd = True
- # success = False
- # response_time = time() - time0
- # send_result = 'no such process; response time: {} ms'.format(round(response_time * 1000))
- key = 'The victim died in the search process: ' \
- 'FileNotFoundError'
- except ProcessLookupError:
- vwd = True
- # success = False
- # response_time = time() - time0
- # send_result = 'no such process; response time: {} ms'.format(round(response_time * 1000))
- key = 'The victim died in the search process: ' \
- 'ProcessLookupError'
-
- try:
- log(preventing_oom_message)
- except UnboundLocalError:
- pass
- # preventing_oom_message = key
-
- if not vwd:
- if victim_id not in v_dict:
- v_dict[victim_id] = dict()
- v_dict[victim_id]['time'] = time()
- v_dict[victim_id]['name'] = name
- else:
- pass
-
- last_action_dict['t'] = kill_timestamp = time()
-
- # print(v_dict)
-
- # response_time = time() - time0
-
- # log('success: ' + str(success))
- # log('victim will die: ' + str(vwd))
- # log('response_time: ' + str(response_time) + ' sec')
-
- # НАЧАЛО ОТСЛЕЖИВАНИЯ СОСТОЯНИЯ ЖЕРТВЫ. Можно вынести в отд фц. Приним
- # айди, логирует, возвращает что-то.
-
- # Далее поработать со словарями. Жертва тут умерла - сброс таймера. Все
- # старые жертвы умерли до 3х секунд с следующих циклах - сброс таймера.
- # После этого все должно быть супер охуенно.
-
- while True:
- sleep(0.005)
- d = time() - kill_timestamp
- #print('Прошло времени:', d)
- iva = is_victim_alive(victim_id)
-
- if iva == 0:
-
- log('The victim died in {} sec'.format(round(d, 3)))
-
- if victim_id in v_dict:
- v_dict.pop(victim_id)
- break
-
- elif iva == 1:
- #print('Жива и занимает память')
- if not vwd and d > sensitivity_test_time:
-
- log("The victim doesn't respond on corrective action in {} sec".format(
- round(d, 3)))
-
- break
-
- elif iva == 2:
- pass
- #print('Смертельно ранена и освобождает память. Дождаться окончания освобождения памяти.')
-
- else: # 3
- #print('Z и быстро освобождает память, если еще не. Поспать немножно и выйти из цикла.')
-
- log('The victim became a zombie in {} sec'.format(round(d, 3)))
-
- if victim_id in v_dict:
- v_dict.pop(victim_id)
- sleep(post_zombie_delay)
- break
-
- mem_available, swap_total, swap_free = check_mem_and_swap()
- ma_mib = int(mem_available) / 1024.0
- sf_mib = int(swap_free) / 1024.0
- log('Memory status after implementing a corrective act'
- 'ion:\n MemAvailable'
- ': {} MiB, SwapFree: {} MiB'.format(
- round(ma_mib, 1), round(sf_mib, 1)))
-
- if soft_match is False:
- key = 'Send {} to {}'.format(sig_dict[threshold], name)
- update_stat_dict_and_print(key)
- else:
- key = "Run the command '{}'".format(command)
- update_stat_dict_and_print(key)
-
- if threshold is SIGKILL and post_kill_exe != '':
-
- cmd = post_kill_exe.replace('$PID', pid).replace(
- '$NAME', pid_to_name(pid))
-
- log('Execute post_kill_exe')
-
- go(exe, cmd)
-
- if post_action_gui_notifications:
- if soft_match:
- send_notify_etc(pid, name, cmd)
- else:
- send_notify(threshold, name, pid)
-
- else:
-
- response_time = time() - time0
- victim_badness_is_too_small = 'victim badness ({}) < min_b' \
- 'adness ({}); nothing to do; response time: {} ms'.format(
- victim_badness,
- min_badness,
- round(response_time * 1000))
-
- log(victim_badness_is_too_small)
-
- # update stat_dict
- key = 'victim badness < min_badness'
- update_stat_dict_and_print(key)
-
- if vwd is None:
-
- if debug_sleep:
- log('Sleep {} sec (over_sleep)'.format(over_sleep))
- sleep(over_sleep)
-
- log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
-
- return psi_t0
-
-
-def sleep_after_check_mem():
- """Specify sleep times depends on rates and avialable memory."""
-
- if stable_sleep:
-
- if debug_sleep:
- log('Sleep {} sec'.format(min_sleep))
- stdout.flush()
- sleep(min_sleep)
- return None
-
- if hard_threshold_min_mem_kb < soft_threshold_min_mem_kb:
- mem_point = mem_available - soft_threshold_min_mem_kb
- else:
- mem_point = mem_available - hard_threshold_min_mem_kb
-
- if hard_threshold_min_swap_kb < soft_threshold_min_swap_kb:
- swap_point = swap_free - soft_threshold_min_swap_kb
- else:
- swap_point = swap_free - hard_threshold_min_swap_kb
-
- if swap_point < 0:
- swap_point = 0
-
- if mem_point < 0:
- mem_point = 0
-
- t_mem = mem_point / fill_rate_mem
- t_swap = swap_point / fill_rate_swap
-
- if CHECK_ZRAM:
- t_zram = (mem_total * 0.8 - mem_used_zram) / fill_rate_zram
- if t_zram < 0:
- t_zram = 0
- t_mem_zram = t_mem + t_zram
- z = ', t_zram={}'.format(round(t_zram, 2))
- else:
- z = ''
-
- t_mem_swap = t_mem + t_swap
-
- if CHECK_ZRAM:
-
- if t_mem_swap <= t_mem_zram:
- t = t_mem_swap
- else:
- t = t_mem_zram
- else:
- t = t_mem_swap
-
- if t > max_sleep:
- t = max_sleep
- elif t < min_sleep:
- t = min_sleep
- else:
- pass
-
- if debug_sleep:
- log('Sleep {} sec (t_mem={}, t_swap={}{})'.format(round(t, 2), round(
- t_mem, 2), round(t_swap, 2), z))
-
- try:
- stdout.flush()
- except OSError:
- pass
-
- sleep(t)
-
-
-def calculate_percent(arg_key):
- """
- parse conf dict
- Calculate mem_min_KEY_percent.
-
- Try use this one)
- arg_key: str key for config_dict
- returns int mem_min_percent or NoneType if got some error
- """
-
- if arg_key in config_dict:
- mem_min = config_dict[arg_key]
-
- if mem_min.endswith('%'):
- # truncate percents, so we have a number
- mem_min_percent = mem_min[:-1].strip()
- # then 'float test'
- mem_min_percent = string_to_float_convert_test(mem_min_percent)
- if mem_min_percent is None:
- errprint('Invalid {} value, not float\nExit'.format(arg_key))
- exit(1)
- # Final validations...
- if mem_min_percent < 0 or mem_min_percent > 100:
- errprint(
- '{}, as percents value, out of ran'
- 'ge [0; 100]\nExit'.format(arg_key))
- exit(1)
-
- # soft_threshold_min_mem_percent is clean and valid float percentage. Can
- # translate into Kb
- mem_min_kb = mem_min_percent / 100 * mem_total
- mem_min_mb = round(mem_min_kb / 1024)
-
- elif mem_min.endswith('M'):
- mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip())
- if mem_min_mb is None:
- errprint('Invalid {} value, not float\nExit'.format(arg_key))
- exit(1)
- mem_min_kb = mem_min_mb * 1024
- if mem_min_kb > mem_total:
- errprint(
- '{} value can not be greater then MemT'
- 'otal ({} MiB)\nExit'.format(
- arg_key, round(
- mem_total / 1024)))
- exit(1)
- mem_min_percent = mem_min_kb / mem_total * 100
-
- else:
- log('Invalid {} units in config.\n Exit'.format(arg_key))
- exit(1)
- mem_min_percent = None
-
- else:
- log('{} not in config\nExit'.format(arg_key))
- exit(1)
- mem_min_percent = None
-
- return mem_min_kb, mem_min_mb, mem_min_percent
-
-
-##########################################################################
-
-
-# {victim_id : {'time': timestamp, 'name': name}
-v_dict = dict()
-
-
-start_time = time()
-
-
-help_mess = """usage: nohang [-h] [-v] [-p] [-c CONFIG] [-cc CONFIG]
-
-optional arguments:
- -h, --help show this help message and exit
- -v, --version print version
- -p, --print-proc-table
- print table of processes with their badness values
- -c CONFIG, --config CONFIG
- path to the config file, default values:
- ./nohang.conf, /etc/nohang/nohang.conf
- -cc CONFIG, --check-config CONFIG
- check and print config"""
-
-
-SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
-
-SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE'])
-
-conf_err_mess = 'Invalid config. Exit.'
-
-sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP]
-
-sig_dict = {
- SIGKILL: 'SIGKILL',
- SIGINT: 'SIGINT',
- SIGQUIT: 'SIGQUIT',
- SIGHUP: 'SIGHUP',
- SIGTERM: 'SIGTERM'
-}
-
-self_pid = str(os.getpid())
-
-self_uid = os.geteuid()
-
-if self_uid == 0:
- root = True
-else:
- root = False
-
-
-if os.path.exists('./nohang_notify_helper'):
- notify_helper_path = './nohang_notify_helper'
-else:
- notify_helper_path = 'nohang_notify_helper'
-
-
-last_action_dict = dict()
-
-last_action_dict['t'] = time()
-
-
-# will store corrective actions stat
-stat_dict = dict()
-
-
-separate_log = False # will be overwritten after parse config
-
-
-cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes()
-
-
-self_oom_score_adj_min = '-600'
-self_oom_score_adj_max = '-6'
-
-
-write_self_oom_score_adj(self_oom_score_adj_min)
-
-
-pid_list = get_pid_list()
-
-
-print_proc_table_flag = False
-
-check_config_flag = False
-
-
-if os.path.exists('./nohang.conf'):
- config = os.getcwd() + '/nohang.conf'
-else:
- config = '/etc/nohang/nohang.conf'
-
-
-if len(argv) == 1:
- pass
-elif len(argv) == 2:
- if argv[1] == '--help' or argv[1] == '-h':
- print(help_mess)
- exit()
- elif argv[1] == '--check-config' or argv[1] == '-cc':
- check_config_flag = True
- elif argv[1] == '--version' or argv[1] == '-v':
- print_version()
- elif argv[1] == '--print-proc-table' or argv[1] == '-p':
- print_proc_table_flag = True
- if os.path.exists('./nohang.conf'):
- config = os.getcwd() + '/nohang.conf'
- else:
- config = '/etc/nohang/nohang.conf'
- else:
- errprint('Unknown option: {}'.format(argv[1]))
- exit(1)
-elif len(argv) == 3:
- if argv[1] == '--config' or argv[1] == '-c':
- config = argv[2]
- elif argv[1] == '--check-config' or argv[1] == '-cc':
- config = argv[2]
- check_config_flag = True
- else:
- errprint('Unknown option: {}'.format(argv[1]))
- exit(1)
-else:
- errprint('Invalid CLI input: too many options')
- exit(1)
-
-
-# find mem_total
-# find positions of SwapFree and SwapTotal in /proc/meminfo
-
-with open('/proc/meminfo') as f:
- mem_list = f.readlines()
-
-mem_list_names = []
-for s in mem_list:
- mem_list_names.append(s.split(':')[0])
-
-if mem_list_names[2] != 'MemAvailable':
- errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied')
- exit(1)
-
-swap_total_index = mem_list_names.index('SwapTotal')
-swap_free_index = swap_total_index + 1
-
-mem_total = int(mem_list[0].split(':')[1][:-4])
-
-# Get names from /proc/*/status to be able to get VmRSS and VmSwap values
-
-with open('/proc/self/status') as file:
- status_list = file.readlines()
-
-status_names = []
-for s in status_list:
- status_names.append(s.split(':')[0])
-
-ppid_index = status_names.index('PPid')
-vm_size_index = status_names.index('VmSize')
-vm_rss_index = status_names.index('VmRSS')
-vm_swap_index = status_names.index('VmSwap')
-uid_index = status_names.index('Uid')
-state_index = status_names.index('State')
-
-
-try:
- anon_index = status_names.index('RssAnon')
- file_index = status_names.index('RssFile')
- shmem_index = status_names.index('RssShmem')
- detailed_rss = True
- # print(detailed_rss, 'detailed_rss')
-except ValueError:
- detailed_rss = False
- # print('It is not Linux 4.5+')
-
-
-log('config: ' + config)
-
-
-##########################################################################
-
-# parsing the config with obtaining the parameters dictionary
-
-# conf_parameters_dict
-# conf_restart_dict
-
-# dictionary with config options
-config_dict = dict()
-
-badness_adj_re_name_list = []
-badness_adj_re_cmdline_list = []
-badness_adj_re_environ_list = []
-badness_adj_re_uid_list = []
-badness_adj_re_cgroup_v1_list = []
-badness_adj_re_cgroup_v2_list = []
-badness_adj_re_realpath_list = []
-
-soft_actions_list = []
-
-# separator for optional parameters (that starts with @)
-opt_separator = '///'
-
-# stupid conf parsing, need refactoring
-try:
- with open(config) as f:
-
- for line in f:
-
- a = line.startswith('#')
- b = line.startswith('\n')
- c = line.startswith('\t')
- d = line.startswith(' ')
-
- etc = line.startswith('@SOFT_ACTION_RE_NAME')
- etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1')
-
- if not a and not b and not c and not d and not etc and not etc2:
- a = line.partition('=')
-
- key = a[0].strip()
- value = a[2].strip()
-
- if key not in config_dict:
- config_dict[key] = value
- else:
- log('ERROR: config key duplication: {}'.format(key))
- exit(1)
-
- if etc:
-
- a = line.partition('@SOFT_ACTION_RE_NAME')[
- 2].partition(opt_separator)
-
- a1 = 'name'
-
- a2 = a[0].strip()
- valid_re(a2)
-
- a3 = a[2].strip()
-
- zzz = (a1, a2, a3)
-
- soft_actions_list.append(zzz)
-
- if etc2:
-
- a = line.partition('@SOFT_ACTION_RE_CGROUP_V1')[
- 2].partition(opt_separator)
-
- a1 = 'cgroup_v1'
-
- a2 = a[0].strip()
- valid_re(a2)
-
- a3 = a[2].strip()
-
- zzz = (a1, a2, a3)
-
- soft_actions_list.append(zzz)
-
- if line.startswith('@BADNESS_ADJ_RE_NAME'):
- a = line.partition('@BADNESS_ADJ_RE_NAME')[2].strip(
- ' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- badness_adj_re_name_list.append((badness_adj, reg_exp))
-
- if line.startswith('@BADNESS_ADJ_RE_CMDLINE'):
- a = line.partition('@BADNESS_ADJ_RE_CMDLINE')[2].strip(
- ' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- badness_adj_re_cmdline_list.append((badness_adj, reg_exp))
-
- if line.startswith('@BADNESS_ADJ_RE_UID'):
- a = line.partition('@BADNESS_ADJ_RE_UID')[2].strip(
- ' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- badness_adj_re_uid_list.append((badness_adj, reg_exp))
-
- if line.startswith('@BADNESS_ADJ_RE_CGROUP_V1'):
- a = line.partition('@BADNESS_ADJ_RE_CGROUP_V1')[2].strip(
- ' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- badness_adj_re_cgroup_v1_list.append((badness_adj, reg_exp))
-
- if line.startswith('@BADNESS_ADJ_RE_CGROUP_V2'):
- a = line.partition('@BADNESS_ADJ_RE_CGROUP_V2')[2].strip(
- ' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- badness_adj_re_cgroup_v2_list.append((badness_adj, reg_exp))
-
- if line.startswith('@BADNESS_ADJ_RE_REALPATH'):
- a = line.partition('@BADNESS_ADJ_RE_REALPATH')[2].strip(
- ' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- badness_adj_re_realpath_list.append((badness_adj, reg_exp))
-
- if line.startswith('@BADNESS_ADJ_RE_ENVIRON'):
- a = line.partition('@BADNESS_ADJ_RE_ENVIRON')[2].strip(
- ' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- badness_adj_re_environ_list.append((badness_adj, reg_exp))
-
-
-except PermissionError:
- errprint('PermissionError', conf_err_mess)
- exit(1)
-except UnicodeDecodeError:
- errprint('UnicodeDecodeError', conf_err_mess)
- exit(1)
-except IsADirectoryError:
- errprint('IsADirectoryError', conf_err_mess)
- exit(1)
-except IndexError:
- errprint('IndexError', conf_err_mess)
- exit(1)
-except FileNotFoundError:
- errprint('FileNotFoundError', conf_err_mess)
- exit(1)
-
-
-if badness_adj_re_name_list == []:
- regex_matching = False
-else:
- regex_matching = True
-
-
-if badness_adj_re_cmdline_list == []:
- re_match_cmdline = False
-else:
- re_match_cmdline = True
-
-
-if badness_adj_re_uid_list == []:
- re_match_uid = False
-else:
- re_match_uid = True
-
-
-if badness_adj_re_environ_list == []:
- re_match_environ = False
-else:
- re_match_environ = True
-
-
-if badness_adj_re_realpath_list == []:
- re_match_realpath = False
-else:
- re_match_realpath = True
-
-
-if badness_adj_re_cgroup_v1_list == []:
- re_match_cgroup_v1 = False
-else:
- re_match_cgroup_v1 = True
-
-
-if badness_adj_re_cgroup_v2_list == []:
- re_match_cgroup_v2 = False
-else:
- re_match_cgroup_v2 = True
-
-
-if soft_actions_list == []:
- soft_actions = False
-else:
- soft_actions = True
-
-
-##########################################################################
-
-
-# post_zombie_delay = 0.1
-
-# victim_cache_time = 50
-
-
-# extracting parameters from the dictionary
-# check for all necessary parameters
-# validation of all parameters
-debug_psi = conf_parse_bool('debug_psi')
-print_statistics = conf_parse_bool('print_statistics')
-print_proc_table = conf_parse_bool('print_proc_table')
-forbid_negative_badness = conf_parse_bool('forbid_negative_badness')
-print_victim_status = conf_parse_bool('print_victim_status')
-print_victim_cmdline = conf_parse_bool('print_victim_cmdline')
-print_config_at_startup = conf_parse_bool('print_config_at_startup')
-print_mem_check_results = conf_parse_bool('print_mem_check_results')
-debug_sleep = conf_parse_bool('debug_sleep')
-low_memory_warnings_enabled = conf_parse_bool('low_memory_warnings_enabled')
-post_action_gui_notifications = conf_parse_bool(
- 'post_action_gui_notifications')
-
-
-psi_checking_enabled = conf_parse_bool('psi_checking_enabled')
-ignore_psi = not psi_checking_enabled
-
-zram_checking_enabled = conf_parse_bool('zram_checking_enabled')
-ignore_zram = not zram_checking_enabled
-
-
-debug_gui_notifications = conf_parse_bool('debug_gui_notifications')
-ignore_positive_oom_score_adj = conf_parse_bool(
- 'ignore_positive_oom_score_adj')
-
-
-(soft_threshold_min_mem_kb, soft_threshold_min_mem_mb,
- soft_threshold_min_mem_percent) = calculate_percent('soft_threshold_min_mem')
-
-(hard_threshold_min_mem_kb, hard_threshold_min_mem_mb,
- hard_threshold_min_mem_percent) = calculate_percent('hard_threshold_min_mem')
-
-(soft_threshold_max_zram_kb, soft_threshold_max_zram_mb,
- soft_threshold_max_zram_percent) = calculate_percent('soft_threshold_max_zram')
-
-(hard_threshold_max_zram_kb, hard_threshold_max_zram_mb,
- hard_threshold_max_zram_percent) = calculate_percent('hard_threshold_max_zram')
-
-(warning_threshold_min_mem_kb, warning_threshold_min_mem_mb,
- warning_threshold_min_mem_percent) = calculate_percent('warning_threshold_min_mem')
-
-(warning_threshold_max_zram_kb, warning_threshold_max_zram_mb,
- warning_threshold_max_zram_percent) = calculate_percent('warning_threshold_max_zram')
-
-
-if 'post_zombie_delay' in config_dict:
- post_zombie_delay = string_to_float_convert_test(
- config_dict['post_zombie_delay'])
- if post_zombie_delay is None:
- errprint('Invalid post_zombie_delay, not float\nExit')
- exit(1)
- if post_zombie_delay < 0:
- errprint('post_zombie_delay MUST be >= 0\nExit')
- exit(1)
-else:
- errprint('post_zombie_delay not in config\nExit')
- exit(1)
-
-
-if 'victim_cache_time' in config_dict:
- victim_cache_time = string_to_float_convert_test(
- config_dict['victim_cache_time'])
- if victim_cache_time is None:
- errprint('Invalid victim_cache_time, not float\nExit')
- exit(1)
- if victim_cache_time < 0:
- errprint('victim_cache_time MUST be >= 0\nExit')
- exit(1)
-else:
- errprint('victim_cache_time not in config\nExit')
- exit(1)
-
-
-if 'fill_rate_mem' in config_dict:
- fill_rate_mem = string_to_float_convert_test(config_dict['fill_rate_mem'])
- if fill_rate_mem is None:
- errprint('Invalid fill_rate_mem value, not float\nExit')
- exit(1)
- if fill_rate_mem <= 0:
- errprint('fill_rate_mem MUST be > 0\nExit')
- exit(1)
-else:
- errprint('fill_rate_mem not in config\nExit')
- exit(1)
-
-
-if 'fill_rate_swap' in config_dict:
- fill_rate_swap = string_to_float_convert_test(
- config_dict['fill_rate_swap'])
- if fill_rate_swap is None:
- errprint('Invalid fill_rate_swap value, not float\nExit')
- exit(1)
- if fill_rate_swap <= 0:
- errprint('fill_rate_swap MUST be > 0\nExit')
- exit(1)
-else:
- errprint('fill_rate_swap not in config\nExit')
- exit(1)
-
-
-if 'fill_rate_zram' in config_dict:
- fill_rate_zram = string_to_float_convert_test(
- config_dict['fill_rate_zram'])
- if fill_rate_zram is None:
- errprint('Invalid fill_rate_zram value, not float\nExit')
- exit(1)
- if fill_rate_zram <= 0:
- errprint('fill_rate_zram MUST be > 0\nExit')
- exit(1)
-else:
- errprint('fill_rate_zram not in config\nExit')
- exit(1)
-
-
-if 'soft_threshold_min_swap' in config_dict:
- soft_threshold_min_swap = config_dict['soft_threshold_min_swap']
-else:
- errprint('soft_threshold_min_swap not in config\nExit')
- exit(1)
-
-
-if 'hard_threshold_min_swap' in config_dict:
- hard_threshold_min_swap = config_dict['hard_threshold_min_swap']
-else:
- errprint('hard_threshold_min_swap not in config\nExit')
- exit(1)
-
-
-if 'post_soft_action_delay' in config_dict:
- post_soft_action_delay = string_to_float_convert_test(
- config_dict['post_soft_action_delay'])
- if post_soft_action_delay is None:
- errprint('Invalid post_soft_action_delay value, not float\nExit')
- exit(1)
- if post_soft_action_delay < 0:
- errprint('post_soft_action_delay must be positiv\nExit')
- exit(1)
-else:
- errprint('post_soft_action_delay not in config\nExit')
- exit(1)
-
-
-if 'psi_post_action_delay' in config_dict:
- psi_post_action_delay = string_to_float_convert_test(
- config_dict['psi_post_action_delay'])
- if psi_post_action_delay is None:
- errprint('Invalid psi_post_action_delay value, not float\nExit')
- exit(1)
- if psi_post_action_delay < 0:
- errprint('psi_post_action_delay must be positive\nExit')
- exit(1)
-else:
- errprint('psi_post_action_delay not in config\nExit')
- exit(1)
-
-
-if 'hard_threshold_max_psi' in config_dict:
- hard_threshold_max_psi = string_to_float_convert_test(
- config_dict['hard_threshold_max_psi'])
- if hard_threshold_max_psi is None:
- errprint('Invalid hard_threshold_max_psi value, not float\nExit')
- exit(1)
- if hard_threshold_max_psi < 0 or hard_threshold_max_psi > 100:
- errprint('hard_threshold_max_psi must be in the range [0; 100]\nExit')
- exit(1)
-else:
- errprint('hard_threshold_max_psi not in config\nExit')
- exit(1)
-
-
-if 'soft_threshold_max_psi' in config_dict:
- soft_threshold_max_psi = string_to_float_convert_test(
- config_dict['soft_threshold_max_psi'])
- if soft_threshold_max_psi is None:
- errprint('Invalid soft_threshold_max_psi value, not float\nExit')
- exit(1)
- if soft_threshold_max_psi < 0 or soft_threshold_max_psi > 100:
- errprint('soft_threshold_max_psi must be in the range [0; 100]\nExit')
- exit(1)
-else:
- errprint('soft_threshold_max_psi not in config\nExit')
- exit(1)
-
-
-if 'warning_threshold_max_psi' in config_dict:
- warning_threshold_max_psi = string_to_float_convert_test(
- config_dict['warning_threshold_max_psi'])
- if warning_threshold_max_psi is None:
- errprint('Invalid warning_threshold_max_psi value, not float\nExit')
- exit(1)
- if warning_threshold_max_psi < 0 or warning_threshold_max_psi > 100:
- errprint(
- 'warning_threshold_max_psi must be in the range [0; 100]\nExit')
- exit(1)
-else:
- errprint('warning_threshold_max_psi not in config\nExit')
- exit(1)
-
-
-if 'min_badness' in config_dict:
- min_badness = string_to_int_convert_test(
- config_dict['min_badness'])
- if min_badness is None:
- errprint('Invalid min_badness value, not integer\nExit')
- exit(1)
- if min_badness < 0 or min_badness > 1000:
- errprint('Invalud min_badness value\nExit')
- exit(1)
-else:
- errprint('min_badness not in config\nExit')
- exit(1)
-
-
-if 'min_post_warning_delay' in config_dict:
- min_post_warning_delay = string_to_float_convert_test(
- config_dict['min_post_warning_delay'])
- if min_post_warning_delay is None:
- errprint('Invalid min_post_warning_delay value, not float\nExit')
- exit(1)
- if min_post_warning_delay < 1 or min_post_warning_delay > 300:
- errprint('min_post_warning_delay value out of range [1; 300]\nExit')
- exit(1)
-else:
- errprint('min_post_warning_delay not in config\nExit')
- exit(1)
-
-
-if 'warning_threshold_min_swap' in config_dict:
- warning_threshold_min_swap = config_dict['warning_threshold_min_swap']
-else:
- errprint('warning_threshold_min_swap not in config\nExit')
- exit(1)
-
-
-if 'max_victim_ancestry_depth' in config_dict:
- max_victim_ancestry_depth = string_to_int_convert_test(
- config_dict['max_victim_ancestry_depth'])
- if min_badness is None:
- errprint('Invalid max_victim_ancestry_depth value, not integer\nExit')
- exit(1)
- if max_victim_ancestry_depth < 1:
- errprint('Invalud max_victim_ancestry_depth value\nExit')
- exit(1)
-else:
- errprint('max_victim_ancestry_depth is not in config\nExit')
- exit(1)
-
-
-if 'max_soft_exit_time' in config_dict:
- max_soft_exit_time = string_to_float_convert_test(
- config_dict['max_soft_exit_time'])
- if max_soft_exit_time is None:
- errprint('Invalid max_soft_exit_time val'
- 'ue, not float\nExit')
- exit(1)
- if max_soft_exit_time < 0:
- errprint('max_soft_exit_time must be non-n'
- 'egative number\nExit')
- exit(1)
-else:
- errprint('max_soft_exit_time is not in config\nExit')
- exit(1)
-
-
-if 'post_kill_exe' in config_dict:
- post_kill_exe = config_dict['post_kill_exe']
-else:
- errprint('post_kill_exe is not in config\nExit')
- exit(1)
-
-
-if 'psi_path' in config_dict:
- psi_path = config_dict['psi_path']
-else:
- errprint('psi_path is not in config\nExit')
- exit(1)
-
-
-if 'psi_metrics' in config_dict:
- psi_metrics = config_dict['psi_metrics']
-else:
- errprint('psi_metrics is not in config\nExit')
- exit(1)
-
-
-if 'warning_exe' in config_dict:
- warning_exe = config_dict['warning_exe']
- if warning_exe != '':
- check_warning_exe = True
- else:
- check_warning_exe = False
-else:
- errprint('warning_exe is not in config\nExit')
- exit(1)
-
-
-if 'extra_table_info' in config_dict:
- extra_table_info = config_dict['extra_table_info']
- if (extra_table_info != 'None' and
- extra_table_info != 'cgroup_v1' and
- extra_table_info != 'cgroup_v2' and
- extra_table_info != 'cmdline' and
- extra_table_info != 'environ' and
- extra_table_info != 'realpath'):
-
- errprint('Invalid config: invalid extra_table_info value\nExit')
- exit(1)
-else:
- errprint('Invalid config: extra_table_info is not in config\nExit')
- exit(1)
-
-
-separate_log = conf_parse_bool('separate_log')
-
-if separate_log:
-
- import logging
-
- log_dir = '/var/log/nohang'
-
- try:
- os.mkdir(log_dir)
- except PermissionError:
- print('ERROR: can not create log dir')
- except FileExistsError:
- pass
-
- logfile = log_dir + '/nohang.log'
-
- try:
- with open(logfile, 'a') as f:
- pass
- except FileNotFoundError:
- print('ERROR: log FileNotFoundError')
- except PermissionError:
- print('ERROR: log PermissionError')
-
- try:
- logging.basicConfig(
- filename=logfile,
- level=logging.INFO,
- format="%(asctime)s: %(message)s")
- except PermissionError:
- errprint('ERROR: Permission denied: {}'.format(logfile))
- except FileNotFoundError:
- errprint('ERROR: FileNotFoundError: {}'.format(logfile))
-
-
-if 'min_mem_report_interval' in config_dict:
- min_mem_report_interval = string_to_float_convert_test(
- config_dict['min_mem_report_interval'])
- if min_mem_report_interval is None:
- errprint('Invalid min_mem_report_interval value, not float\nExit')
- exit(1)
- if min_mem_report_interval < 0:
- errprint('min_mem_report_interval must be non-negative number\nExit')
- exit(1)
-else:
- errprint('min_mem_report_interval is not in config\nExit')
- exit(1)
-
-
-if 'psi_excess_duration' in config_dict:
- psi_excess_duration = string_to_float_convert_test(
- config_dict['psi_excess_duration'])
- if psi_excess_duration is None:
- errprint('Invalid psi_excess_duration value, not float\nExit')
- exit(1)
- if psi_excess_duration < 0:
- errprint('psi_excess_duration must be non-negative number\nExit')
- exit(1)
-else:
- errprint('psi_excess_duration is not in config\nExit')
- exit(1)
-
-
-if 'max_sleep' in config_dict:
- max_sleep = string_to_float_convert_test(
- config_dict['max_sleep'])
- if max_sleep is None:
- errprint('Invalid max_sleep value, not float\nExit')
- exit(1)
- if max_sleep <= 0:
- errprint('max_sleep must be positive number\nExit')
- exit(1)
-else:
- errprint('max_sleep is not in config\nExit')
- exit(1)
-
-
-if 'min_sleep' in config_dict:
- min_sleep = string_to_float_convert_test(
- config_dict['min_sleep'])
- if min_sleep is None:
- errprint('Invalid min_sleep value, not float\nExit')
- exit(1)
- if min_sleep <= 0:
- errprint('min_sleep must be positive number\nExit')
- exit(1)
-else:
- errprint('min_sleep is not in config\nExit')
- exit(1)
-
-
-if 'over_sleep' in config_dict:
- over_sleep = string_to_float_convert_test(
- config_dict['over_sleep'])
- if over_sleep is None:
- errprint('Invalid over_sleep value, not float\nExit')
- exit(1)
- if over_sleep <= 0:
- errprint('over_sleep must be positive number\nExit')
- exit(1)
-else:
- errprint('over_sleep is not in config\nExit')
- exit(1)
-
-
-sensitivity_test_time = over_sleep / 2
-
-
-if max_sleep < min_sleep:
- errprint('min_sleep value must not exceed max_sleep value.\nExit')
- exit(1)
-
-
-if min_sleep < over_sleep:
- errprint('over_sleep value must not exceed min_sleep value.\nExit')
- exit(1)
-
-
-if max_sleep == min_sleep:
- stable_sleep = True
-else:
- stable_sleep = False
-
-
-if print_proc_table_flag:
-
- if not root:
- log('WARNING: effective UID != 0; euid={}; processes with other e'
- 'uids will be invisible for nohang'.format(self_uid))
-
- func_print_proc_table()
-
-
-##########################################################################
-
-
-psi_support = os.path.exists(psi_path)
-
-
-##########################################################################
-
-# Get KiB levels if it's possible.
-
-soft_threshold_min_swap_tuple = get_swap_threshold_tuple(
- soft_threshold_min_swap)
-hard_threshold_min_swap_tuple = get_swap_threshold_tuple(
- hard_threshold_min_swap)
-warning_threshold_min_swap_tuple = get_swap_threshold_tuple(
- warning_threshold_min_swap)
-
-
-swap_kb_dict = dict()
-
-swap_term_is_percent = soft_threshold_min_swap_tuple[1]
-if swap_term_is_percent:
- soft_threshold_min_swap_percent = soft_threshold_min_swap_tuple[0]
-else:
- soft_threshold_min_swap_kb = soft_threshold_min_swap_tuple[0]
- swap_kb_dict['soft_threshold_min_swap_kb'] = soft_threshold_min_swap_kb
-
-swap_kill_is_percent = hard_threshold_min_swap_tuple[1]
-if swap_kill_is_percent:
- hard_threshold_min_swap_percent = hard_threshold_min_swap_tuple[0]
-else:
- hard_threshold_min_swap_kb = hard_threshold_min_swap_tuple[0]
- swap_kb_dict['hard_threshold_min_swap_kb'] = hard_threshold_min_swap_kb
-
-
-swap_warn_is_percent = warning_threshold_min_swap_tuple[1]
-if swap_warn_is_percent:
- warning_threshold_min_swap_percent = warning_threshold_min_swap_tuple[0]
-else:
- warning_threshold_min_swap_kb = warning_threshold_min_swap_tuple[0]
- swap_kb_dict['warning_threshold_min_swap_kb'] = warning_threshold_min_swap_kb
-
-
-##########################################################################
-
-
-if print_config_at_startup or check_config_flag:
- check_config()
-
-
-##########################################################################
-
-
-# for calculating the column width when printing mem and zram
-mem_len = len(str(round(mem_total / 1024.0)))
-
-if post_action_gui_notifications:
- notify_sig_dict = {SIGKILL: 'Killing',
- SIGTERM: 'Terminating'}
-
-
-# convert rates from MiB/s to KiB/s
-fill_rate_mem = fill_rate_mem * 1024
-fill_rate_swap = fill_rate_swap * 1024
-fill_rate_zram = fill_rate_zram * 1024
-
-
-warn_time_now = 0
-warn_time_delta = 1000
-warn_timer = 0
-
-
-##########################################################################
-
-
-if not root:
- log('WARNING: effective UID != 0; euid={}; processes with other e'
- 'uids will be invisible for nohang'.format(self_uid))
-
-
-# Try to lock all memory
-
-mlockall()
-
-##########################################################################
-
-
-# print_self_rss()
-
-psi_avg_string = '' # will be overwritten if PSI monitoring enabled
-
-mem_used_zram = 0
-
-
-if print_mem_check_results:
-
- # to find delta mem
- wt2 = 0
- new_mem = 0
-
- # init mem report interval
- report0 = 0
-
-
-# handle signals
-for i in sig_list:
- signal(i, signal_handler)
-
-
-x0 = time()
-delta0 = 0
-
-
-threshold = None
-mem_info = None
-
-
-CHECK_PSI = False
-if psi_support and not ignore_psi:
- CHECK_PSI = True
-
-psi_kill_exceeded_timer = 0
-psi_term_exceeded_timer = 0
-psi_t0 = time()
-psi_threshold = zram_threshold = zram_info = psi_info = None
-
-
-CHECK_ZRAM = not ignore_zram
-
-log('Monitoring has started!')
-
-stdout.flush()
-
-
-##########################################################################
-
-
-while True:
-
- (masf_threshold, masf_info, mem_available, hard_threshold_min_swap_kb,
- soft_threshold_min_swap_kb, swap_free, swap_total) = check_mem_swap_ex()
-
- if CHECK_ZRAM:
- zram_threshold, zram_info, mem_used_zram = check_zram_ex()
-
- if CHECK_PSI:
- (psi_threshold, psi_info, psi_t0, psi_kill_exceeded_timer,
- psi_term_exceeded_timer, x0) = check_psi_ex(
- psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0)
-
- if print_mem_check_results:
-
- if CHECK_PSI:
- psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
- if time() - psi_t0 >= psi_post_action_delay:
- psi_post_action_delay_exceeded = True
- else:
- psi_post_action_delay_exceeded = False
-
- if print_mem_check_results:
- psi_avg_string = 'PSI avg: {} | '.format(
- str(psi_avg_value).rjust(6))
-
- wt1 = time()
-
- delta = (mem_available + swap_free) - new_mem
-
- t_cycle = wt1 - wt2
-
- report_delta = wt1 - report0
-
- if report_delta >= min_mem_report_interval:
-
- mem_report = True
- new_mem = mem_available + swap_free
-
- report0 = wt1
-
- else:
- mem_report = False
-
- wt2 = time()
-
- if mem_report:
-
- speed = delta / 1024.0 / report_delta
- speed_info = ' | dMem: {} M/s'.format(
- str(round(speed)).rjust(5)
- )
-
- # Calculate 'swap-column' width
- swap_len = len(str(round(swap_total / 1024.0)))
-
- # Output available mem sizes
- if swap_total == 0 and mem_used_zram == 0:
- log('{}MemAvail: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- speed_info
- )
- )
-
- elif swap_total > 0 and mem_used_zram == 0:
- log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- human(swap_free, swap_len),
- just_percent_swap(swap_free / (swap_total + 0.1)),
- speed_info
- )
- )
-
- else:
- log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem'
- 'UsedZram: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- human(swap_free, swap_len),
- just_percent_swap(swap_free / (swap_total + 0.1)),
- human(mem_used_zram, mem_len),
- just_percent_mem(mem_used_zram / mem_total),
- speed_info
- )
- )
-
- if (masf_threshold is SIGKILL or zram_threshold is SIGKILL or
- psi_threshold is SIGKILL):
-
- threshold = SIGKILL
- mem_info_list = []
-
- if masf_info is not None:
- mem_info_list.append(masf_info)
-
- if zram_info is not None:
- mem_info_list.append(zram_info)
-
- if psi_info is not None:
- mem_info_list.append(psi_info)
-
- psi_t0 = implement_corrective_action(
- threshold,
- mem_info_list,
- psi_t0,
- psi_kill_exceeded_timer,
- psi_term_exceeded_timer,
- x0, psi_threshold, zram_threshold, zram_info, psi_info)
- continue
-
- if (masf_threshold is SIGTERM or zram_threshold is SIGTERM or
- psi_threshold is SIGTERM):
-
- threshold = SIGTERM
- mem_info_list = []
-
- if masf_info is not None:
- mem_info_list.append(masf_info)
-
- if zram_info is not None:
- mem_info_list.append(zram_info)
-
- if psi_info is not None:
- mem_info_list.append(psi_info)
-
- psi_t0 = implement_corrective_action(
- threshold,
- mem_info_list,
- psi_t0,
- psi_kill_exceeded_timer,
- psi_term_exceeded_timer,
- x0, psi_threshold, zram_threshold, zram_info, psi_info)
- continue
-
- if low_memory_warnings_enabled:
-
- if (masf_threshold == 'WARN' or zram_threshold == 'WARN' or
- psi_threshold == 'WARN'):
-
- warn_time_delta = time() - warn_time_now
- warn_time_now = time()
- warn_timer += warn_time_delta
- if warn_timer > min_post_warning_delay:
-
- send_notify_warn()
-
- warn_timer = 0
-
- sleep_after_check_mem()
diff --git a/old/nohang.conf b/old/nohang.conf
deleted file mode 100644
index 1b13348..0000000
--- a/old/nohang.conf
+++ /dev/null
@@ -1,359 +0,0 @@
- This is nohang config file.
- Lines starting with #, tabs and spaces are comments.
- Lines starting with @ contain optional parameters.
- All values are case sensitive.
- Be careful: nohang doesn't forbid you to shoot yourself in the foot.
-
- The configuration includes the following sections:
-
- 0. Common zram settings
- 1. Memory levels to respond to as an OOM threat
- 2. Response on PSI memory metrics
- 3. The frequency of checking the level of available memory
- (and CPU usage)
- 4. The prevention of killing innocent victims
- 5. Impact on the badness of processes via matching their names, cgroups and
- cmdlines with specified regular expressions
- 6. Customize corrective actions: the execution of a specific command
- instead of sending the SIGTERM signal
- 7. GUI notifications:
- - low memory warnings
- - OOM prevention results
- 8. Output verbosity
- 9. Misc
-
- Just read the description of the parameters and edit the values.
- Please restart the program after editing the config.
-
- More docs will be written later.
-
-###############################################################################
-
- 0. Common zram settings
-
- See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
- You maybe need to set `zram_checking_enabled = True` if you has a big zram disksize.
-
-zram_checking_enabled = False
-
-###############################################################################
-
- 1. Thresholds below which a signal should be sent to the victim
-
- Sets the available memory levels at or below which SIGTERM or SIGKILL
- signals are sent. The signal will be sent if MemAvailable and
- SwapFree (in /proc/meminfo) at the same time will drop below the
- corresponding values. Can be specified in % (percent) and M (MiB).
- Valid values are floating-point numbers from the range [0; 100] %.
-
- MemAvailable levels.
-
-soft_threshold_min_mem = 8 %
-hard_threshold_min_mem = 4 %
-
- SwapFree levels.
-
-soft_threshold_min_swap = 10 %
-hard_threshold_min_swap = 5 %
-
- Specifying the total share of zram in memory, if exceeded the
- corresponding signals are sent. As the share of zram in memory
- increases, it may fall responsiveness of the system. 90 % is a
- usual hang level, not recommended to set very high.
-
- Can be specified in % and M. Valid values are floating-point
- numbers from the range [0; 90] %.
-
-soft_threshold_max_zram = 60 %
-hard_threshold_max_zram = 65 %
-
-
-###############################################################################
-
- 2. Response on PSI memory metrics (it needs Linux 4.20 and up)
-
- About PSI:
- https://facebookmicrosites.github.io/psi/
-
- Disabled by default (psi_checking_enabled = False).
-
-psi_checking_enabled = False
-
- Choose a path to PSI file.
- By default it monitors system-wide file: /proc/pressure/memory
- You also can set file to monitor one cgroup slice.
- For example:
- psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
- psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
- psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
-
- Execute the command
- find /sys/fs/cgroup -name memory.pressure
- to find available memory.pressue files (except /proc/pressure/memory).
- (actual for cgroup2)
-
-psi_path = /proc/pressure/memory
-
- Valid psi_metrics are:
- some_avg10
- some_avg60
- some_avg300
- full_avg10
- full_avg60
- full_avg300
-
- some_avg10 is most sensitive.
-
-psi_metrics = some_avg10
-
-soft_threshold_max_psi = 60
-
-hard_threshold_max_psi = 90
-
- >= 0, float
-psi_excess_duration = 60
-
-psi_post_action_delay = 60
-
-
-###############################################################################
-
- 3. The frequency of checking the amount of available memory
- (and CPU usage)
-
- Coefficients that affect the intensity of monitoring. Reducing
- the coefficients can reduce CPU usage and increase the periods
- between memory checks.
-
- Why three coefficients instead of one? Because the swap fill rate
- is usually lower than the RAM fill rate.
-
- It is possible to set a lower intensity of monitoring for swap
- without compromising to prevent OOM and thus reduce the CPU load.
-
- Default values are well for desktop. On servers without rapid
- fluctuations in memory levels the values can be reduced.
-
- Valid values are positive floating-point numbers.
-
-fill_rate_mem = 4000
-fill_rate_swap = 1500
-fill_rate_zram = 6000
-
- See also https://github.com/rfjakob/earlyoom/issues/61
-
-max_sleep = 3
-min_sleep = 0.1
-
- Sleep time if soft threshold exceeded.
-
-over_sleep = 0.05
-
-###############################################################################
-
- 4. The prevention of killing innocent victims
-
- Valid values are integers from the range [0; 1000].
-
-min_badness = 10
-
- Valid values are non-negative floating-point numbers.
- Min delay if a victim doesn't respond to SIGTERM in 10 ms.
-
-post_soft_action_delay = 3
-
-post_zombie_delay = 0.1
-
-victim_cache_time = 10
-
- Valid values are True and False.
-
-ignore_positive_oom_score_adj = False
-
-###############################################################################
-
- 5. Impact on the badness of processes via matching their names,
- cmdlines or UIDs with regular expressions using re.search().
-
- See https://en.wikipedia.org/wiki/Regular_expression and
- https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions
-
- Enabling this options slows down the search for the victim
- because the names, cmdlines or UIDs of all processes
- (except init and kthreads) are compared with the
- specified regex patterns (in fact slowing down is caused by
- reading all /proc/*/cmdline and /proc/*/status files).
-
- Use script `oom-sort` from nohang package to view
- names, cmdlines and UIDs of processes.
-
- 5.1. Matching process names with RE patterns
-
- Syntax:
-
- @BADNESS_ADJ_RE_NAME badness_adj /// RE_pattern
-
- New badness value will be += badness_adj
-
- It is possible to compare multiple patterns
- with different badness_adj values.
-
- Example:
- @BADNESS_ADJ_RE_NAME -500 /// ^sshd$
-
- 5.2. Matching CGroup_v1-line with RE patterns
-
- @BADNESS_ADJ_RE_CGROUP_V1 -100 /// ^/system\.slice/
-
- @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
-
- @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
-
- 5.3. Matching CGroup_v2-line with RE patterns
-
- @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
-
- 5.4. Matching eUIDs with RE patterns
-
- @BADNESS_ADJ_RE_UID -100 /// ^0$
-
- 5.5. Matching realpath with RE patterns
-
- @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
-
- 5.6. Matching cmdlines with RE patterns
-
- A good option that allows fine adjustment.
-
- Prefer chromium tabs and electron-based apps
- @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
-
- Prefer firefox tabs (Web Content and WebExtensions)
- @BADNESS_ADJ_RE_CMDLINE 300 /// -appomni
-
- @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
-
- 5.7. Matching environ with RE patterns
-
- @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
-
- Note that you can control badness also via systemd units via
- OOMScoreAdjust, see
- www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
-
-###############################################################################
-
- 6. Customize corrective actions.
-
- TODO: docs
-
- Syntax:
- KEY REGEXP SEPARATOR COMMAND
-
- @SOFT_ACTION_RE_NAME ^foo$ /// kill -SEGV $PID
- @SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID
-
- @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
- @SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
-
- $PID will be replaced by process PID.
- $NAME will be replaced by process name.
- $SERVICE will be replaced by .service if it exists (overwise it will be
- relpaced by empty line)
-
-###############################################################################
-
- 7. GUI notifications & low memory warnings
-
-post_action_gui_notifications = False
-
- Enable GUI notifications about the low level of available memory.
- Valid values are True and False.
-
-low_memory_warnings_enabled = False
-
- Execute the command instead of sending GUI notifications if the value is
- not empty line. For example:
- warning_exe = cat /proc/meminfo &
-
-warning_exe =
-
- Can be specified in % (percent) and M (MiB).
- Valid values are floating-point numbers from the range [0; 100] %.
-
-warning_threshold_min_mem = 20 %
-
-warning_threshold_min_swap = 25 %
-
-warning_threshold_max_zram = 50 %
-
-warning_threshold_max_psi = 100
-
- Valid values are floating-point numbers from the range [1; 300].
-
-min_post_warning_delay = 20
-
- Ampersands (&) will be replaced with asterisks (*) in process
- names and in commands.
-
-###############################################################################
-
- 8. Verbosity
-
- Display the configuration when the program starts.
- Valid values are True and False.
-
-print_config_at_startup = False
-
- Print memory check results.
- Valid values are True and False.
-
-print_mem_check_results = False
-
-min_mem_report_interval = 60
-
-print_proc_table = False
-
- Valid values:
- None
- cgroup_v1
- cgroup_v2
- realpath
- cmdline
- environ
-
-extra_table_info = None
-
-print_victim_status = True
-
-max_victim_ancestry_depth = 3
-
-print_victim_cmdline = False
-
-print_statistics = True
-
- Print sleep periods between memory checks.
- Valid values are True and False.
-
-debug_psi = False
-
-debug_gui_notifications = False
-
-debug_sleep = False
-
-separate_log = False
-
-###############################################################################
-
- 9. Misc
-
-max_soft_exit_time = 10
-
-post_kill_exe =
-
-forbid_negative_badness = True
-
-###############################################################################
-
- Use cases, feature requests and any questions are welcome:
- https://github.com/hakavlad/nohang/issues
diff --git a/old/nohang_notify_helper b/old/nohang_notify_helper
deleted file mode 100755
index b5beb43..0000000
--- a/old/nohang_notify_helper
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/usr/bin/env python3
-
-# print('Starting nohang_notify_helper')
-
-
-def decoder(string):
- """
- """
- decoded = ''
- for i in string.split(':'):
- decoded += chr(int(i))
- return decoded
-
-
-def write(path, string):
- """
- """
- with open(path, 'w') as f:
- f.write(string)
-
-
-def rline1(path):
- """read 1st line from path."""
- try:
- with open(path) as f:
- for line in f:
- return line
- except OSError:
- exit(1)
-
-
-def rfile(path):
- """read file."""
- with open(path) as f:
- return f.read()
-
-
-def re_pid_environ(pid):
- """
- read environ of 1 process
- returns tuple with USER, DBUS, DISPLAY like follow:
- ('user', 'DISPLAY=:0',
- 'DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus')
- returns None if these vars is not in /proc/[pid]/environ
- """
- try:
- env = str(rline1('/proc/' + pid + '/environ'))
- if display_env in env and dbus_env in env and user_env in env:
- env_list = env.split('\x00')
-
- # iterating over a list of process environment variables
- for i in env_list:
- if i.startswith(user_env):
- user = i
- if user == 'USER=root':
- return None
- continue
-
- if i.startswith(display_env):
- display = i[:10]
- continue
-
- if i.startswith(dbus_env):
- dbus = i
- continue
-
- if i.startswith('HOME='):
- # exclude Display Manager's user
- if i.startswith('HOME=/var'):
- return None
-
- try:
- env = user.partition('USER=')[2], display, dbus
- except UnboundLocalError:
- # print('notify helper: UnboundLocalError')
- return None
-
- return env
-
- except FileNotFoundError:
- # print('notify helper: FileNotFoundError')
- return None
- except ProcessLookupError:
- # print('notify helper: ProcessLookupError')
- return None
-
-
-def root_notify_env():
- """return set(user, display, dbus)"""
- unsorted_envs_list = []
- # iterates over processes, find processes with suitable env
- for pid in listdir('/proc'):
-
- if path.exists('/proc/' + pid + '/exe') is True:
- one_env = re_pid_environ(pid)
- unsorted_envs_list.append(one_env)
-
- env = set(unsorted_envs_list)
- env.discard(None)
-
- # deduplicate dbus
- new_env = []
- end = []
- for i in env:
- key = i[0] + i[1]
- if key not in end:
- end.append(key)
- new_env.append(i)
- else:
- continue
-
- return new_env
-
-
-try:
- write('/proc/self/oom_score_adj', '0')
-except Exception:
- pass
-
-
-try:
- from os import listdir, path
- from subprocess import Popen, TimeoutExpired
- from sys import argv
-except OSError:
- exit(1)
-
-if len(argv) == 5:
- _, uid, debug, title, body = argv
-else:
- print('{}: invalid input'.format(argv[0]))
- exit(1)
-
-uid = uid.partition('--euid=')[2]
-
-debug = debug.partition('--debug=')[2]
-
-if debug == 'True':
- debug = True
-else:
- debug = False
-
-title = title.partition('--title=')[2]
-
-body = decoder(body.partition('--body=')[2])
-
-if len(argv) != 5:
- print('nohang_notify_helper: invalid input')
- exit(1)
-
-
-with open('/proc/meminfo') as f:
- for line in f:
- if line.startswith('SwapTotal'):
- swap_total = int(line.split(':')[1][:-4])
- if swap_total > 0:
- wait_time = 15
- else:
- wait_time = 3
-
-
-if debug:
- print('nohang_notify_helper: wait_time:', wait_time, 'sec')
-
-
-if uid != '0':
- cmd = ['notify-send', '--icon=dialog-warning', title, body]
- if debug:
- print('nohang_notify_helper: run cmd:', cmd)
- with Popen(cmd) as proc:
- try:
- proc.wait(timeout=wait_time)
- except TimeoutExpired:
- proc.kill()
- if debug:
- print('nohang_notify_helper: TimeoutExpired')
- exit()
-
-display_env = 'DISPLAY='
-dbus_env = 'DBUS_SESSION_BUS_ADDRESS='
-user_env = 'USER='
-
-list_with_envs = root_notify_env()
-list_len = len(list_with_envs)
-
-# if somebody logged in with GUI
-if list_len > 0:
-
- for i in list_with_envs:
- if debug:
- print('Send a GUI notification:\n ',
- 'title: ', [title],
- '\n body: ', [body],
- '\n user/env:', i
- )
-
- # iterating over logged-in users
- for i in list_with_envs:
- username, display_env, dbus_env = i[0], i[1], i[2]
- display_tuple = display_env.partition('=')
- dbus_tuple = dbus_env.partition('=')
- display_value = display_tuple[2]
- dbus_value = dbus_tuple[2]
-
- try:
- with Popen([
- 'sudo', '-u', username,
- 'env',
- 'DISPLAY=' + display_value,
- 'DBUS_SESSION_BUS_ADDRESS=' + dbus_value,
- 'notify-send',
- '--icon=dialog-warning',
- title,
- body
- ]) as proc:
- try:
- proc.wait(timeout=wait_time)
- except TimeoutExpired:
- proc.kill()
- print('TimeoutExpired: notify user: ' + username)
- except BlockingIOError:
- print('nohang_notify_helper: BlockingIOError')
- except OSError:
- print('nohang_notify_helper: OSError')
- except Exception:
- print('nohang_notify_helper: CANNOT SPAWN NOTIFY-SEND PROCESS')
-else:
- if debug:
- print(
- 'Not send GUI notification: [',
- title,
- body,
- ']. Nobody logged-in with GUI. Nothing to do.')