diff --git a/trash/n11 b/trash/n11
new file mode 100755
index 0000000..6ffa8cc
--- /dev/null
+++ b/trash/n11
@@ -0,0 +1,3073 @@
+#!/usr/bin/env python3
+"""A daemon that prevents OOM in Linux systems."""
+
+import os
+from ctypes import CDLL
+from time import sleep, time
+from operator import itemgetter
+from sys import stdout, stderr, argv, exit, version
+from re import search
+from sre_constants import error as invalid_re
+from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP
+
+
+##########################################################################
+
+# define functions
+
+# NOTE(review): the string literal below holds disabled helper code
+# (self_rss / print_self_rss). As a bare string expression it is never
+# executed; it is kept here only as a reference.
+'''
+def self_rss():
+ """
+ """
+ return pid_to_status(self_pid)[5]
+
+
+def print_self_rss():
+ """
+ """
+ log('Self RSS: {} MiB'.format(self_rss()))
+'''
+
+
+
+
+
+
+
+
+
+
+
+
+
+def cgroup2_root():
+    """Return the mount point of the cgroup2 hierarchy, or None.
+
+    Scans /proc/mounts and returns the mount-point field of the first
+    entry whose filesystem type is 'cgroup2'. Returns None when there is
+    no such entry.
+    """
+    with open('/proc/mounts') as f:
+        for line in f:
+            # Fields: device, mount point, fs type, options, dump, pass.
+            # Spaces inside paths are escaped as \040 in /proc/mounts, so
+            # splitting on ' ' keeps the fields intact.
+            fields = line.split(' ')
+            if len(fields) >= 3 and fields[2] == 'cgroup2':
+                # Take the mount point by position: the result must not
+                # depend on the length of the device name and must not
+                # carry a leading space.
+                return fields[1]
+    return None
+
+
+
+
+def cgroup2_to_psi_file(cg2):
+    """Build the path to the memory.pressure file of a cgroup2 group.
+
+    cg2: str cgroup path, appended directly to the cgroup2 mount point
+    returns str path, or None when cgroup2_root() finds no mount point
+    """
+    mount_point = cgroup2_root()
+    if mount_point is None:
+        return None
+    return mount_point + cg2 + '/memory.pressure'
+
+
+
+
+
+def get_psi_mem_files(cgroup2_path):
+    """Collect every memory.pressure file below cgroup2_path.
+
+    cgroup2_path: str directory to walk recursively
+    returns list of str paths ending with '/memory.pressure'
+    """
+    return [
+        os.path.join(root, name)
+        for root, _dirs, names in os.walk(cgroup2_path)
+        for name in names
+        if os.path.join(root, name).endswith('/memory.pressure')
+    ]
+
+
+def psi_path_to_cgroup2(path):
+    """Convert a memory.pressure file path into its cgroup2 path.
+
+    Inverse of cgroup2_to_psi_file(): drops everything up to and
+    including the cgroup2 mount point and the trailing '/memory.pressure'.
+
+    path: str path to a memory.pressure file
+    returns str cgroup path, or None when no cgroup2 mount point is found
+    """
+    cg2root = cgroup2_root()
+    if cg2root is None:
+        return None
+    # The separator is the cgroup2 mount point; 16 == len('/memory.pressure')
+    return path.partition(cg2root)[2][:-16]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+def get_swap_threshold_tuple(string):
+    """Parse a swap threshold written as 'N%' or 'NM'.
+
+    string: str value ending with '%' (percent) or 'M' (MiB)
+    returns (float percent, True) for '%' values, or
+            (float KiB, False) for 'M' values
+
+    Prints an error to stderr and exits with status 1 if the unit is
+    unknown, the number is not a float, a percentage is outside
+    [0; 100], or an absolute value is negative.
+    """
+    is_percent = string.endswith('%')
+
+    if not (is_percent or string.endswith('M')):
+        errprint(
+            'Invalid config file. There are invalid units somewhere\nExit')
+        exit(1)
+
+    number = string[:-1]
+
+    if string_to_float_convert_test(number) is None:
+        if is_percent:
+            errprint('somewhere swap unit is not float_%')
+        else:
+            errprint('somewhere swap unit is not float_M')
+        exit(1)
+
+    if is_percent:
+        value = float(number.strip())
+        if value < 0 or value > 100:
+            errprint('invalid value, must be from the range[0; 100] %')
+            exit(1)
+        return value, True
+
+    # MiB -> KiB
+    value = float(number.strip()) * 1024
+    if value < 0:
+        errprint('invalid unit in config (negative value)')
+        exit(1)
+    return value, False
+
+
+def find_cgroup_indexes():
+    """Find cgroup-line positions in /proc/*/cgroup file.
+
+    Reads /proc/self/cgroup and returns (cgroup_v1_index, cgroup_v2_index):
+    the index of the last line containing ':name=' and the index of the
+    last line starting with '0::'. Either value is None if no line matches.
+    """
+    with open('/proc/self/cgroup') as f:
+        lines = f.readlines()
+
+    v1_hits = [pos for pos, text in enumerate(lines) if ':name=' in text]
+    v2_hits = [
+        pos for pos, text in enumerate(lines) if text.startswith('0::')]
+
+    cgroup_v1_index = v1_hits[-1] if v1_hits else None
+    cgroup_v2_index = v2_hits[-1] if v2_hits else None
+    return cgroup_v1_index, cgroup_v2_index
+
+
+def pid_to_rss(pid):
+    """Return the second field of /proc/<pid>/statm times SC_PAGESIZE.
+
+    pid: pid of the process (anything str.format accepts)
+    returns int, or None if the file is missing, the process is gone or
+    the line has no second field
+    """
+    statm_path = '/proc/{}/statm'.format(pid)
+    try:
+        fields = rline1(statm_path).split(' ')
+        return int(fields[1]) * SC_PAGESIZE
+    except (IndexError, FileNotFoundError, ProcessLookupError):
+        return None
+
+
+def pid_to_vm_size(pid):
+    """Return the first field of /proc/<pid>/statm times SC_PAGESIZE.
+
+    pid: pid of the process (anything str.format accepts)
+    returns int, or None if the file is missing or the process is gone
+    """
+    statm_path = '/proc/{}/statm'.format(pid)
+    try:
+        first_field = rline1(statm_path).partition(' ')[0]
+        return int(first_field) * SC_PAGESIZE
+    except (IndexError, FileNotFoundError, ProcessLookupError):
+        return None
+
+
+def signal_handler(signum, frame):
+    """Handle a signal: log it, print the statistics and exit.
+
+    Every signal in sig_list is first re-routed to signal_handler_inner,
+    so signals arriving during shutdown are only logged.
+
+    signum: number of the received signal (key of sig_dict)
+    frame: current stack frame (unused)
+    """
+    for sig in sig_list:
+        signal(sig, signal_handler_inner)
+
+    sig_name = sig_dict[signum]
+    log('Signal handler called with the {} signal '.format(sig_name))
+    update_stat_dict_and_print(None)
+    log('Exit')
+    exit()
+
+
+def signal_handler_inner(signum, frame):
+    """Log a received signal without acting on it.
+
+    signum: number of the received signal (key of sig_dict)
+    frame: current stack frame (unused)
+    """
+    sig_name = sig_dict[signum]
+    log('Signal handler called with the {} signal (ignored) '.format(
+        sig_name))
+
+
+def exe(cmd):
+    """Run a shell command with os.system() and log its result.
+
+    The daemon's own oom_score_adj is set to self_oom_score_adj_max before
+    the command starts and to self_oom_score_adj_min after it returns.
+
+    cmd: str command line passed to os.system()
+    returns the value returned by os.system()
+    """
+    log('Execute the command: {}'.format(cmd))
+    started = time()
+
+    write_self_oom_score_adj(self_oom_score_adj_max)
+    status = os.system(cmd)
+    write_self_oom_score_adj(self_oom_score_adj_min)
+
+    elapsed = time() - started
+    log('Exit status: {}; exe duration: {} sec'.format(
+        status, round(elapsed, 3)))
+    return status
+
+
+def write(path, string):
+    """Overwrite the file at path with string.
+
+    path: str path of the file to (re)create
+    string: str content to write
+    """
+    with open(path, 'w') as target:
+        target.write(string)
+
+
+def write_self_oom_score_adj(new_value):
+    """Write new_value to /proc/self/oom_score_adj if `root` is set.
+
+    new_value: value passed unchanged to write()
+    """
+    if not root:
+        return
+    write('/proc/self/oom_score_adj', new_value)
+
+
+def valid_re(reg_exp):
+    """Validate regular expression.
+
+    reg_exp: str pattern to check
+    Logs the problem and exits with status 1 if the pattern is invalid;
+    returns None otherwise.
+    """
+    try:
+        search(reg_exp, '')
+    except invalid_re:
+        pattern_ok = False
+    else:
+        pattern_ok = True
+
+    if not pattern_ok:
+        log('Invalid config: invalid regexp: {}'.format(reg_exp))
+        exit(1)
+
+
+def func_print_proc_table():
+    """Run find_victim() with the process table enabled, then exit."""
+    find_victim(True)
+    exit()
+
+
+def log(*msg):
+    """Print msg and, when separate_log is set, also pass it to info().
+
+    An OSError raised by either output is swallowed after a 10 ms pause.
+    """
+    try:
+        print(*msg)
+    except OSError:
+        sleep(0.01)
+
+    if not separate_log:
+        return
+
+    try:
+        info(*msg)
+    except OSError:
+        sleep(0.01)
+
+
+def print_version():
+    """Print the version read from /etc/nohang/version and exit.
+
+    Prints 'Nohang unknown version' when the file is missing or empty.
+    """
+    try:
+        ver = rline1('/etc/nohang/version')
+    except FileNotFoundError:
+        ver = None
+
+    print('Nohang unknown version' if ver is None else 'Nohang ' + ver)
+    exit()
+
+
+def test():
+    """Print the results of a few helper calls for manual checks and exit."""
+    print('\n(This option is not ready to use!)\n')
+
+    print(version)
+    print(argv)
+
+    hr = '=================================='
+
+    # (label, call) pairs; each call runs only after its label is printed
+    checks = (
+        ("uptime()", lambda: uptime()),
+        ("os.uname()", lambda: os.uname()),
+        ("pid_to_starttime('self')", lambda: pid_to_starttime('self')),
+        ("get_victim_id('self')", lambda: get_victim_id('self')),
+        ("errprint('test')", lambda: errprint('test')),
+        ("mlockall()", lambda: mlockall()),
+        ("pid_to_state('2')", lambda: pid_to_state('2')),
+    )
+
+    for label, call in checks:
+        print(hr)
+        print(label)
+        print(call())
+
+    exit()
+
+
+def pid_to_cgroup_v1(pid):
+    """Return the cgroup v1 path of a process.
+
+    Takes the line at position cgroup_v1_index of /proc/<pid>/cgroup and
+    returns '/' plus everything after its first '/', without the last
+    character. Returns '' if the file is missing or has no such line.
+
+    pid: str pid of the process
+    """
+    try:
+        with open('/proc/' + pid + '/cgroup') as f:
+            lines = f.readlines()
+    except FileNotFoundError:
+        return ''
+
+    picked = [text for pos, text in enumerate(lines)
+              if pos == cgroup_v1_index]
+    if not picked:
+        return ''
+    return '/' + picked[-1].partition('/')[2][:-1]
+
+
+def pid_to_cgroup_v2(pid):
+    """Return the cgroup v2 path of a process.
+
+    Takes the line at position cgroup_v2_index of /proc/<pid>/cgroup and
+    returns it without its first three characters and its last character.
+    Returns '' if the file is missing or has no such line.
+
+    pid: str pid of the process
+    """
+    try:
+        with open('/proc/' + pid + '/cgroup') as f:
+            lines = f.readlines()
+    except FileNotFoundError:
+        return ''
+
+    picked = [text for pos, text in enumerate(lines)
+              if pos == cgroup_v2_index]
+    if not picked:
+        return ''
+    return picked[-1][3:-1]
+
+
+def pid_to_starttime(pid):
+    """Return the start time of a process, divided by SC_CLK_TCK.
+
+    The value is the 21st space-separated token after the last ')' in
+    /proc/<pid>/stat.
+
+    pid: str pid of the process
+    returns float
+
+    NOTE: a missing stat file (FileNotFoundError) is not handled here.
+    """
+    stat_path = '/proc/' + pid + '/stat'
+    try:
+        stat_text = rline1(stat_path)
+    except UnicodeDecodeError:
+        with open(stat_path, 'rb') as f:
+            stat_text = f.read().decode('utf-8', 'ignore')
+
+    starttime = stat_text.rpartition(')')[2].split(' ')[20]
+    return float(starttime) / SC_CLK_TCK
+
+
+def get_victim_id(pid):
+    """Return '<starttime>_pid<pid>' for a process.
+
+    pid: str pid of the process
+    returns str id, or '' if /proc/<pid>/stat cannot be read
+    """
+    try:
+        after_comm = rline1('/proc/' + pid + '/stat').rpartition(')')[2]
+        return after_comm.split(' ')[20] + '_pid' + pid
+    except (FileNotFoundError, ProcessLookupError):
+        return ''
+
+
+def pid_to_state(pid):
+    """Return the state character of a process from /proc/<pid>/stat.
+
+    pid: str pid of the process
+    returns the second character after the last ')' of the stat line
+
+    NOTE: a missing stat file (FileNotFoundError) is not handled here.
+    """
+    stat_line = rline1('/proc/' + pid + '/stat')
+    after_comm = stat_line.rpartition(')')[2]
+    return after_comm[1]
+
+
+def pid_to_name(pid):
+    """Return the content of /proc/<pid>/comm without its last character.
+
+    Undecodable bytes are dropped.
+
+    pid: str pid of the process
+    returns str name, or '' if the file is missing or the process is gone
+    """
+    try:
+        with open('/proc/' + pid + '/comm', 'rb') as f:
+            raw = f.read()
+    except (FileNotFoundError, ProcessLookupError):
+        return ''
+    return raw.decode('utf-8', 'ignore')[:-1]
+
+
+def pid_to_ppid(pid):
+    """Return the parent pid of a process as a string.
+
+    Reads the line at position ppid_index of /proc/<pid>/status and
+    returns its second tab-separated field, stripped.
+
+    pid: str pid of the process
+    returns str ppid, or '' if the file is missing, the process is gone
+    or the file has no such line
+    """
+    try:
+        # Read bytes and drop undecodable ones, so a process name with
+        # invalid UTF-8 in the first line cannot break the parsing.
+        with open('/proc/' + pid + '/status', 'rb') as f:
+            lines = f.read().decode('utf-8', 'ignore').split('\n')
+        # Index by value; identity comparison ('is') on ints is unreliable.
+        return lines[ppid_index].split('\t')[1].strip()
+    except (FileNotFoundError, ProcessLookupError, IndexError):
+        return ''
+
+
+def pid_to_ancestry(pid, max_ancestry_depth=1):
+    """Describe the parent chain of a process for the victim report.
+
+    pid: str pid of the process
+    max_ancestry_depth: int; 0 gives '', 1 gives a single PPID line,
+        other values give an 'Ancestry' line with up to that many
+        ancestors, stopping early once pid 1 is reached
+    returns str starting with a newline (or '')
+    """
+    if max_ancestry_depth == 1:
+        ppid = pid_to_ppid(pid)
+        return '\n PPID: {} ({})'.format(ppid, pid_to_name(ppid))
+
+    if max_ancestry_depth == 0:
+        return ''
+
+    ancestors = []
+    current = pid
+    for _ in range(max_ancestry_depth):
+        ppid = pid_to_ppid(current)
+        ancestors.append((ppid, pid_to_name(ppid)))
+        if ppid == '1':
+            break
+        current = ppid
+
+    chain = ' <= '.join(
+        'PID {} ({})'.format(ppid, pname) for ppid, pname in ancestors)
+    return '\n Ancestry: ' + chain
+
+
+def pid_to_cmdline(pid):
+    """
+    Get process cmdline by pid.
+
+    NUL separators are replaced with spaces and trailing whitespace is
+    removed. Bytes that are not valid UTF-8 are dropped instead of
+    raising UnicodeDecodeError (same approach as pid_to_name()).
+
+    pid: str pid of required process
+    returns string cmdline, or '' if the file is missing or the process
+    is gone
+    """
+    try:
+        with open('/proc/' + pid + '/cmdline', 'rb') as f:
+            raw = f.read()
+    except (FileNotFoundError, ProcessLookupError):
+        return ''
+    return raw.decode('utf-8', 'ignore').replace('\x00', ' ').rstrip()
+
+
+def pid_to_environ(pid):
+    """
+    Get process environ by pid.
+
+    NUL separators are replaced with spaces and trailing whitespace is
+    removed. Bytes that are not valid UTF-8 are dropped instead of
+    raising UnicodeDecodeError (same approach as pid_to_name()).
+
+    pid: str pid of required process
+    returns string environ, or '' if the file is missing or the process
+    is gone
+    """
+    try:
+        with open('/proc/' + pid + '/environ', 'rb') as f:
+            raw = f.read()
+    except (FileNotFoundError, ProcessLookupError):
+        return ''
+    return raw.decode('utf-8', 'ignore').replace('\x00', ' ').rstrip()
+
+
+def pid_to_realpath(pid):
+    """Resolve /proc/<pid>/exe with os.path.realpath().
+
+    pid: str pid of the process
+    returns str path, or '' if FileNotFoundError is raised
+    """
+    exe_link = '/proc/' + pid + '/exe'
+    try:
+        resolved = os.path.realpath(exe_link)
+    except FileNotFoundError:
+        return ''
+    return resolved
+
+
+def pid_to_uid(pid):
+    """Return euid of a process as a string.
+
+    Reads the line at position uid_index of /proc/<pid>/status and
+    returns its third tab-separated field.
+
+    pid: str pid of the process
+    returns str euid, or '' if the file is missing, the process is gone
+    or the file has no such line
+    """
+    try:
+        # Read bytes and drop undecodable ones, so a process name with
+        # invalid UTF-8 in the first line cannot break the parsing.
+        with open('/proc/' + pid + '/status', 'rb') as f:
+            lines = f.read().decode('utf-8', 'ignore').split('\n')
+        # Index by value; identity comparison ('is') on ints is unreliable.
+        return lines[uid_index].split('\t')[2]
+    except (FileNotFoundError, ProcessLookupError, IndexError):
+        return ''
+
+
+def pid_to_badness(pid):
+    """Find and modify badness (if it needs).
+
+    Starts from /proc/<pid>/oom_score, optionally reduces the effect of a
+    positive oom_score_adj, then adds the configured adjustment of every
+    matching regular expression (name, cgroup v1/v2, realpath, cmdline,
+    environ, uid), each category only if its flag is set.
+
+    pid: str pid of the process
+    returns (badness, oom_score), or (None, None) if the process is gone
+    """
+    try:
+        oom_score = int(rline1('/proc/' + pid + '/oom_score'))
+        badness = oom_score
+
+        if decrease_oom_score_adj:
+            oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
+            if badness > oom_score_adj_max and oom_score_adj > 0:
+                badness = badness - oom_score_adj + oom_score_adj_max
+
+        if regex_matching:
+            badness += _regex_adjustment(
+                processname_re_list, pid_to_name(pid))
+
+        if re_match_cgroup_v1:
+            badness += _regex_adjustment(
+                cgroup_v1_re_list, pid_to_cgroup_v1(pid))
+
+        if re_match_cgroup_v2:
+            badness += _regex_adjustment(
+                cgroup_v2_re_list, pid_to_cgroup_v2(pid))
+
+        if re_match_realpath:
+            badness += _regex_adjustment(
+                realpath_re_list, pid_to_realpath(pid))
+
+        if re_match_cmdline:
+            badness += _regex_adjustment(
+                cmdline_re_list, pid_to_cmdline(pid))
+
+        if re_match_environ:
+            badness += _regex_adjustment(
+                environ_re_list, pid_to_environ(pid))
+
+        if re_match_uid:
+            badness += _regex_adjustment(uid_re_list, pid_to_uid(pid))
+
+        if forbid_negative_badness and badness < 0:
+            badness = 0
+
+        return badness, oom_score
+
+    except (FileNotFoundError, ProcessLookupError):
+        return None, None
+
+
+def _regex_adjustment(re_list, text):
+    """Sum int(rule[0]) over the rules whose pattern rule[1] matches text."""
+    return sum(
+        int(re_tup[0]) for re_tup in re_list
+        if search(re_tup[1], text) is not None)
+
+
+def pid_to_status(pid):
+    """Read the main fields of /proc/<pid>/status.
+
+    Lines are picked by position (0 for the name, then state_index,
+    ppid_index, uid_index, vm_size_index, vm_rss_index, vm_swap_index).
+    The Vm* values are converted from KiB to MiB.
+
+    pid: str pid of the process
+    returns (name, state, ppid, uid, vm_size, vm_rss, vm_swap), or None
+    if the process is gone or a line is missing or cannot be parsed.
+    Falls back to pid_to_status_unicode() on UnicodeDecodeError.
+    """
+    try:
+        with open('/proc/' + pid + '/status') as f:
+            lines = f.readlines()
+
+        # Index the lines by value: identity comparison ('is') on ints is
+        # unreliable, and a short file now raises IndexError (handled
+        # below) instead of leaving variables unassigned.
+        name = lines[0].split('\t')[1][:-1]
+        state = lines[state_index].split('\t')[1][0]
+        ppid = lines[ppid_index].split('\t')[1][:-1]
+        uid = lines[uid_index].split('\t')[2]
+        # [:-4] drops the trailing ' kB\n'
+        vm_size = kib_to_mib(int(lines[vm_size_index].split('\t')[1][:-4]))
+        vm_rss = kib_to_mib(int(lines[vm_rss_index].split('\t')[1][:-4]))
+        vm_swap = kib_to_mib(int(lines[vm_swap_index].split('\t')[1][:-4]))
+
+        return name, state, ppid, uid, vm_size, vm_rss, vm_swap
+
+    # UnicodeDecodeError is a ValueError, so it must be handled first
+    except UnicodeDecodeError:
+        return pid_to_status_unicode(pid)
+
+    except (FileNotFoundError, ProcessLookupError, ValueError, IndexError):
+        return None
+
+
+def pid_to_status_unicode(pid):
+    """Read the main fields of /proc/<pid>/status, ignoring bad bytes.
+
+    Same result as pid_to_status(), but the file is read as bytes and
+    undecodable bytes are dropped, so a process name that is not valid
+    UTF-8 does not break the parsing.
+
+    pid: str pid of the process
+    returns (name, state, ppid, uid, vm_size, vm_rss, vm_swap), or None
+    if the process is gone or a line is missing or cannot be parsed
+    """
+    try:
+        with open('/proc/' + pid + '/status', 'rb') as f:
+            f_list = f.read().decode('utf-8', 'ignore').split('\n')
+
+        # Index the lines by value: identity comparison ('is') on ints is
+        # unreliable, and a short file now raises IndexError (handled
+        # below) instead of leaving variables unassigned.
+        name = f_list[0].split('\t')[1]
+        state = f_list[state_index].split('\t')[1][0]
+        ppid = f_list[ppid_index].split('\t')[1]
+        uid = f_list[uid_index].split('\t')[2]
+        # [:-3] drops the trailing ' kB' (lines carry no newline here)
+        vm_size = kib_to_mib(int(f_list[vm_size_index].split('\t')[1][:-3]))
+        vm_rss = kib_to_mib(int(f_list[vm_rss_index].split('\t')[1][:-3]))
+        vm_swap = kib_to_mib(int(f_list[vm_swap_index].split('\t')[1][:-3]))
+
+        return name, state, ppid, uid, vm_size, vm_rss, vm_swap
+
+    except (FileNotFoundError, ProcessLookupError, ValueError, IndexError):
+        return None
+
+
+def uptime():
+    """Return the first field of /proc/uptime as a float."""
+    first_field = rline1('/proc/uptime').split(' ')[0]
+    return float(first_field)
+
+
+def errprint(*text):
+    """Print the arguments to stderr and flush immediately."""
+    options = {'file': stderr, 'flush': True}
+    print(*text, **options)
+
+
+def mlockall():
+    """Lock all memory to prevent swapping nohang process.
+
+    Calls libc mlockall() with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
+    first; if that fails, retries without MCL_ONFAULT. Logs a warning
+    when both attempts fail.
+    """
+    MCL_CURRENT, MCL_FUTURE, MCL_ONFAULT = 1, 2, 4
+
+    libc = CDLL('libc.so.6', use_errno=True)
+
+    if libc.mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) == 0:
+        # All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
+        return
+
+    if libc.mlockall(MCL_CURRENT | MCL_FUTURE) != 0:
+        log('WARNING: cannot lock all memory')
+
+
+def update_stat_dict_and_print(key):
+    """Count an event in stat_dict and log the totals.
+
+    key: event name to count, or None to only log the totals
+
+    The totals are logged only when print_total_stat is set.
+    """
+    if key is not None:
+        stat_dict[key] = stat_dict[key] + 1 if key in stat_dict else 1
+
+    if not print_total_stat:
+        return
+
+    elapsed = format_time(time() - start_time)
+    stats_msg = 'Total stat (what happened in the last {}):'.format(elapsed)
+    stats_msg += ''.join(
+        '\n {}: {}'.format(event, stat_dict[event]) for event in stat_dict)
+
+    log(stats_msg)
+
+
+def find_psi_metrics_value(psi_path, psi_metrics):
+    """Read one averaged value from a PSI file.
+
+    psi_path: str path to the pressure file
+    psi_metrics: one of 'some_avg10', 'some_avg60', 'some_avg300',
+        'full_avg10', 'full_avg60', 'full_avg300'
+    returns float value; None when psi_support is not set or the metric
+    name is not one of the above
+
+    'some_*' values are taken from the first line of the file, 'full_*'
+    values from the second one.
+    """
+    if not psi_support:
+        return None
+
+    # metric name -> position of its 'avgN=value' token on the line
+    some_fields = {'some_avg10': 1, 'some_avg60': 2, 'some_avg300': 3}
+    full_fields = {'full_avg10': 1, 'full_avg60': 2, 'full_avg300': 3}
+
+    if psi_metrics in some_fields:
+        tokens = rline1(psi_path).split(' ')
+        return float(tokens[some_fields[psi_metrics]].split('=')[1])
+
+    if psi_metrics in full_fields:
+        with open(psi_path) as f:
+            psi_list = f.readlines()
+        tokens = psi_list[1].split(' ')
+        return float(tokens[full_fields[psi_metrics]].split('=')[1])
+
+    return None
+
+
+def check_mem_and_swap():
+    """find mem_available, swap_total, swap_free
+
+    Reads /proc/meminfo and returns the three values as ints, taken from
+    the third line and from the lines at swap_total_index and
+    swap_free_index.
+    """
+    with open('/proc/meminfo') as f:
+        for n, line in enumerate(f):
+            # Compare line numbers by value: 'is' tests object identity
+            # and only works for ints by accident.
+            if n == 2:
+                # [:-4] drops the trailing ' kB\n'
+                mem_available = int(line.split(':')[1][:-4])
+                continue
+            if n == swap_total_index:
+                swap_total = int(line.split(':')[1][:-4])
+                continue
+            if n == swap_free_index:
+                swap_free = int(line.split(':')[1][:-4])
+                break
+    return mem_available, swap_total, swap_free
+
+
+def check_zram():
+    """find MemUsedZram
+
+    Sums disksize and mem_used_total over every /sys/block/zram* device
+    and returns (mem_used_total + disksize * ZRAM_DISKSIZE_FACTOR) / 1024.
+    """
+    stats = [
+        zram_stat(dev)
+        for dev in os.listdir('/sys/block')
+        if dev.startswith('zram')
+    ]
+    disksize_sum = sum(int(stat[0]) for stat in stats)
+    mem_used_total_sum = sum(int(stat[1]) for stat in stats)
+
+    # ZRAM_DISKSIZE_FACTOR = delta of MemAvailable / disksize: setting a
+    # zram disksize of 1 GiB decreases the available memory by about
+    # 0.0042 GiB. Found experimentally; requires clarification with
+    # different kernels and architectures. On small devices (up to a
+    # gigabyte) it can be more, up to 0.0045.
+    # The zram documentation claims the factor should be 0.001
+    # ("zram uses about 0.1% of the size of the disk"
+    # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt),
+    # but this statement contradicts the experimental data.
+    ZRAM_DISKSIZE_FACTOR = 0.0042
+
+    return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
+
+
+def format_time(t):
+    """Format a duration in seconds as 'H h M min S sec'.
+
+    t: number of seconds (truncated to int)
+    returns '{} sec' below one minute, '{} min {} sec' below one hour,
+    '{} h {} min {} sec' otherwise
+    """
+    total = int(t)
+    if total < 60:
+        return '{} sec'.format(total)
+
+    minutes, seconds = divmod(total, 60)
+    if total < 3600:
+        return '{} min {} sec'.format(minutes, seconds)
+
+    hours, minutes = divmod(minutes, 60)
+    return '{} h {} min {} sec'.format(hours, minutes, seconds)
+
+
+def string_to_float_convert_test(string):
+    """Try to interpret a string value as a float.
+
+    returns the float, or None if float() raises ValueError
+    """
+    try:
+        converted = float(string)
+    except ValueError:
+        return None
+    return converted
+
+
+def string_to_int_convert_test(string):
+    """Try to interpret a string value as an integer.
+
+    returns the int, or None if int() raises ValueError
+    """
+    try:
+        converted = int(string)
+    except ValueError:
+        return None
+    return converted
+
+
+def conf_parse_string(param):
+    """
+    Get string parameters from the config dict.
+
+    param: config_dict key
+    returns config_dict[param].strip()
+
+    Prints an error and exits with status 1 if the key is missing.
+    """
+    if param not in config_dict:
+        errprint('All the necessary parameters must be in the config')
+        errprint('There is no "{}" parameter in the config'.format(param))
+        exit(1)
+
+    return config_dict[param].strip()
+
+
+def conf_parse_bool(param):
+    """
+    Get bool parameters from the config_dict.
+
+    param: config_dict key
+    returns bool
+
+    Prints an error and exits with status 1 if the key is missing or its
+    value is neither 'True' nor 'False'.
+    """
+    if param not in config_dict:
+        errprint('All the necessary parameters must be in the config')
+        errprint('There is no "{}" parameter in the config'.format(param))
+        exit(1)
+
+    param_str = config_dict[param]
+
+    if param_str == 'True':
+        return True
+    if param_str == 'False':
+        return False
+
+    errprint('Invalid value of the "{}" parameter.'.format(param))
+    errprint('Valid values are True and False.')
+    errprint('Exit')
+    exit(1)
+
+
+def rline1(path):
+    """read 1st line from path.
+
+    returns the first line without its last character, or None for an
+    empty file. If the text cannot be decoded, the first 999 bytes are
+    read instead, undecodable bytes are dropped and the part before the
+    first newline is returned.
+    """
+    try:
+        with open(path) as f:
+            first_line = f.readline()
+        if first_line:
+            return first_line[:-1]
+        return None
+    except UnicodeDecodeError:
+        with open(path, 'rb') as f:
+            head = f.read(999).decode('utf-8', 'ignore')
+        return head.partition('\n')[0]
+
+
+def kib_to_mib(num):
+    """Convert a KiB value to MiB, rounded to an integer."""
+    mib = num / 1024.0
+    return round(mib)
+
+
+def percent(num):
+    """Interpret num as a fraction and return it as a percentage.
+
+    returns num * 100 rounded to one decimal place
+    """
+    scaled = num * 100
+    return round(scaled, 1)
+
+
+def just_percent_mem(num):
+    """Convert a fraction to a percent string right-aligned to width 4."""
+    text = str(round(num * 100, 1))
+    return '{:>4}'.format(text)
+
+
+def just_percent_swap(num):
+    """Convert a fraction to a percent string right-aligned to width 5."""
+    text = str(round(num * 100, 1))
+    return '{:>5}'.format(text)
+
+
+def human(num, lenth):
+    """Convert KiB values to MiB values with right alignment.
+
+    num: value in KiB
+    lenth: minimal width of the returned string
+    returns str with the rounded MiB value, left-padded with spaces
+    """
+    mib_text = str(round(num / 1024))
+    return mib_text.rjust(lenth, ' ')
+
+
+def zram_stat(zram_id):
+    """
+    Get zram state.
+
+    zram_id: str zram block-device id
+    returns (disksize, mem_used_total), both as str; ('0', '0') if the
+    disksize file is missing or the disksize is '0'
+    """
+    try:
+        disksize = rline1('/sys/block/' + zram_id + '/disksize')
+    except FileNotFoundError:
+        return '0', '0'
+
+    # rline1() returns the line without its newline, so compare with the
+    # plain string (comparing with a list could never be true).
+    if disksize == '0':
+        return '0', '0'
+
+    try:
+        # split() without arguments skips the runs of spaces between the
+        # columns of mm_stat; the third column is mem_used_total.
+        mm_stat_list = rline1('/sys/block/' + zram_id + '/mm_stat').split()
+        mem_used_total = mm_stat_list[2]
+    except FileNotFoundError:
+        # no mm_stat file: read the separate mem_used_total file instead
+        mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total')
+
+    return disksize, mem_used_total  # BYTES, str
+
+
+def send_notify_warn():
+    """
+    Warn the user that the warning threshold is exceeded.
+    (implement Low memory warnings)
+
+    Runs warning_exe when check_warning_exe is set; otherwise sends a
+    'Low memory' notification with the MemAvail and SwapFree percentages.
+    """
+    log('Warning threshold exceeded')
+
+    if check_warning_exe:
+        exe(warning_exe)
+        return
+
+    mem_percent = round(mem_available / mem_total * 100)
+    # + 0.1 avoids division by zero when there is no swap
+    swap_percent = round(swap_free / (swap_total + 0.1) * 100)
+
+    send_notification(
+        'Low memory',
+        'MemAvail: {}%\nSwapFree: {}%'.format(mem_percent, swap_percent))
+
+
+def send_notify(signal, name, pid):
+    """
+    Notify about OOM prevention.
+
+    signal: key for notify_sig_dict
+    name: str process name
+    pid: str process pid
+    """
+    # wait for memory release after corrective action
+    # may be useful if free memory was about 0 immediately after
+    # corrective action
+    sleep(0.05)
+
+    action = notify_sig_dict[signal]
+    # symbol '&' can break notifications in some themes,
+    # therefore it is replaced by '*'
+    safe_name = name.replace('&', '*')
+
+    send_notification(
+        'Freeze prevention',
+        '{} [{}] {}'.format(action, pid, safe_name))
+
+
+def send_notify_etc(pid, name, command):
+    """
+    Notify about OOM prevention done by running a command.
+
+    command: str command that will be executed
+    name: str process name
+    pid: str process pid
+    """
+    # '&' is replaced by '*' in both the name and the command
+    safe_name = name.replace('&', '*')
+    safe_command = command.replace('&', '*')
+
+    body = 'Victim is [{}] {}\nExecute the command:\n{}'.format(
+        pid, safe_name, safe_command)
+
+    send_notification('Freeze prevention', body)
+
+
+def send_notification(title, body):
+    """Pass a notification to the notify helper through a cache file.
+
+    Writes title, a 16-character '#' separator and body to
+    /dev/shm/nohang_notify_cache_uid<uid>_time<timestamp>, then runs
+    notify_helper_path in the background with the same uid and timestamp.
+
+    title: str notification title
+    body: str notification body
+    returns None (also when the cache file cannot be written)
+    """
+    split_by = '#' * 16
+
+    t000 = time()
+
+    path_to_cache = '/dev/shm/nohang_notify_cache_uid{}_time{}'.format(
+        str(self_uid), t000
+    )
+
+    text = '{}{}{}'.format(title, split_by, body)
+
+    try:
+        # Create the file with mode 0600 from the start, and refuse to
+        # reuse an existing path (O_EXCL): /dev/shm is shared, so the
+        # text must never be readable by others, not even briefly.
+        fd = os.open(
+            path_to_cache, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
+        with os.fdopen(fd, 'w') as f:
+            f.write(text)
+    except OSError:
+        log('OSError while send notification '
+            '(No space left on device: /dev/shm)')
+        return None
+
+    cmd = '{} --uid {} --time {} &'.format(notify_helper_path, self_uid, t000)
+
+    exe(cmd)
+
+
+def get_pid_list():
+    """
+    Return the names in /proc that have an existing 'exe' entry.
+
+    Intended to list processes except kthreads and zombies.
+    """
+    return [
+        pid for pid in os.listdir('/proc')
+        if os.path.exists('/proc/' + pid + '/exe') is True
+    ]
+
+
+def get_non_decimal_pids():
+    """Return the items of the global pid_list not starting with a digit."""
+    return [pid for pid in pid_list if not pid[0].isdecimal()]
+
+
+def find_victim(_print_proc_table):
+    """
+    Find the process with highest badness.
+
+    _print_proc_table: bool, also log a table with one row per process
+    returns (pid, victim_badness, victim_name)
+
+    Candidates are the numeric /proc entries with an existing exe link,
+    except this daemon (self_pid) and pid 1.
+    """
+
+    ft1 = time()
+
+    # Filter locally instead of going through get_non_decimal_pids(),
+    # which inspects the module-level pid_list, not this fresh one.
+    pid_list = [
+        pid for pid in get_pid_list()
+        if pid[0].isdecimal() and pid != self_pid and pid != '1'
+    ]
+
+    pid_badness_list = []
+
+    if _print_proc_table:
+
+        extra_table_title = {
+            'cgroup_v1': 'CGroup_v1',
+            'cgroup_v2': 'CGroup_v2',
+            'cmdline': 'cmdline',
+            'environ': 'environ',
+            'realpath': 'realpath',
+            'All': '[CGroup] [CmdLine] [RealPath]',
+        }.get(extra_table_info, '')
+
+        hr = '#' * 115
+
+        log(hr)
+        log('# PID PPID badness oom_score oom_score_adj e'
+            'UID S VmSize VmRSS VmSwap Name {}'.format(
+                extra_table_title))
+        log('#------- ------- ------- --------- ------------- -------'
+            '--- - ------ ----- ------ --------------- --------')
+
+    for pid in pid_list:
+
+        badness = pid_to_badness(pid)[0]
+
+        # the process is gone
+        if badness is None:
+            continue
+
+        if _print_proc_table:
+
+            try:
+                oom_score = rline1('/proc/' + pid + '/oom_score')
+                oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
+            except FileNotFoundError:
+                continue
+
+            # Read the status once: a second read could return None if
+            # the process exits in between.
+            status = pid_to_status(pid)
+            if status is None:
+                continue
+            name, state, ppid, uid, vm_size, vm_rss, vm_swap = status
+
+            log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format(
+                pid.rjust(7),
+                ppid.rjust(7),
+                str(badness).rjust(7),
+                oom_score.rjust(9),
+                oom_score_adj.rjust(13),
+                uid.rjust(10),
+                state,
+                str(vm_size).rjust(6),
+                str(vm_rss).rjust(5),
+                str(vm_swap).rjust(6),
+                name.ljust(15),
+                _extra_table_line(pid)
+            )
+            )
+
+        pid_badness_list.append((pid, badness))
+
+    real_proc_num = len(pid_badness_list)
+
+    # Sort the (pid, badness) tuples by badness and take the worst one
+    pid, victim_badness = sorted(
+        pid_badness_list,
+        key=itemgetter(1),
+        reverse=True
+    )[0]
+
+    victim_name = pid_to_name(pid)
+
+    if _print_proc_table:
+        log(hr)
+
+    log('Found {} processes with existing /proc/[pid]/exe'.format(
+        real_proc_num))
+
+    log(
+        'Process with highest badness (found in {} ms):\n PID: {}, Na'
+        'me: {}, badness: {}'.format(
+            round((time() - ft1) * 1000),
+            pid,
+            victim_name,
+            victim_badness
+        )
+    )
+
+    return pid, victim_badness, victim_name
+
+
+def _extra_table_line(pid):
+    """Return the last process-table column selected by extra_table_info."""
+    if extra_table_info == 'cgroup_v1':
+        return pid_to_cgroup_v1(pid)
+    if extra_table_info == 'cgroup_v2':
+        return pid_to_cgroup_v2(pid)
+    if extra_table_info == 'cmdline':
+        return pid_to_cmdline(pid)
+    if extra_table_info == 'environ':
+        return pid_to_environ(pid)
+    if extra_table_info == 'realpath':
+        return pid_to_realpath(pid)
+    if extra_table_info == 'All':
+        return '[CG: {}] [CL: {}] [RP: {}]'.format(
+            pid_to_cgroup_v1(pid),
+            pid_to_cmdline(pid),
+            pid_to_realpath(pid)
+        )
+    # 'None' and unknown values: no extra column
+    return ''
+
+
+def find_victim_info(pid, victim_badness, name):
+    """Build the multi-line 'Victim information' report for a process.
+
+    pid: str pid of the victim
+    victim_badness: badness value shown in the report
+    name: str process name shown in the report
+    returns str report, or None if the victim's /proc files disappear or
+    cannot be parsed (the event is logged and counted in the statistics)
+    """
+    status0 = time()
+
+    try:
+        lines = _read_status_lines(pid)
+
+        # Lines are picked by position; a short file raises IndexError,
+        # which is reported below like any other sign of a dying victim.
+        state = lines[state_index].split('\t')[1].rstrip()
+        uid = lines[uid_index].split('\t')[2]
+        vm_size = _status_line_to_mib(lines[vm_size_index])
+        vm_rss = _status_line_to_mib(lines[vm_rss_index])
+
+        if detailed_rss:
+            anon_rss = _status_line_to_mib(lines[anon_index])
+            file_rss = _status_line_to_mib(lines[file_index])
+            shmem_rss = _status_line_to_mib(lines[shmem_index])
+
+        vm_swap = _status_line_to_mib(lines[vm_swap_index])
+
+        cmdline = pid_to_cmdline(pid)
+        oom_score = rline1('/proc/' + pid + '/oom_score')
+        oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
+
+        realpath = os.path.realpath('/proc/' + pid + '/exe')
+        victim_lifetime = format_time(uptime() - pid_to_starttime(pid))
+        victim_cgroup_v1 = pid_to_cgroup_v1(pid)
+        victim_cgroup_v2 = pid_to_cgroup_v2(pid)
+
+    except FileNotFoundError:
+        return _victim_died('FileNotFoundError')
+    except ProcessLookupError:
+        return _victim_died('ProcessLookupError')
+    except IndexError:
+        return _victim_died('IndexError')
+    except ValueError:
+        return _victim_died('ValueError')
+
+    len_vm = len(str(vm_size))
+
+    ancestry = pid_to_ancestry(pid, max_ancestry_depth)
+
+    if detailed_rss:
+        detailed_rss_info = ' (' \
+            'Anon: {} MiB, ' \
+            'File: {} MiB, ' \
+            'Shmem: {} MiB)'.format(
+                anon_rss,
+                file_rss,
+                shmem_rss)
+    else:
+        detailed_rss_info = ''
+
+    victim_info = 'Victim information (found in {} ms):' \
+        '\n Name: {}' \
+        '\n State: {}' \
+        '\n PID: {}' \
+        '{}' \
+        '\n EUID: {}' \
+        '\n badness: {}, ' \
+        'oom_score: {}, ' \
+        'oom_score_adj: {}' \
+        '\n VmSize: {} MiB' \
+        '\n VmRSS: {} MiB {}' \
+        '\n VmSwap: {} MiB' \
+        '\n CGroup_v1: {}' \
+        '\n CGroup_v2: {}' \
+        '\n Realpath: {}' \
+        '\n Cmdline: {}' \
+        '\n Lifetime: {}'.format(
+            round((time() - status0) * 1000),
+            name,
+            state,
+            pid,
+            ancestry,
+            uid,
+            victim_badness,
+            oom_score,
+            oom_score_adj,
+            vm_size,
+            str(vm_rss).rjust(len_vm),
+            detailed_rss_info,
+            str(vm_swap).rjust(len_vm),
+            victim_cgroup_v1,
+            victim_cgroup_v2,
+            realpath,
+            cmdline,
+            victim_lifetime)
+
+    return victim_info
+
+
+def _read_status_lines(pid):
+    """Return /proc/<pid>/status as a list of lines without newlines."""
+    path = '/proc/' + pid + '/status'
+    try:
+        with open(path) as f:
+            return f.read().split('\n')
+    except UnicodeDecodeError:
+        # the process name is not valid UTF-8: drop the bad bytes
+        with open(path, 'rb') as f:
+            return f.read().decode('utf-8', 'ignore').split('\n')
+
+
+def _status_line_to_mib(line):
+    """Convert a 'Key:<tab>N kB' status line (no newline) to MiB."""
+    # [:-3] drops the trailing ' kB'
+    return kib_to_mib(int(line.split('\t')[1][:-3]))
+
+
+def _victim_died(reason):
+    """Log and count a victim that vanished during the search."""
+    message = 'The victim died in the search process: ' + reason
+    log(message)
+    update_stat_dict_and_print(message)
+    return None
+
+
+def implement_corrective_action(signal):
+ """
+ Find victim with highest badness and send SIGTERM/SIGKILL
+ """
+ time0 = time()
+
+ # выходим из фции, если для SIGTERM порога не превышено время
+ # min_delay_after_sigterm и спим в течение over_sleep
+ if signal is SIGTERM:
+
+ dt = time() - actions_time_dict['action_handled'][0]
+
+ if dt < min_delay_after_sigterm:
+ print('min_delay_after_sigterm IS NOT EXCEEDED ({} < {})'.format(
+ round(dt, 3), min_delay_after_sigterm))
+
+ if print_sleep_periods:
+ log('Sleep {} sec [in implement_corrective_action()]'.format(
+ over_sleep))
+
+ sleep(over_sleep)
+
+ return None # время задержки между действиями не истекло
+ else:
+ print('min_delay_after_sigterm IS EXCEEDED, it is time to action')
+
+ """
+
+ При заходе в фцию проверяем права на сигтерм. Права на сигкилл у нас
+ всегда есть
+ (потому что идем дальше только после полн освободж памяти после
+ смерти жертвы)
+
+ actions_time_dict[action_handled] = time()
+ actions_time_dict[veto] = True
+
+ actions_time_dict['action_handled'] = [time(), victim_id]
+
+
+
+ """
+
+ log(mem_info)
+
+ pid, victim_badness, name = find_victim(print_proc_table)
+
+ if victim_badness >= min_badness:
+
+ if print_victim_info:
+ victim_info = find_victim_info(pid, victim_badness, name)
+ log(victim_info)
+
+ # пороги могли превысиься за время поиска жертвы (поиск может занимать
+ # сотни миллисекунд)
+ mem_available, swap_total, swap_free = check_mem_and_swap()
+
+ ma_mib = int(mem_available) / 1024.0
+ sf_mib = int(swap_free) / 1024.0
+ log('Memory status before implementing a corrective act'
+ 'ion:\n MemAvailable'
+ ': {} MiB, SwapFree: {} MiB'.format(
+ round(ma_mib, 1), round(sf_mib, 1)
+ )
+ )
+
+ if (mem_available <= mem_min_sigkill_kb and
+ swap_free <= swap_min_sigkill_kb):
+ log('Hard threshold exceeded')
+ signal = SIGKILL
+
+ victim_id = get_victim_id(pid)
+
+ # kill the victim if it doesn't respond to SIGTERM В ТЕЧЕНИЕ
+ # ЗАДАННОГО ВРЕМЕНИ
+
+ # переопределяем сигнал для старых жертв
+ if signal is SIGTERM:
+
+ if victim_id in victim_dict:
+
+ dt = time() - victim_dict[victim_id]
+
+ if dt > max_post_sigterm_victim_lifetime:
+ print('max_post_sigterm_victim_lifetime exceeded: the '
+ 'victim will get SIGKILL')
+ signal = SIGKILL
+
+ # matching with re to customize corrective actions
+ soft_match = False
+
+ if soft_actions and signal is SIGTERM:
+ name = pid_to_name(pid)
+ cgroup_v1 = pid_to_cgroup_v1(pid)
+ service = ''
+ cgroup_v1_tail = cgroup_v1.rpartition('/')[2]
+ if cgroup_v1_tail.endswith('.service'):
+ service = cgroup_v1_tail
+ for i in soft_actions_list:
+ unit = i[0]
+ if unit == 'name':
+ u = name
+ else:
+ u = cgroup_v1
+ regexp = i[1]
+ command = i[2]
+ if search(regexp, u) is not None:
+ log("Regexp '{}' matches with {} '{}'".format(
+ regexp, unit, u))
+ soft_match = True
+ break
+
+ if soft_match:
+
+ # todo: make new func
+ m = check_mem_and_swap()
+ ma = int(m[0]) / 1024.0
+ sf = int(m[2]) / 1024.0
+ log('Memory status before implementing a corrective act'
+ 'ion:\n MemAvailable'
+ ': {} MiB, SwapFree: {} MiB'.format(
+ round(ma, 1), round(sf, 1)
+ )
+ )
+
+ cmd = command.replace(
+ '$PID',
+ pid).replace(
+ '$NAME',
+ pid_to_name(pid)).replace(
+ '$SERVICE',
+ service)
+
+ exit_status = exe(cmd)
+
+ exit_status = str(exit_status)
+
+ response_time = time() - time0
+
+ # тут надо, как и при дефолтном действии, проверять существование
+ # жертвы, ее реакцию на действие,
+ # и время ее смерти в случае успеха, о обновление таймстемпов
+ # действия
+
+ etc_info = 'Implement a corrective act' \
+ 'ion:\n Run the command: {}' \
+ '\n Exit status: {}; total response ' \
+ 'time: {} ms'.format(
+ cmd,
+ exit_status,
+ round(response_time * 1000))
+
+ log(etc_info)
+
+ key = "Run the command '{}'".format(cmd)
+ update_stat_dict_and_print(key)
+
+ if gui_notifications:
+ send_notify_etc(
+ pid,
+ name,
+ command.replace('$PID', pid).replace(
+ '$NAME', pid_to_name(pid)))
+
+ else:
+
+ # обычное действие через сигнал
+ try:
+
+ os.kill(int(pid), signal)
+ kill_timestamp = time()
+ response_time = kill_timestamp - time0
+
+ while True:
+ exe_exists = os.path.exists('/proc/{}/exe'.format(pid))
+ rss = pid_to_rss(pid)
+ dt = time() - kill_timestamp
+ log('Victim VmRSS: {} KiB'.format(rss))
+ if not exe_exists or rss == 0 or dt > 0.01:
+ # print(dt)
+ break
+ sleep(0.001)
+
+ if dt > 0.01:
+ log('Timer (value = 0.01 sec) expired; seems'
+ ' like the victim handles signal')
+
+ actions_time_dict['action_handled'] = [
+ time(), get_victim_id(pid)]
+
+ if victim_id not in victim_dict: # хз как надо.
+ victim_dict.update({victim_id: time()})
+
+ # log('actions_time_dict', actions_time_dict)
+ # log('victim_dict', victim_dict)
+
+ else:
+ log('Process exited (VmRSS = 0) in {} sec'.format(
+ round(dt, 5)))
+
+ if signal is SIGKILL or not exe_exists or rss == 0:
+
+ while True:
+ sleep(0.001)
+ # рсс не важен когда путь не существует. Проверяй
+ # просто существование пид.
+ rss = pid_to_rss(pid)
+ if rss is None:
+ break
+ t1 = time()
+ kill_duration = t1 - kill_timestamp
+ log('The victim died in {} sec'.format(
+ round(kill_duration, 3)))
+
+ mem_available, swap_total, swap_free = check_mem_and_swap()
+
+ ma_mib = int(mem_available) / 1024.0
+ sf_mib = int(swap_free) / 1024.0
+ log('Memory status after implementing a corrective act'
+ 'ion:\n MemAvailable'
+ ': {} MiB, SwapFree: {} MiB'.format(
+ round(ma_mib, 1), round(sf_mib, 1)
+ )
+ )
+
+ send_result = 'total response time: {} ms'.format(
+ round(response_time * 1000))
+
+ preventing_oom_message = 'Implement a corrective action:' \
+ '\n Send {} to the victim; {}'.format(
+ sig_dict[signal], send_result)
+
+ key = 'Send {} to {}'.format(sig_dict[signal], name)
+
+ if signal is SIGKILL and post_kill_exe != '':
+
+ cmd = post_kill_exe.replace('$PID', pid).replace(
+ '$NAME', pid_to_name(pid))
+
+ log('Execute post_kill_exe')
+
+ exe(cmd)
+
+ if gui_notifications:
+ send_notify(signal, name, pid)
+
+ except FileNotFoundError:
+ response_time = time() - time0
+ send_result = 'no such process; response time: {} ms'.format(
+ round(response_time * 1000))
+ key = 'FileNotFoundError (the victim died in the se' \
+ 'arch process): '
+ except ProcessLookupError:
+ response_time = time() - time0
+ send_result = 'no such process; response time: {} ms'.format(
+ round(response_time * 1000))
+ key = 'ProcessLookupError (the victim died in the se' \
+ 'arch process): '
+
+ try:
+ log(preventing_oom_message)
+
+ except UnboundLocalError:
+ preventing_oom_message = key
+
+ update_stat_dict_and_print(key)
+
+ else:
+
+ response_time = time() - time0
+ victim_badness_is_too_small = 'victim badness {} < min_b' \
+ 'adness {}; nothing to do; response time: {} ms'.format(
+ victim_badness,
+ min_badness,
+ round(response_time * 1000))
+
+ log(victim_badness_is_too_small)
+
+ # update stat_dict
+ key = 'victim badness < min_badness'
+ update_stat_dict_and_print(key)
+
+ # тут надо поспать хорошенько. а может и счетчики поправить.
+ # херню несу. во-первых, внезапно может кто-то появиться c блльшим
+ # бэднес.. Далее надо минимизировать аутпут спам.
+ sleep(over_sleep)
+
+ # обновлять время не на каждый кил, а только на килл той жертвы,
+ # которая не отвечала на софт экшн.
+ # Вывод: ко времени действия прилагать также виктим айди.
+
+ print('##################################################################')
+
+
+def sleep_after_check_mem():
+ """Specify sleep times depends on rates and avialable memory."""
+
+ if stable_sleep:
+
+ if print_sleep_periods:
+ log('Sleep {} sec'.format(min_sleep))
+
+ sleep(min_sleep)
+ return None
+
+ if mem_min_sigkill_kb < mem_min_sigterm_kb:
+ mem_point = mem_available - mem_min_sigterm_kb
+ else:
+ mem_point = mem_available - mem_min_sigkill_kb
+
+ if swap_min_sigkill_kb < swap_min_sigterm_kb:
+ swap_point = swap_free - swap_min_sigterm_kb
+ else:
+ swap_point = swap_free - swap_min_sigkill_kb
+
+ if swap_point < 0:
+ swap_point = 0
+
+ if mem_point < 0:
+ mem_point = 0
+
+ t_mem = mem_point / rate_mem
+ t_swap = swap_point / rate_swap
+
+ if CHECK_ZRAM:
+ t_zram = (mem_total * 0.8 - mem_used_zram) / rate_zram
+ if t_zram < 0:
+ t_zram = 0
+ t_mem_zram = t_mem + t_zram
+
+ t_mem_swap = t_mem + t_swap
+
+ if CHECK_ZRAM:
+
+ if t_mem_swap <= t_mem_zram:
+ t = t_mem_swap
+ else:
+ t = t_mem_zram
+ else:
+ t = t_mem_swap
+
+ if t > max_sleep:
+ t = max_sleep
+ elif t < min_sleep:
+ t = min_sleep
+ else:
+ pass
+
+ if print_sleep_periods:
+
+ log(
+ 'Sleep {} sec (t_mem={}, t_swap={}, t_zram={})'.format(
+ round(t, 2),
+ round(t_mem, 2),
+ round(t_swap, 2),
+ round(t_zram, 2)
+ )
+ )
+
+ try:
+ stdout.flush()
+ except OSError:
+ pass
+
+ sleep(t)
+
+
+def calculate_percent(arg_key):
+ """
+ parse conf dict
+ Calculate mem_min_KEY_percent.
+
+ Try use this one)
+ arg_key: str key for config_dict
+ returns int mem_min_percent or NoneType if got some error
+ """
+
+ if arg_key in config_dict:
+ mem_min = config_dict[arg_key]
+
+ if mem_min.endswith('%'):
+ # truncate percents, so we have a number
+ mem_min_percent = mem_min[:-1].strip()
+ # then 'float test'
+ mem_min_percent = string_to_float_convert_test(mem_min_percent)
+ if mem_min_percent is None:
+ errprint('Invalid {} value, not float\nExit'.format(arg_key))
+ exit(1)
+ # Final validations...
+ if mem_min_percent < 0 or mem_min_percent > 100:
+ errprint(
+ '{}, as percents value, out of ran'
+ 'ge [0; 100]\nExit'.format(arg_key))
+ exit(1)
+
+ # mem_min_sigterm_percent is clean and valid float percentage. Can
+ # translate into Kb
+ mem_min_kb = mem_min_percent / 100 * mem_total
+ mem_min_mb = round(mem_min_kb / 1024)
+
+ elif mem_min.endswith('M'):
+ mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip())
+ if mem_min_mb is None:
+ errprint('Invalid {} value, not float\nExit'.format(arg_key))
+ exit(1)
+ mem_min_kb = mem_min_mb * 1024
+ if mem_min_kb > mem_total:
+ errprint(
+ '{} value can not be greater then MemT'
+ 'otal ({} MiB)\nExit'.format(
+ arg_key, round(
+ mem_total / 1024)))
+ exit(1)
+ mem_min_percent = mem_min_kb / mem_total * 100
+
+ else:
+ log('Invalid {} units in config.\n Exit'.format(arg_key))
+ exit(1)
+ mem_min_percent = None
+
+ else:
+ log('{} not in config\nExit'.format(arg_key))
+ exit(1)
+ mem_min_percent = None
+
+ return mem_min_kb, mem_min_mb, mem_min_percent
+
+
+##########################################################################
+
+
+start_time = time()
+
+
+help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG]
+
+optional arguments:
+ -h, --help show this help message and exit
+ -v, --version print version
+ -t, --test print some tests
+ -p, --print-proc-table
+ print table of processes with their badness values
+ -c CONFIG, --config CONFIG
+ path to the config file, default values:
+ ./nohang.conf, /etc/nohang/nohang.conf"""
+
+
+SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
+
+SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE'])
+
+conf_err_mess = 'Invalid config. Exit.'
+
+sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP]
+
+sig_dict = {
+ SIGKILL: 'SIGKILL',
+ SIGINT: 'SIGINT',
+ SIGQUIT: 'SIGQUIT',
+ SIGHUP: 'SIGHUP',
+ SIGTERM: 'SIGTERM'
+}
+
+self_pid = str(os.getpid())
+
+self_uid = os.geteuid()
+
+if self_uid == 0:
+ root = True
+else:
+ root = False
+
+
+if os.path.exists('./nohang_notify_helper'):
+ notify_helper_path = './nohang_notify_helper'
+else:
+ notify_helper_path = '/usr/sbin/nohang_notify_helper'
+
+
+victim_dict = dict()
+
+
+victim_id = None
+actions_time_dict = dict()
+actions_time_dict['action_handled'] = [time(), victim_id]
+# print(actions_time_dict)
+
+
+# will store corrective actions stat
+stat_dict = dict()
+
+
+separate_log = False # will be overwritten after parse config
+
+
+cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes()
+
+
+self_oom_score_adj_min = '-600'
+self_oom_score_adj_max = '-6'
+
+
+write_self_oom_score_adj(self_oom_score_adj_min)
+
+
+pid_list = get_pid_list()
+
+
+print_proc_table_flag = False
+
+if len(argv) == 1:
+ if os.path.exists('./nohang.conf'):
+ config = os.getcwd() + '/nohang.conf'
+ else:
+ config = '/etc/nohang/nohang.conf'
+
+elif len(argv) == 2:
+ if argv[1] == '--help' or argv[1] == '-h':
+ print(help_mess)
+ exit()
+ elif argv[1] == '--version' or argv[1] == '-v':
+ print_version()
+ elif argv[1] == '--test' or argv[1] == '-t':
+ test()
+ elif argv[1] == '--print-proc-table' or argv[1] == '-p':
+ print_proc_table_flag = True
+ if os.path.exists('./nohang.conf'):
+ config = os.getcwd() + '/nohang.conf'
+ else:
+ config = '/etc/nohang/nohang.conf'
+ else:
+ errprint('Unknown option: {}'.format(argv[1]))
+ exit(1)
+
+elif len(argv) == 3:
+ if argv[1] == '--config' or argv[1] == '-c':
+ config = argv[2]
+ else:
+ errprint('Unknown option: {}'.format(argv[1]))
+ exit(1)
+
+else:
+ errprint('Invalid CLI input: too many options')
+ exit(1)
+
+
+# find mem_total
+# find positions of SwapFree and SwapTotal in /proc/meminfo
+
+with open('/proc/meminfo') as f:
+ mem_list = f.readlines()
+
+mem_list_names = []
+for s in mem_list:
+ mem_list_names.append(s.split(':')[0])
+
+if mem_list_names[2] != 'MemAvailable':
+ errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied')
+ # exit(1)
+
+swap_total_index = mem_list_names.index('SwapTotal')
+swap_free_index = swap_total_index + 1
+
+mem_total = int(mem_list[0].split(':')[1][:-4])
+
+# Get names from /proc/*/status to be able to get VmRSS and VmSwap values
+
+with open('/proc/self/status') as file:
+ status_list = file.readlines()
+
+status_names = []
+for s in status_list:
+ status_names.append(s.split(':')[0])
+
+ppid_index = status_names.index('PPid')
+vm_size_index = status_names.index('VmSize')
+vm_rss_index = status_names.index('VmRSS')
+vm_swap_index = status_names.index('VmSwap')
+uid_index = status_names.index('Uid')
+state_index = status_names.index('State')
+
+
+try:
+ anon_index = status_names.index('RssAnon')
+ file_index = status_names.index('RssFile')
+ shmem_index = status_names.index('RssShmem')
+ detailed_rss = True
+ # print(detailed_rss, 'detailed_rss')
+except ValueError:
+ detailed_rss = False
+ # print('It is not Linux 4.5+')
+
+
+log('Config: ' + config)
+
+
+##########################################################################
+
+# parsing the config with obtaining the parameters dictionary
+
+# conf_parameters_dict
+# conf_restart_dict
+
+# dictionary with config options
+config_dict = dict()
+
+processname_re_list = []
+cmdline_re_list = []
+environ_re_list = []
+uid_re_list = []
+cgroup_v1_re_list = []
+cgroup_v2_re_list = []
+realpath_re_list = []
+
+soft_actions_list = []
+
+
+# separator for optional parameters (that starts with @)
+opt_separator = '///'
+
+
+# stupid conf parsing, need refactoring
+try:
+ with open(config) as f:
+
+ for line in f:
+
+ a = line.startswith('#')
+ b = line.startswith('\n')
+ c = line.startswith('\t')
+ d = line.startswith(' ')
+
+ etc = line.startswith('@SOFT_ACTION_RE_NAME')
+ etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1')
+
+ if not a and not b and not c and not d and not etc and not etc2:
+ a = line.partition('=')
+
+ key = a[0].strip()
+ value = a[2].strip()
+
+ if key not in config_dict:
+ config_dict[key] = value
+ else:
+ log('ERROR: config key duplication: {}'.format(key))
+ exit(1)
+
+ if etc:
+
+ a = line.partition('@SOFT_ACTION_RE_NAME')[
+ 2].partition(opt_separator)
+
+ a1 = 'name'
+
+ a2 = a[0].strip()
+ valid_re(a2)
+
+ a3 = a[2].strip()
+
+ zzz = (a1, a2, a3)
+
+ soft_actions_list.append(zzz)
+
+ if etc2:
+
+ a = line.partition('@SOFT_ACTION_RE_CGROUP_V1')[
+ 2].partition(opt_separator)
+
+ a1 = 'cgroup_v1'
+
+ a2 = a[0].strip()
+ valid_re(a2)
+
+ a3 = a[2].strip()
+
+ zzz = (a1, a2, a3)
+
+ soft_actions_list.append(zzz)
+
+ if line.startswith('@PROCESSNAME_RE'):
+ a = line.partition(
+ '@PROCESSNAME_RE')[2].strip(' \n').partition(opt_separator)
+ badness_adj = a[0].strip(' ')
+ reg_exp = a[2].strip(' ')
+ valid_re(reg_exp)
+ processname_re_list.append((badness_adj, reg_exp))
+
+ if line.startswith('@CMDLINE_RE'):
+ a = line.partition(
+ '@CMDLINE_RE')[2].strip(' \n').partition(opt_separator)
+ badness_adj = a[0].strip(' ')
+ reg_exp = a[2].strip(' ')
+ valid_re(reg_exp)
+ cmdline_re_list.append((badness_adj, reg_exp))
+
+ if line.startswith('@UID_RE'):
+ a = line.partition(
+ '@UID_RE')[2].strip(' \n').partition(opt_separator)
+ badness_adj = a[0].strip(' ')
+ reg_exp = a[2].strip(' ')
+ valid_re(reg_exp)
+ uid_re_list.append((badness_adj, reg_exp))
+
+ if line.startswith('@CGROUP_V1_RE'):
+ a = line.partition(
+ '@CGROUP_V1_RE')[2].strip(' \n').partition(opt_separator)
+ badness_adj = a[0].strip(' ')
+ reg_exp = a[2].strip(' ')
+ valid_re(reg_exp)
+ cgroup_v1_re_list.append((badness_adj, reg_exp))
+
+ if line.startswith('@CGROUP_V2_RE'):
+ a = line.partition(
+ '@CGROUP_V2_RE')[2].strip(' \n').partition(opt_separator)
+ badness_adj = a[0].strip(' ')
+ reg_exp = a[2].strip(' ')
+ valid_re(reg_exp)
+ cgroup_v2_re_list.append((badness_adj, reg_exp))
+
+ if line.startswith('@REALPATH_RE'):
+ a = line.partition(
+ '@REALPATH_RE')[2].strip(' \n').partition(opt_separator)
+ badness_adj = a[0].strip(' ')
+ reg_exp = a[2].strip(' ')
+ valid_re(reg_exp)
+ realpath_re_list.append((badness_adj, reg_exp))
+
+ if line.startswith('@ENVIRON_RE'):
+ a = line.partition(
+ '@ENVIRON_RE')[2].strip(' \n').partition(opt_separator)
+ badness_adj = a[0].strip(' ')
+ reg_exp = a[2].strip(' ')
+ valid_re(reg_exp)
+ environ_re_list.append((badness_adj, reg_exp))
+
+
+except PermissionError:
+ errprint('PermissionError', conf_err_mess)
+ exit(1)
+except UnicodeDecodeError:
+ errprint('UnicodeDecodeError', conf_err_mess)
+ exit(1)
+except IsADirectoryError:
+ errprint('IsADirectoryError', conf_err_mess)
+ exit(1)
+except IndexError:
+ errprint('IndexError', conf_err_mess)
+ exit(1)
+except FileNotFoundError:
+ errprint('FileNotFoundError', conf_err_mess)
+ exit(1)
+
+
+if processname_re_list == []:
+ regex_matching = False
+else:
+ regex_matching = True
+
+
+if cmdline_re_list == []:
+ re_match_cmdline = False
+else:
+ re_match_cmdline = True
+
+
+if uid_re_list == []:
+ re_match_uid = False
+else:
+ re_match_uid = True
+
+
+if environ_re_list == []:
+ re_match_environ = False
+else:
+ re_match_environ = True
+
+
+if realpath_re_list == []:
+ re_match_realpath = False
+else:
+ re_match_realpath = True
+
+
+if cgroup_v1_re_list == []:
+ re_match_cgroup_v1 = False
+else:
+ re_match_cgroup_v1 = True
+
+if cgroup_v2_re_list == []:
+ re_match_cgroup_v2 = False
+else:
+ re_match_cgroup_v2 = True
+
+
+# print(processname_re_list)
+# print(cmdline_re_list)
+# print(uid_re_list)
+# print(environ_re_list)
+# print(realpath_re_list)
+# print(cgroup_v1_re_list)
+# print(cgroup_v2_re_list)
+
+# print(soft_actions_list)
+
+if soft_actions_list == []:
+ soft_actions = False
+else:
+ soft_actions = True
+
+# print('soft_actions:', soft_actions)
+
+##########################################################################
+
+
+# Extract the parameters from config_dict, checking that every required
+# parameter is present and valid.
+
+# Boolean options. conf_parse_bool() is defined elsewhere in the file;
+# presumably it reads the named key from config_dict and exits on a
+# missing or invalid value - TODO confirm.
+psi_debug = conf_parse_bool('psi_debug')
+print_total_stat = conf_parse_bool('print_total_stat')
+print_proc_table = conf_parse_bool('print_proc_table')
+forbid_negative_badness = conf_parse_bool('forbid_negative_badness')
+print_victim_info = conf_parse_bool('print_victim_info')
+print_config = conf_parse_bool('print_config')
+print_mem_check_results = conf_parse_bool('print_mem_check_results')
+print_sleep_periods = conf_parse_bool('print_sleep_periods')
+gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
+gui_notifications = conf_parse_bool('gui_notifications')
+decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
+ignore_psi = conf_parse_bool('ignore_psi')
+ignore_zram = conf_parse_bool('ignore_zram')
+
+
+# Memory and zram thresholds. calculate_percent() returns the value as
+# (KiB, MiB, percent of mem_total) and exits on an invalid option.
+(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent
+ ) = calculate_percent('mem_min_sigterm')
+
+(mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent
+ ) = calculate_percent('mem_min_sigkill')
+
+(zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent
+ ) = calculate_percent('zram_max_sigterm')
+
+(zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent
+ ) = calculate_percent('zram_max_sigkill')
+
+(mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent
+ ) = calculate_percent('mem_min_warnings')
+
+(zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent
+ ) = calculate_percent('zram_max_warnings')
+
+
+if 'rate_mem' in config_dict:
+ rate_mem = string_to_float_convert_test(config_dict['rate_mem'])
+ if rate_mem is None:
+ errprint('Invalid rate_mem value, not float\nExit')
+ exit(1)
+ if rate_mem <= 0:
+ errprint('rate_mem MUST be > 0\nExit')
+ exit(1)
+else:
+ errprint('rate_mem not in config\nExit')
+ exit(1)
+
+
+if 'rate_swap' in config_dict:
+ rate_swap = string_to_float_convert_test(config_dict['rate_swap'])
+ if rate_swap is None:
+ errprint('Invalid rate_swap value, not float\nExit')
+ exit(1)
+ if rate_swap <= 0:
+ errprint('rate_swap MUST be > 0\nExit')
+ exit(1)
+else:
+ errprint('rate_swap not in config\nExit')
+ exit(1)
+
+
+if 'rate_zram' in config_dict:
+ rate_zram = string_to_float_convert_test(config_dict['rate_zram'])
+ if rate_zram is None:
+ errprint('Invalid rate_zram value, not float\nExit')
+ exit(1)
+ if rate_zram <= 0:
+ errprint('rate_zram MUST be > 0\nExit')
+ exit(1)
+else:
+ errprint('rate_zram not in config\nExit')
+ exit(1)
+
+
+if 'swap_min_sigterm' in config_dict:
+ swap_min_sigterm = config_dict['swap_min_sigterm']
+else:
+ errprint('swap_min_sigterm not in config\nExit')
+ exit(1)
+
+
+if 'swap_min_sigkill' in config_dict:
+ swap_min_sigkill = config_dict['swap_min_sigkill']
+else:
+ errprint('swap_min_sigkill not in config\nExit')
+ exit(1)
+
+
+if 'min_delay_after_sigterm' in config_dict:
+ min_delay_after_sigterm = string_to_float_convert_test(
+ config_dict['min_delay_after_sigterm'])
+ if min_delay_after_sigterm is None:
+ errprint('Invalid min_delay_after_sigterm value, not float\nExit')
+ exit(1)
+ if min_delay_after_sigterm < 0:
+ errprint('min_delay_after_sigterm must be positiv\nExit')
+ exit(1)
+else:
+ errprint('min_delay_after_sigterm not in config\nExit')
+ exit(1)
+
+
+if 'psi_post_action_delay' in config_dict:
+ psi_post_action_delay = string_to_float_convert_test(
+ config_dict['psi_post_action_delay'])
+ if psi_post_action_delay is None:
+ errprint('Invalid psi_post_action_delay value, not float\nExit')
+ exit(1)
+ if psi_post_action_delay < 0:
+ errprint('psi_post_action_delay must be positive\nExit')
+ exit(1)
+else:
+ errprint('psi_post_action_delay not in config\nExit')
+ exit(1)
+
+
+if 'sigkill_psi_threshold' in config_dict:
+ sigkill_psi_threshold = string_to_float_convert_test(
+ config_dict['sigkill_psi_threshold'])
+ if sigkill_psi_threshold is None:
+ errprint('Invalid sigkill_psi_threshold value, not float\nExit')
+ exit(1)
+ if sigkill_psi_threshold < 0 or sigkill_psi_threshold > 100:
+ errprint('sigkill_psi_threshold must be in the range [0; 100]\nExit')
+ exit(1)
+else:
+ errprint('sigkill_psi_threshold not in config\nExit')
+ exit(1)
+
+
+if 'sigterm_psi_threshold' in config_dict:
+ sigterm_psi_threshold = string_to_float_convert_test(
+ config_dict['sigterm_psi_threshold'])
+ if sigterm_psi_threshold is None:
+ errprint('Invalid sigterm_psi_threshold value, not float\nExit')
+ exit(1)
+ if sigterm_psi_threshold < 0 or sigterm_psi_threshold > 100:
+ errprint('sigterm_psi_threshold must be in the range [0; 100]\nExit')
+ exit(1)
+else:
+ errprint('sigterm_psi_threshold not in config\nExit')
+ exit(1)
+
+
+if 'min_badness' in config_dict:
+ min_badness = string_to_int_convert_test(
+ config_dict['min_badness'])
+ if min_badness is None:
+ errprint('Invalid min_badness value, not integer\nExit')
+ exit(1)
+ if min_badness < 0 or min_badness > 1000:
+ errprint('Invalud min_badness value\nExit')
+ exit(1)
+else:
+ errprint('min_badness not in config\nExit')
+ exit(1)
+
+
+if 'oom_score_adj_max' in config_dict:
+ oom_score_adj_max = string_to_int_convert_test(
+ config_dict['oom_score_adj_max'])
+ if oom_score_adj_max is None:
+ errprint('Invalid oom_score_adj_max value, not integer\nExit')
+ exit(1)
+ if oom_score_adj_max < 0 or oom_score_adj_max > 1000:
+ errprint('Invalid oom_score_adj_max value\nExit')
+ exit(1)
+else:
+ errprint('oom_score_adj_max not in config\nExit')
+ exit(1)
+
+
+if 'min_time_between_warnings' in config_dict:
+ min_time_between_warnings = string_to_float_convert_test(
+ config_dict['min_time_between_warnings'])
+ if min_time_between_warnings is None:
+ errprint('Invalid min_time_between_warnings value, not float\nExit')
+ exit(1)
+ if min_time_between_warnings < 1 or min_time_between_warnings > 300:
+ errprint('min_time_between_warnings value out of range [1; 300]\nExit')
+ exit(1)
+else:
+ errprint('min_time_between_warnings not in config\nExit')
+ exit(1)
+
+
+if 'swap_min_warnings' in config_dict:
+ swap_min_warnings = config_dict['swap_min_warnings']
+else:
+ errprint('swap_min_warnings not in config\nExit')
+ exit(1)
+
+
+if 'max_ancestry_depth' in config_dict:
+ max_ancestry_depth = string_to_int_convert_test(
+ config_dict['max_ancestry_depth'])
+ if min_badness is None:
+ errprint('Invalid max_ancestry_depth value, not integer\nExit')
+ exit(1)
+ if max_ancestry_depth < 1:
+ errprint('Invalud max_ancestry_depth value\nExit')
+ exit(1)
+else:
+ errprint('max_ancestry_depth is not in config\nExit')
+ exit(1)
+
+
+if 'max_post_sigterm_victim_lifetime' in config_dict:
+ max_post_sigterm_victim_lifetime = string_to_float_convert_test(
+ config_dict['max_post_sigterm_victim_lifetime'])
+ if max_post_sigterm_victim_lifetime is None:
+ errprint('Invalid max_post_sigterm_victim_lifetime val'
+ 'ue, not float\nExit')
+ exit(1)
+ if max_post_sigterm_victim_lifetime < 0:
+ errprint('max_post_sigterm_victim_lifetime must be non-n'
+ 'egative number\nExit')
+ exit(1)
+else:
+ errprint('max_post_sigterm_victim_lifetime is not in config\nExit')
+ exit(1)
+
+
+if 'post_kill_exe' in config_dict:
+ post_kill_exe = config_dict['post_kill_exe']
+else:
+ errprint('post_kill_exe is not in config\nExit')
+ exit(1)
+
+
+if 'psi_path' in config_dict:
+ psi_path = config_dict['psi_path']
+else:
+ errprint('psi_path is not in config\nExit')
+ exit(1)
+
+
+
+
+
+
+
+if 'psi_target' in config_dict:
+ psi_target = config_dict['psi_target']
+else:
+ errprint('psi_target is not in config\nExit')
+ exit(1)
+
+
+
+
+
+
+
+
+
+
+
+
+
+if 'psi_metrics' in config_dict:
+ psi_metrics = config_dict['psi_metrics']
+else:
+ errprint('psi_metrics is not in config\nExit')
+ exit(1)
+
+
+if 'warning_exe' in config_dict:
+ warning_exe = config_dict['warning_exe']
+ if warning_exe != '':
+ check_warning_exe = True
+ else:
+ check_warning_exe = False
+else:
+ errprint('warning_exe is not in config\nExit')
+ exit(1)
+
+
+if 'extra_table_info' in config_dict:
+ extra_table_info = config_dict['extra_table_info']
+ if (extra_table_info != 'None' and
+ extra_table_info != 'cgroup_v1' and
+ extra_table_info != 'cgroup_v2' and
+ extra_table_info != 'cmdline' and
+ extra_table_info != 'environ' and
+ extra_table_info != 'realpath' and
+ extra_table_info != 'All'):
+
+ errprint('Invalid config: invalid extra_table_info value\nExit')
+ exit(1)
+else:
+ errprint('Invalid config: extra_table_info is not in config\nExit')
+ exit(1)
+
+
+separate_log = conf_parse_bool('separate_log')
+
+if separate_log:
+
+ import logging
+ from logging import basicConfig
+ from logging import info
+
+ log_dir = '/var/log/nohang'
+
+ try:
+ os.mkdir(log_dir)
+ except PermissionError:
+ print('ERROR: can not create log dir')
+ except FileExistsError:
+ pass
+
+ logfile = log_dir + '/nohang.log'
+
+ try:
+ with open(logfile, 'a') as f:
+ pass
+ except FileNotFoundError:
+ print('ERROR: log FileNotFoundError')
+ except PermissionError:
+ print('ERROR: log PermissionError')
+
+ try:
+ basicConfig(
+ filename=logfile,
+ level=logging.INFO,
+ format="%(asctime)s: %(message)s")
+ except PermissionError:
+ errprint('ERROR: Permission denied: {}'.format(logfile))
+ except FileNotFoundError:
+ errprint('ERROR: FileNotFoundError: {}'.format(logfile))
+
+
+if 'min_mem_report_interval' in config_dict:
+ min_mem_report_interval = string_to_float_convert_test(
+ config_dict['min_mem_report_interval'])
+ if min_mem_report_interval is None:
+ errprint('Invalid min_mem_report_interval value, not float\nExit')
+ exit(1)
+ if min_mem_report_interval < 0:
+ errprint('min_mem_report_interval must be non-negative number\nExit')
+ exit(1)
+else:
+ errprint('min_mem_report_interval is not in config\nExit')
+ exit(1)
+
+
+if 'max_sleep' in config_dict:
+ max_sleep = string_to_float_convert_test(
+ config_dict['max_sleep'])
+ if max_sleep is None:
+ errprint('Invalid max_sleep value, not float\nExit')
+ exit(1)
+ if max_sleep <= 0:
+ errprint('max_sleep must be positive number\nExit')
+ exit(1)
+else:
+ errprint('max_sleep is not in config\nExit')
+ exit(1)
+
+
+if 'min_sleep' in config_dict:
+ min_sleep = string_to_float_convert_test(
+ config_dict['min_sleep'])
+ if min_sleep is None:
+ errprint('Invalid min_sleep value, not float\nExit')
+ exit(1)
+ if min_sleep <= 0:
+ errprint('min_sleep must be positive number\nExit')
+ exit(1)
+else:
+ errprint('min_sleep is not in config\nExit')
+ exit(1)
+
+
+if 'over_sleep' in config_dict:
+ over_sleep = string_to_float_convert_test(
+ config_dict['over_sleep'])
+ if over_sleep is None:
+ errprint('Invalid over_sleep value, not float\nExit')
+ exit(1)
+ if over_sleep <= 0:
+ errprint('over_sleep must be positive number\nExit')
+ exit(1)
+else:
+ errprint('over_sleep is not in config\nExit')
+ exit(1)
+
+
+if max_sleep < min_sleep:
+ errprint(
+ 'max_sleep value must not exceed min_sleep value.\nExit'
+ )
+ exit(1)
+
+
+if min_sleep < over_sleep:
+ errprint(
+ 'min_sleep value must not exceed over_sleep value.\nExit'
+ )
+ exit(1)
+
+
+if max_sleep == min_sleep:
+ stable_sleep = True
+else:
+ stable_sleep = False
+
+
+if print_proc_table_flag:
+
+ if not root:
+ log('WARNING: effective UID != 0; euid={}; processes with other e'
+ 'uids will be invisible for nohang'.format(self_uid))
+
+ func_print_proc_table()
+
+
+##########################################################################
+
+
+psi_support = os.path.exists(psi_path)
+
+
+##########################################################################
+
+
+# Get KiB levels if it's possible.
+
+
+swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm)
+swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill)
+swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings)
+
+
+swap_term_is_percent = swap_min_sigterm_tuple[1]
+if swap_term_is_percent:
+ swap_min_sigterm_percent = swap_min_sigterm_tuple[0]
+else:
+ swap_min_sigterm_kb = swap_min_sigterm_tuple[0]
+
+
+swap_kill_is_percent = swap_min_sigkill_tuple[1]
+if swap_kill_is_percent:
+ swap_min_sigkill_percent = swap_min_sigkill_tuple[0]
+else:
+ swap_min_sigkill_kb = swap_min_sigkill_tuple[0]
+
+
+swap_warn_is_percent = swap_min_warnings_tuple[1]
+if swap_warn_is_percent:
+ swap_min_warnings_percent = swap_min_warnings_tuple[0]
+else:
+ swap_min_warnings_kb = swap_min_warnings_tuple[0]
+
+
+##########################################################################
+
+# Print the parsed configuration when print_config is enabled.
+# NOTE: this section is outdated and does not cover all options; the
+# output itself says that it needs a fix.
+
+if print_config:
+
+ # 1. memory, swap and zram thresholds
+ print(
+ '\n1. Memory levels to respond to as an OOM threat\n[display'
+ 'ing these options need fix]\n')
+
+ print('mem_min_sigterm: {} MiB, {} %'.format(
+ round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1)))
+ print('mem_min_sigkill: {} MiB, {} %'.format(
+ round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1)))
+
+ # swap thresholds are printed as the raw config strings
+ print('swap_min_sigterm: {}'.format(swap_min_sigterm))
+ print('swap_min_sigkill: {}'.format(swap_min_sigkill))
+
+ print('zram_max_sigterm: {} MiB, {} %'.format(
+ round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1)))
+ print('zram_max_sigkill: {} MiB, {} %'.format(
+ round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1)))
+
+ # 2. rates used to estimate the sleep time between checks
+ print('\n2. The frequency of checking the level of available m'
+ 'emory (and CPU usage)\n')
+ print('rate_mem: {}'.format(rate_mem))
+ print('rate_swap: {}'.format(rate_swap))
+ print('rate_zram: {}'.format(rate_zram))
+
+ # 3. victim selection limits
+ print('\n3. The prevention of killing innocent victims\n')
+ print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm))
+ print('min_badness: {}'.format(min_badness))
+
+ print('decrease_oom_score_adj: {}'.format(
+ decrease_oom_score_adj
+ ))
+ if decrease_oom_score_adj:
+ print('oom_score_adj_max: {}'.format(oom_score_adj_max))
+
+ # 4. and 5. only the headings are printed, the values are not shown yet
+ print('\n4. Impact on the badness of processes via matching their'
+ ' names, cmdlines ir UIDs with regular expressions\n')
+
+ print('(todo)')
+
+ print('\n5. The execution of a specific command instead of sen'
+ 'ding the\nSIGTERM signal\n')
+
+ # 6. GUI notifications and low memory warnings
+ print('\n6. GUI notifications:\n- OOM prevention results and\n- low m'
+ 'emory warnings\n')
+ print('gui_notifications: {}'.format(gui_notifications))
+
+ print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings))
+ if gui_low_memory_warnings:
+ print('min_time_between_warnings: {}'.format(
+ min_time_between_warnings))
+
+ print('mem_min_warnings: {} MiB, {} %'.format(
+ round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1)))
+
+ print('swap_min_warnings: {}'.format(swap_min_warnings))
+
+ print('zram_max_warnings: {} MiB, {} %'.format(
+ round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1)))
+
+ # 7. verbosity switches
+ print('\n7. Output verbosity\n')
+ print('print_config: {}'.format(print_config))
+ print('print_mem_check_results: {}'.format(print_mem_check_results))
+ print('print_sleep_periods: {}\n'.format(print_sleep_periods))
+
+
+##########################################################################
+
+
+# Final initialisation before the main loop starts.
+
+# for calculating the column width when printing mem and zram:
+# number of digits in MemTotal expressed in MiB
+mem_len = len(str(round(mem_total / 1024.0)))
+
+# signal -> verb; defined only when GUI notifications are enabled
+if gui_notifications:
+ notify_sig_dict = {SIGKILL: 'Killing',
+ SIGTERM: 'Terminating'}
+
+
+# convert rates from MiB/s to KiB/s
+rate_mem = rate_mem * 1024
+rate_swap = rate_swap * 1024
+rate_zram = rate_zram * 1024
+
+
+# initial state of the low memory warning timers
+warn_time_now = 0
+warn_time_delta = 1000
+warn_timer = 0
+
+
+##########################################################################
+
+
+if not root:
+ log('WARNING: effective UID != 0; euid={}; processes with other e'
+ 'uids will be invisible for nohang'.format(self_uid))
+
+
+# Try to lock all memory
+
+mlockall()
+
+##########################################################################
+
+
+# print_self_rss()
+
+
+psi_avg_string = '' # will be overwritten if PSI monitoring enabled
+
+# stays 0 unless zram checking updates it in the main loop
+mem_used_zram = 0
+
+# reference timestamp compared with psi_post_action_delay in the main loop
+if psi_support and not ignore_psi:
+ psi_t0 = time()
+
+
+if print_mem_check_results:
+
+ # to find delta mem
+ wt2 = 0
+ new_mem = 0
+
+ # init mem report interval
+ report0 = 0
+
+
+# handle signals: install signal_handler for every signal in sig_list
+# NOTE: the loop variable i stays defined at module level afterwards
+for i in sig_list:
+ signal(i, signal_handler)
+
+
+# PSI is checked only if it is supported and not disabled in the config
+CHECK_PSI = False
+if psi_support and not ignore_psi:
+ CHECK_PSI = True
+
+
+CHECK_ZRAM = not ignore_zram
+
+log('Monitoring has started!')
+
+stdout.flush()
+
+
+
+
+
+
+i = cgroup2_root()
+
+print(i)
+print(psi_target)
+
+i = /foo
+
+
+##########################################################################
+
+
+while True:
+ # One pass: sample mem/swap/zram/PSI, then test the SIGKILL, SIGTERM and warning thresholds.
+ # Q = time()
+
+ # FIND VALUES: mem, swap, zram, psi
+
+ mem_available, swap_total, swap_free = check_mem_and_swap()
+
+ # thresholds configured in percent are recomputed from the current swap_total on every pass
+ if swap_kill_is_percent:
+ swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0
+
+ if swap_term_is_percent:
+ swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0
+
+ if swap_warn_is_percent:
+ swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0
+ # share of swap_total for the mem_info text below; '-' if swap_total <= threshold
+ if swap_total > swap_min_sigkill_kb:
+ swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1))
+ else:
+ swap_sigkill_pc = '-'
+
+ if swap_total > swap_min_sigterm_kb:
+ swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1))
+ else:
+ swap_sigterm_pc = '-'
+ # mem_used_zram is refreshed only when zram checking is enabled
+ if CHECK_ZRAM:
+ mem_used_zram = check_zram()
+ # read the PSI value; the delay flag compares the time since psi_t0 with psi_post_action_delay
+ if CHECK_PSI:
+ psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
+ if time() - psi_t0 >= psi_post_action_delay:
+ psi_post_action_delay_exceeded = True
+ else:
+ psi_post_action_delay_exceeded = False
+
+ if print_mem_check_results:
+ psi_avg_string = 'PSI avg value: {} | '.format(
+ str(psi_avg_value).rjust(6))
+ # optional status line, logged when at least min_mem_report_interval has passed since report0
+ if print_mem_check_results:
+
+ wt1 = time()
+ # change of (mem_available + swap_free) relative to new_mem, which is stored at each report
+ delta = (mem_available + swap_free) - new_mem
+
+ t_cycle = wt1 - wt2  # NOTE(review): t_cycle is not read anywhere else in this loop
+
+ report_delta = wt1 - report0
+
+ if report_delta >= min_mem_report_interval:
+
+ mem_report = True
+ new_mem = mem_available + swap_free
+
+ report0 = wt1
+
+ else:
+ mem_report = False
+
+ wt2 = time()
+
+ if mem_report:
+ # delta / 1024.0 divided by the seconds elapsed since report0
+ speed = delta / 1024.0 / report_delta
+ speed_info = ' | dMem: {} M/s'.format(
+ str(round(speed)).rjust(5)
+ )
+
+ # Calculate 'swap-column' width
+ swap_len = len(str(round(swap_total / 1024.0)))
+
+ # Output available mem sizes; the format depends on whether swap and zram are in use
+ if swap_total == 0 and mem_used_zram == 0:
+ log('{}MemAvail: {} M, {} %{}'.format(
+ psi_avg_string,
+ human(mem_available, mem_len),
+ just_percent_mem(mem_available / mem_total),
+ speed_info
+ )
+ )
+
+ elif swap_total > 0 and mem_used_zram == 0:
+ log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format(
+ psi_avg_string,
+ human(mem_available, mem_len),
+ just_percent_mem(mem_available / mem_total),
+ human(swap_free, swap_len),
+ just_percent_swap(swap_free / (swap_total + 0.1)),
+ speed_info
+ )
+ )
+
+ else:
+ log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem'
+ 'UsedZram: {} M, {} %{}'.format(
+ psi_avg_string,
+ human(mem_available, mem_len),
+ just_percent_mem(mem_available / mem_total),
+ human(swap_free, swap_len),
+ just_percent_swap(swap_free / (swap_total + 0.1)),
+ human(mem_used_zram, mem_len),
+ just_percent_mem(mem_used_zram / mem_total),
+ speed_info
+ )
+ )
+
+ ###########################################################################
+
+ # CHECK HARD THRESHOLDS (SIGKILL LEVEL)
+ # a check that fires sets mem_info, runs the action, resets psi_t0; 'continue' skips the sleep below
+ if (mem_available <= mem_min_sigkill_kb and
+ swap_free <= swap_min_sigkill_kb):
+ # NOTE(review): mem_info is not passed to implement_corrective_action(); presumably read as a global
+ mem_info = 'Hard threshold exceeded\nMemory status that requ' \
+ 'ires corrective actions:' \
+ '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
+ 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
+ 'p_min_sigkill [{} MiB, {} %]'.format(
+ kib_to_mib(mem_available),
+ percent(mem_available / mem_total),
+ kib_to_mib(mem_min_sigkill_kb),
+ percent(mem_min_sigkill_kb / mem_total),
+ kib_to_mib(swap_free),
+ percent(swap_free / (swap_total + 0.1)),
+ kib_to_mib(swap_min_sigkill_kb),
+ swap_sigkill_pc)
+
+ implement_corrective_action(SIGKILL)
+ psi_t0 = time()
+ continue
+ # zram: fires when mem_used_zram has reached zram_max_sigkill_kb
+ if CHECK_ZRAM:
+ if mem_used_zram >= zram_max_sigkill_kb:
+
+ mem_info = 'Hard threshold exceeded\nMemory status that requir' \
+ 'es corrective actions:' \
+ '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
+ 'kill [{} MiB, {} %]'.format(
+ kib_to_mib(mem_used_zram),
+ percent(mem_used_zram / mem_total),
+ kib_to_mib(zram_max_sigkill_kb),
+ percent(zram_max_sigkill_kb / mem_total))
+
+ implement_corrective_action(SIGKILL)
+ psi_t0 = time()
+ continue
+ # PSI: fires only when both the threshold and the post-action delay are exceeded
+ if CHECK_PSI:
+ if psi_avg_value >= sigkill_psi_threshold:
+ sigkill_psi_exceeded = True
+ else:
+ sigkill_psi_exceeded = False
+
+ if sigkill_psi_exceeded and psi_post_action_delay_exceeded:
+
+ mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \
+ 'old ({})'.format(
+ psi_avg_value, sigkill_psi_threshold)
+
+ implement_corrective_action(SIGKILL)
+ psi_t0 = time()
+ continue
+
+ ###########################################################################
+
+ # CHECK SOFT THRESHOLDS (SIGTERM LEVEL)
+ # same three checks as above, using the sigterm thresholds and SIGTERM
+ if (mem_available <= mem_min_sigterm_kb and
+ swap_free <= swap_min_sigterm_kb):
+
+ mem_info = 'Soft threshold exceeded\nMemory status that requi' \
+ 'res corrective actions:' \
+ '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
+ 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
+ 'p_min_sigterm [{} MiB, {} %]'.format(
+ kib_to_mib(mem_available),
+ percent(mem_available / mem_total),
+ kib_to_mib(mem_min_sigterm_kb),
+ round(mem_min_sigterm_percent, 1),
+ kib_to_mib(swap_free),
+ percent(swap_free / (swap_total + 0.1)),
+ kib_to_mib(swap_min_sigterm_kb),
+ swap_sigterm_pc)
+
+ implement_corrective_action(SIGTERM)
+ psi_t0 = time()
+ continue
+
+ if CHECK_ZRAM:
+ if mem_used_zram >= zram_max_sigterm_kb:
+
+ mem_info = 'Soft threshold exceeded\nMemory status that require' \
+ 's corrective actions:\n MemUsedZram [{} MiB, {} %] >= zra' \
+ 'm_max_sigterm [{} M, {} %]'.format(
+ kib_to_mib(mem_used_zram),
+ percent(mem_used_zram / mem_total),
+ kib_to_mib(zram_max_sigterm_kb),
+ percent(zram_max_sigterm_kb / mem_total))
+
+ implement_corrective_action(SIGTERM)
+ psi_t0 = time()
+ continue
+
+ if CHECK_PSI:
+ if psi_avg_value >= sigterm_psi_threshold:
+ sigterm_psi_exceeded = True
+ else:
+ sigterm_psi_exceeded = False
+ # NOTE(review): sigkill_psi_exceeded is assigned only in the SIGKILL-level PSI check above
+ if psi_debug:
+ log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps'
+ 'i_post_action_delay_exceeded: {}'.format(
+ sigterm_psi_exceeded,
+ sigkill_psi_exceeded,
+ psi_post_action_delay_exceeded))
+
+ if sigterm_psi_exceeded and psi_post_action_delay_exceeded:
+
+ mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \
+ 'shold ({})'.format(psi_avg_value, sigterm_psi_threshold)
+
+ implement_corrective_action(SIGTERM)
+ psi_t0 = time()
+ continue
+
+ ###########################################################################
+ # low-memory GUI warning, sent once warn_timer exceeds min_time_between_warnings
+ if gui_low_memory_warnings:
+ # 'and' binds tighter than 'or': (mem low and swap low) or zram high
+ if (mem_available <= mem_min_warnings_kb and
+ swap_free <= swap_min_warnings_kb + 0.1 or
+ mem_used_zram >= zram_max_warnings_kb):
+ # add the time elapsed since this branch was last taken to warn_timer
+ warn_time_delta = time() - warn_time_now
+ warn_time_now = time()
+ warn_timer += warn_time_delta
+ if warn_timer > min_time_between_warnings:
+ send_notify_warn()
+ warn_timer = 0
+
+
+ # x = time() - Q
+ # print(x * 1000)
+ # defined elsewhere; presumably pauses before the next pass - verify
+ sleep_after_check_mem()