Minor fixes

This commit is contained in:
Alexey Avramov 2020-12-05 20:30:04 +09:00
parent 4d75418a01
commit cbe46d9682

View File

@ -100,7 +100,6 @@ def memload():
mem_available_index = mem_list_names.index('MemAvailable') mem_available_index = mem_list_names.index('MemAvailable')
except ValueError: except ValueError:
errprint('Your Linux kernel is too old, Linux 3.14+ required\nExit') errprint('Your Linux kernel is too old, Linux 3.14+ required\nExit')
swap_total_index = mem_list_names.index('SwapTotal')
swap_free_index = mem_list_names.index('SwapFree') swap_free_index = mem_list_names.index('SwapFree')
def check_mem_and_swap(): def check_mem_and_swap():
@ -110,13 +109,10 @@ def memload():
if n == mem_available_index: if n == mem_available_index:
mem_available = int(line.split(':')[1][:-4]) mem_available = int(line.split(':')[1][:-4])
continue continue
if n == swap_total_index:
swap_total = int(line.split(':')[1][:-4])
continue
if n == swap_free_index: if n == swap_free_index:
swap_free = int(line.split(':')[1][:-4]) swap_free = int(line.split(':')[1][:-4])
break break
return mem_available, swap_total, swap_free return mem_available, swap_free
def print_mem(mem_available, swap_free): def print_mem(mem_available, swap_free):
print('\033MMemAvailable: {} MiB, SwapFree: {} MiB ' print('\033MMemAvailable: {} MiB, SwapFree: {} MiB '
@ -146,7 +142,7 @@ def memload():
while True: while True:
try: try:
mem_available, swap_total, swap_free = check_mem_and_swap() mem_available, swap_free = check_mem_and_swap()
x = mem_available + swap_free x = mem_available + swap_free
if x <= 1024 * 40: # 40 MiB if x <= 1024 * 40: # 40 MiB
print_mem(mem_available, swap_free) print_mem(mem_available, swap_free)
@ -275,9 +271,7 @@ def re_pid_environ(pid):
try: try:
with open('/proc/' + pid + '/environ', 'rb') as f: with open('/proc/' + pid + '/environ', 'rb') as f:
env = f.read().decode('utf-8', 'ignore') env = f.read().decode('utf-8', 'ignore')
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return None
except ProcessLookupError:
return None return None
if display_env in env and dbus_env in env and user_env in env: if display_env in env and dbus_env in env and user_env in env:
@ -772,11 +766,7 @@ def pid_to_rss(pid):
try: try:
rss = int(rline1( rss = int(rline1(
'/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE '/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE
except IndexError: except (IndexError, FileNotFoundError, ProcessLookupError):
rss = None
except FileNotFoundError:
rss = None
except ProcessLookupError:
rss = None rss = None
return rss return rss
@ -787,11 +777,7 @@ def pid_to_vm_size(pid):
try: try:
vm_size = int(rline1( vm_size = int(rline1(
'/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE '/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE
except IndexError: except (IndexError, FileNotFoundError, ProcessLookupError):
vm_size = None
except FileNotFoundError:
vm_size = None
except ProcessLookupError:
vm_size = None vm_size = None
return vm_size return vm_size
@ -904,7 +890,7 @@ def pid_to_cgroup_v1(pid):
if index == cgroup_v1_index: if index == cgroup_v1_index:
cgroup_v1 = '/' + line.partition('/')[2][:-1] cgroup_v1 = '/' + line.partition('/')[2][:-1]
return cgroup_v1 return cgroup_v1
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return '' return ''
@ -918,7 +904,7 @@ def pid_to_cgroup_v2(pid):
if index == cgroup_v2_index: if index == cgroup_v2_index:
cgroup_v2 = line[3:-1] cgroup_v2 = line[3:-1]
return cgroup_v2 return cgroup_v2
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return '' return ''
@ -957,21 +943,17 @@ def get_victim_id(pid):
try: try:
return rline1('/proc/' + pid + '/stat').rpartition( return rline1('/proc/' + pid + '/stat').rpartition(
')')[2].split(' ')[20] + '_pid' + pid ')')[2].split(' ')[20] + '_pid' + pid
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return ''
except ProcessLookupError:
return '' return ''
def pid_to_state(pid): def pid_to_state(pid):
""" """ REWRITE THIS COMPLETELY - WITH BINARY READING AND A ZERO-SIZE BUFFER (buffering=0)
""" """
try: try:
with open('/proc/' + pid + '/stat', 'rb') as f: with open('/proc/' + pid + '/stat', 'rb') as f:
return f.read(40).decode('utf-8', 'ignore').rpartition(')')[2][1] return f.read(40).decode('utf-8', 'ignore').rpartition(')')[2][1]
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return ''
except ProcessLookupError:
return '' return ''
except IndexError: except IndexError:
with open('/proc/' + pid + '/stat', 'rb') as f: with open('/proc/' + pid + '/stat', 'rb') as f:
@ -984,9 +966,7 @@ def pid_to_name(pid):
try: try:
with open('/proc/{}/comm'.format(pid), 'rb', buffering=0) as f: with open('/proc/{}/comm'.format(pid), 'rb', buffering=0) as f:
return f.read().decode('utf-8', 'ignore')[:-1] return f.read().decode('utf-8', 'ignore')[:-1]
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return ''
except ProcessLookupError:
return '' return ''
@ -998,9 +978,7 @@ def pid_to_ppid(pid):
for n, line in enumerate(f): for n, line in enumerate(f):
if n is ppid_index: if n is ppid_index:
return line.split('\t')[1].strip() return line.split('\t')[1].strip()
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return ''
except ProcessLookupError:
return '' return ''
except UnicodeDecodeError: except UnicodeDecodeError:
with open('/proc/' + pid + '/status', 'rb') as f: with open('/proc/' + pid + '/status', 'rb') as f:
@ -1044,9 +1022,7 @@ def pid_to_cmdline(pid):
with open('/proc/' + pid + '/cmdline', 'rb') as f: with open('/proc/' + pid + '/cmdline', 'rb') as f:
return f.read().decode('utf-8', 'ignore').replace( return f.read().decode('utf-8', 'ignore').replace(
'\x00', ' ').rstrip() '\x00', ' ').rstrip()
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return ''
except ProcessLookupError:
return '' return ''
@ -1061,9 +1037,7 @@ def pid_to_environ(pid):
with open('/proc/' + pid + '/environ', 'rb') as f: with open('/proc/' + pid + '/environ', 'rb') as f:
return f.read().decode('utf-8', 'ignore').replace( return f.read().decode('utf-8', 'ignore').replace(
'\x00', ' ').rstrip() '\x00', ' ').rstrip()
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return ''
except ProcessLookupError:
return '' return ''
@ -1072,11 +1046,7 @@ def pid_to_realpath(pid):
""" """
try: try:
return os.path.realpath('/proc/{}/exe'.format(pid)) return os.path.realpath('/proc/{}/exe'.format(pid))
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError, PermissionError):
return ''
except ProcessLookupError:
return ''
except PermissionError:
return '' return ''
@ -1085,11 +1055,7 @@ def pid_to_cwd(pid):
""" """
try: try:
return os.path.realpath('/proc/{}/cwd'.format(pid)) return os.path.realpath('/proc/{}/cwd'.format(pid))
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError, PermissionError):
return ''
except ProcessLookupError:
return ''
except PermissionError:
return '' return ''
@ -1099,9 +1065,7 @@ def pid_to_uid(pid):
with open('/proc/{}/status'.format(pid), 'rb', buffering=0) as f: with open('/proc/{}/status'.format(pid), 'rb', buffering=0) as f:
f_list = f.read().decode('utf-8', 'ignore').split('\n') f_list = f.read().decode('utf-8', 'ignore').split('\n')
return f_list[uid_index].split('\t')[2] return f_list[uid_index].split('\t')[2]
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return ''
except ProcessLookupError:
return '' return ''
@ -1235,9 +1199,7 @@ def pid_to_badness(pid, oom_score):
return badness, oom_score return badness, oom_score
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError):
return None, None
except ProcessLookupError:
return None, None return None, None
@ -1275,13 +1237,7 @@ def pid_to_status(pid):
return name, state, ppid, uid, vm_size, vm_rss, vm_swap return name, state, ppid, uid, vm_size, vm_rss, vm_swap
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError, ValueError):
return None
except ProcessLookupError:
return None
except ValueError:
return None return None
@ -1308,24 +1264,13 @@ def mlockall():
MCL_CURRENT = 1 MCL_CURRENT = 1
MCL_FUTURE = 2 MCL_FUTURE = 2
MCL_ONFAULT = 4 MCL_ONFAULT = 4
libc = CDLL(None, use_errno=True)
libc = CDLL('libc.so.6', use_errno=True) result = libc.mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)
result = libc.mlockall(
MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
)
if result != 0: if result != 0:
result = libc.mlockall( result = libc.mlockall(MCL_CURRENT | MCL_FUTURE)
MCL_CURRENT | MCL_FUTURE
)
if result != 0: if result != 0:
log('WARNING: cannot lock all memory: [Errno {}]'.format(result)) log('WARNING: cannot lock process memory: [Errno {}]'.format(
else: result))
pass
# log('All memory locked with MCL_CURRENT | MCL_FUTURE')
else:
pass
# log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
def update_stat_dict(key): def update_stat_dict(key):
@ -1358,25 +1303,25 @@ def print_stat_dict():
def find_psi_metrics_value(psi_path, psi_metrics): def find_psi_metrics_value(psi_path, psi_metrics):
""" """
""" """
foo = read_path(psi_path) foooo = read_path(psi_path)
if foo is None: if foooo is None:
return None return None
try: try:
if psi_metrics == 'some_avg10': if psi_metrics == 'some_avg10':
return float(foo.split('\n')[0].split(' ')[1].split('=')[1]) return float(foooo.split('\n')[0].split(' ')[1].split('=')[1])
if psi_metrics == 'some_avg60': if psi_metrics == 'some_avg60':
return float(foo.split('\n')[0].split(' ')[2].split('=')[1]) return float(foooo.split('\n')[0].split(' ')[2].split('=')[1])
if psi_metrics == 'some_avg300': if psi_metrics == 'some_avg300':
return float(foo.split('\n')[0].split(' ')[3].split('=')[1]) return float(foooo.split('\n')[0].split(' ')[3].split('=')[1])
if psi_metrics == 'full_avg10': if psi_metrics == 'full_avg10':
return float(foo.split('\n')[1].split(' ')[1].split('=')[1]) return float(foooo.split('\n')[1].split(' ')[1].split('=')[1])
if psi_metrics == 'full_avg60': if psi_metrics == 'full_avg60':
return float(foo.split('\n')[1].split(' ')[2].split('=')[1]) return float(foooo.split('\n')[1].split(' ')[2].split('=')[1])
if psi_metrics == 'full_avg300': if psi_metrics == 'full_avg300':
return float(foo.split('\n')[1].split(' ')[3].split('=')[1]) return float(foooo.split('\n')[1].split(' ')[3].split('=')[1])
except Exception as e: except Exception as e:
if debug_psi: if debug_psi:
@ -1466,21 +1411,30 @@ def check_zram():
def format_time(t): def format_time(t):
""" """
""" """
t = int(t) total_s = int(t)
if t < 60: if total_s < 60:
return '{}s'.format(t) return '{}s'.format(round(t, 1))
if t > 3600: if total_s < 3600:
h = t // 3600 total_m = total_s // 60
s0 = t - h * 3600 mod_s = total_s % 60
m = s0 // 60 return '{}min {}s'.format(total_m, mod_s)
s = s0 % 60
return '{}h {}min {}s'.format(h, m, s)
m = t // 60 if total_s < 86400:
s = t % 60 total_m = total_s // 60
return '{}min {}s'.format(m, s) mod_s = total_s % 60
total_h = total_m // 60
mod_m = total_m % 60
return '{}h {}min {}s'.format(total_h, mod_m, mod_s)
total_m = total_s // 60
mod_s = total_s % 60
total_h = total_m // 60
mod_m = total_m % 60
total_d = total_h // 24
mod_h = total_h % 24
return '{}d {}h {}min {}s'.format(total_d, mod_h, mod_m, mod_s)
def string_to_float_convert_test(string): def string_to_float_convert_test(string):
@ -1577,13 +1531,8 @@ def is_alive(pid):
rss = f.read().decode().split(' ')[1] rss = f.read().decode().split(' ')[1]
if rss != '0': if rss != '0':
return True return True
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError, NotADirectoryError,
return False PermissionError):
except ProcessLookupError:
return False
except NotADirectoryError:
return False
except PermissionError:
return False return False
@ -1593,7 +1542,7 @@ def alive_pid_list():
pid_list = [] pid_list = []
for pid in os.listdir('/proc'): for pid in os.listdir('/proc'):
if pid[0].isdecimal() is False: if not pid[0].isdecimal():
continue continue
if is_alive(pid): if is_alive(pid):
@ -1611,24 +1560,16 @@ def pid_to_oom_score(pid):
try: try:
with open('/proc/{}/oom_score'.format(pid), 'rb', buffering=0) as f: with open('/proc/{}/oom_score'.format(pid), 'rb', buffering=0) as f:
return int(f.read()) return int(f.read())
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError, NotADirectoryError):
return 0
except ProcessLookupError:
return 0
except NotADirectoryError:
return 0 return 0
def pid_to_oom_score_adj(pid): def pid_to_oom_score_adj(pid):
try: try:
with open('/proc/{}/oom_score_adj'.format(pid), 'rb', buffering=0 with open(
) as f: '/proc/{}/oom_score_adj'.format(pid), 'rb', buffering=0) as f:
return int(f.read()) return int(f.read())
except FileNotFoundError: except (FileNotFoundError, ProcessLookupError, NotADirectoryError):
return 0
except ProcessLookupError:
return 0
except NotADirectoryError:
return 0 return 0
@ -1639,7 +1580,7 @@ def badness_pid_list():
for pid in os.listdir('/proc'): for pid in os.listdir('/proc'):
o = pid_to_oom_score(pid) o = pid_to_oom_score(pid)
if o >= 1: if o >= 1:
if pid[0].isdecimal() is False: if not pid[0].isdecimal():
continue continue
if pid == self_pid or pid == '1': if pid == self_pid or pid == '1':
continue continue
@ -1810,9 +1751,7 @@ def find_victim(_print_proc_table):
str(vm_rss).rjust(5), str(vm_rss).rjust(5),
str(vm_swap).rjust(6), str(vm_swap).rjust(6),
name.ljust(15), name.ljust(15),
extra_table_line extra_table_line))
)
)
pid_badness_list.append((pid, badness)) pid_badness_list.append((pid, badness))
@ -1839,15 +1778,12 @@ def find_victim(_print_proc_table):
log('Found {} tasks with non-zero VmRSS (except init and self)'.format( log('Found {} tasks with non-zero VmRSS (except init and self)'.format(
real_proc_num)) real_proc_num))
log( log('Process with highest badness (found in {}ms):\n PID: {}, Na'
'Process with highest badness (found in {}ms):\n PID: {}, Na'
'me: {}, badness: {}'.format( 'me: {}, badness: {}'.format(
round((monotonic() - ft1) * 1000), round((monotonic() - ft1) * 1000),
pid, pid,
victim_name, victim_name,
victim_badness victim_badness))
)
)
return pid, victim_badness, victim_name, victim_id return pid, victim_badness, victim_name, victim_id
@ -1900,14 +1836,8 @@ def find_victim_info(pid, victim_badness, name):
oom_score = pid_to_oom_score(pid) oom_score = pid_to_oom_score(pid)
oom_score_adj = pid_to_oom_score_adj(pid) oom_score_adj = pid_to_oom_score_adj(pid)
except IndexError: except (IndexError, ValueError):
x = 'The victim died in the search process: IndexError' x = 'Selected process died before corrective action'
log(x)
update_stat_dict(x)
print_stat_dict()
return None
except ValueError:
x = 'The victim died in the search process: ValueError'
log(x) log(x)
update_stat_dict(x) update_stat_dict(x)
print_stat_dict() print_stat_dict()
@ -1922,7 +1852,7 @@ def find_victim_info(pid, victim_badness, name):
victim_cgroup_v2 = pid_to_cgroup_v2(pid) victim_cgroup_v2 = pid_to_cgroup_v2(pid)
except FileNotFoundError: except FileNotFoundError:
x = 'The victim died in the search process: FileNotFoundError' x = 'Selected process died before corrective action'
log(x) log(x)
update_stat_dict(x) update_stat_dict(x)
print_stat_dict() print_stat_dict()
@ -1930,7 +1860,7 @@ def find_victim_info(pid, victim_badness, name):
ancestry = pid_to_ancestry(pid, max_victim_ancestry_depth) ancestry = pid_to_ancestry(pid, max_victim_ancestry_depth)
if print_victim_cmdline is False: if not print_victim_cmdline:
cmdline = '' cmdline = ''
c1 = '' c1 = ''
else: else:
@ -1962,25 +1892,19 @@ def find_victim_info(pid, victim_badness, name):
uid, uid,
nssid, pid_to_name(nssid), nssid, pid_to_name(nssid),
victim_lifetime, victim_lifetime,
victim_badness, victim_badness,
oom_score, oom_score,
oom_score_adj, oom_score_adj,
vm_size, vm_size,
vm_rss, vm_rss,
detailed_rss_info, detailed_rss_info,
vm_swap, vm_swap,
victim_cgroup_v1, victim_cgroup_v1,
victim_cgroup_v2, victim_cgroup_v2,
ancestry, ancestry,
c1, cmdline, c1, cmdline,
realpath, realpath,
cwd cwd)
)
return victim_info return victim_info
@ -2081,20 +2005,14 @@ def check_zram_ex():
""" """
mem_used_zram = check_zram() mem_used_zram = check_zram()
if mem_available <= hard_threshold_min_mem_kb: ma_hard_threshold_exceded = bool(
ma_hard_threshold_exceded = True mem_available <= hard_threshold_min_mem_kb)
else:
ma_hard_threshold_exceded = False
if mem_available <= soft_threshold_min_mem_kb: ma_soft_threshold_exceded = bool(
ma_soft_threshold_exceded = True mem_available <= soft_threshold_min_mem_kb)
else:
ma_soft_threshold_exceded = False
if mem_available <= warning_threshold_min_mem_kb: ma_warning_threshold_exceded = bool(
ma_warning_threshold_exceded = True mem_available <= warning_threshold_min_mem_kb)
else:
ma_warning_threshold_exceded = False
if (mem_used_zram >= hard_threshold_max_zram_kb and if (mem_used_zram >= hard_threshold_max_zram_kb and
ma_hard_threshold_exceded): ma_hard_threshold_exceded):
@ -2140,25 +2058,18 @@ def check_zram_ex():
return None, None, mem_used_zram return None, None, mem_used_zram
def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0, def check_psi_ex(psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
mem_available): mem_available):
""" """
""" """
ma_hard_threshold_exceded = bool(
mem_available <= hard_threshold_min_mem_kb)
if mem_available <= hard_threshold_min_mem_kb: ma_soft_threshold_exceded = bool(
ma_hard_threshold_exceded = True mem_available <= soft_threshold_min_mem_kb)
else:
ma_hard_threshold_exceded = False
if mem_available <= soft_threshold_min_mem_kb: ma_warning_threshold_exceded = bool(
ma_soft_threshold_exceded = True mem_available <= warning_threshold_min_mem_kb)
else:
ma_soft_threshold_exceded = False
if mem_available <= warning_threshold_min_mem_kb:
ma_warning_threshold_exceded = True
else:
ma_warning_threshold_exceded = False
if not (ma_warning_threshold_exceded or ma_soft_threshold_exceded or if not (ma_warning_threshold_exceded or ma_soft_threshold_exceded or
ma_hard_threshold_exceded) or swap_total == 0: ma_hard_threshold_exceded) or swap_total == 0:
@ -2168,7 +2079,7 @@ def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
'lds of available memory is exceeded') 'lds of available memory is exceeded')
return (None, None, return (None, None,
psi_t0, psi_kill_exceeded_timer, psi_kill_exceeded_timer,
psi_term_exceeded_timer, x0) psi_term_exceeded_timer, x0)
delta0 = monotonic() - x0 delta0 = monotonic() - x0
@ -2185,14 +2096,12 @@ def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
psi_metrics, psi_path, psi_avg_value)) psi_metrics, psi_path, psi_avg_value))
if psi_avg_value is None: if psi_avg_value is None:
return (None, None, psi_t0, -0.0001, -0.0001, x0) return (None, None, -0.0001, -0.0001, x0)
psi_post_action_delay_timer = monotonic() - last_action_dict['t'] # psi_t0 psi_post_action_delay_timer = monotonic() - last_action_dict['t']
if psi_post_action_delay_timer >= psi_post_action_delay: psi_post_action_delay_exceeded = bool(
psi_post_action_delay_exceeded = True psi_post_action_delay_timer >= psi_post_action_delay)
else:
psi_post_action_delay_exceeded = False
if psi_avg_value >= hard_threshold_max_psi: if psi_avg_value >= hard_threshold_max_psi:
sigkill_psi_exceeded = True sigkill_psi_exceeded = True
@ -2218,8 +2127,7 @@ def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
'eded: {}, hard_psi_excess_duration: {}'.format( 'eded: {}, hard_psi_excess_duration: {}'.format(
ma_hard_threshold_exceded, ma_hard_threshold_exceded,
sigkill_psi_exceeded, sigkill_psi_exceeded,
round(psi_kill_exceeded_timer, 1) round(psi_kill_exceeded_timer, 1)))
))
if (sigkill_psi_exceeded and psi_kill_exceeded_timer >= if (sigkill_psi_exceeded and psi_kill_exceeded_timer >=
psi_excess_duration and psi_post_action_delay_exceeded and psi_excess_duration and psi_post_action_delay_exceeded and
@ -2227,9 +2135,9 @@ def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
mem_info = 'Memory status that requires corrective actions:\n MemAv' \ mem_info = 'Memory status that requires corrective actions:\n MemAv' \
'ailable [{} MiB, {} %] <= hard_threshold_min_mem [{} MiB' \ 'ailable [{} MiB, {} %] <= hard_threshold_min_mem [{} MiB' \
', {} %]\n PSI avg value ({}) >= hard_threshold_max_psi ' \ ', {} %]\n Current PSI metric value ({}) >= hard_thresho' \
'({})\n PSI avg value exceeded psi_excess_duration (valu' \ 'ld_max_psi ({})\n PSI metric value exceeded psi_excess_' \
'e={}s) for {}s'.format( 'duration ({}s) for {}s'.format(
kib_to_mib(mem_available), kib_to_mib(mem_available),
percent(mem_available / mem_total), percent(mem_available / mem_total),
kib_to_mib(hard_threshold_min_mem_kb), kib_to_mib(hard_threshold_min_mem_kb),
@ -2237,10 +2145,9 @@ def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
psi_avg_value, psi_avg_value,
hard_threshold_max_psi, hard_threshold_max_psi,
psi_excess_duration, psi_excess_duration,
round(psi_kill_exceeded_timer, 1) round(psi_kill_exceeded_timer, 1))
)
return (SIGKILL, mem_info, psi_t0, psi_kill_exceeded_timer, return (SIGKILL, mem_info, psi_kill_exceeded_timer,
psi_term_exceeded_timer, x0) psi_term_exceeded_timer, x0)
if psi_avg_value >= soft_threshold_max_psi: if psi_avg_value >= soft_threshold_max_psi:
@ -2262,8 +2169,7 @@ def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
'eded: {}, soft_psi_excess_duration: {}'.format( 'eded: {}, soft_psi_excess_duration: {}'.format(
ma_soft_threshold_exceded, ma_soft_threshold_exceded,
sigterm_psi_exceeded, sigterm_psi_exceeded,
round(psi_term_exceeded_timer, 1) round(psi_term_exceeded_timer, 1)))
))
if (sigterm_psi_exceeded and psi_term_exceeded_timer >= if (sigterm_psi_exceeded and psi_term_exceeded_timer >=
psi_excess_duration and psi_post_action_delay_exceeded and psi_excess_duration and psi_post_action_delay_exceeded and
@ -2271,9 +2177,9 @@ def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
mem_info = 'Memory status that requires corrective actions:\n MemA' \ mem_info = 'Memory status that requires corrective actions:\n MemA' \
'vailable [{} MiB, {} %] <= soft_threshold_min_mem [{} M' \ 'vailable [{} MiB, {} %] <= soft_threshold_min_mem [{} M' \
'iB, {} %]\n PSI avg value ({}) >= soft_threshold_max_p' \ 'iB, {} %]\n Current PSI metric value ({}) >= soft_thre' \
'si ({})\n PSI avg value exceeded psi_excess_duration (' \ 'shold_max_psi ({})\n PSI metric value exceeded psi_exc' \
'value={}s) for {}s'.format( 'ess_duration ({}s) for {}s'.format(
kib_to_mib(mem_available), kib_to_mib(mem_available),
percent(mem_available / mem_total), percent(mem_available / mem_total),
kib_to_mib(soft_threshold_min_mem_kb), kib_to_mib(soft_threshold_min_mem_kb),
@ -2281,20 +2187,19 @@ def check_psi_ex(psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
psi_avg_value, psi_avg_value,
soft_threshold_max_psi, soft_threshold_max_psi,
psi_excess_duration, psi_excess_duration,
round(psi_term_exceeded_timer, 1) round(psi_term_exceeded_timer, 1))
)
return (SIGTERM, mem_info, psi_t0, psi_kill_exceeded_timer, return (SIGTERM, mem_info, psi_kill_exceeded_timer,
psi_term_exceeded_timer, x0) psi_term_exceeded_timer, x0)
if low_memory_warnings_enabled: if low_memory_warnings_enabled:
if (psi_avg_value >= warning_threshold_max_psi and if (psi_avg_value >= warning_threshold_max_psi and
ma_warning_threshold_exceded): ma_warning_threshold_exceded):
return ('WARN', None, psi_t0, psi_kill_exceeded_timer, return ('WARN', None, psi_kill_exceeded_timer,
psi_term_exceeded_timer, x0) psi_term_exceeded_timer, x0)
return (None, None, psi_t0, psi_kill_exceeded_timer, return (None, None, psi_kill_exceeded_timer,
psi_term_exceeded_timer, x0) psi_term_exceeded_timer, x0)
@ -2303,7 +2208,7 @@ def is_victim_alive(victim_id):
We do not have a reliable sign of the end of the release of memory: We do not have a reliable sign of the end of the release of memory:
https://github.com/rfjakob/earlyoom/issues/128#issuecomment-507023717 https://github.com/rfjakob/earlyoom/issues/128#issuecomment-507023717
""" """
starttime, pid = victim_id.split('_pid') _, pid = victim_id.split('_pid')
new_victim_id = get_victim_id(pid) new_victim_id = get_victim_id(pid)
if victim_id != new_victim_id: if victim_id != new_victim_id:
return 0 return 0
@ -2385,7 +2290,6 @@ def is_post_oom_delay_exceeded():
def implement_corrective_action( def implement_corrective_action(
threshold, threshold,
mem_info_list, mem_info_list,
psi_t0,
psi_kill_exceeded_timer, psi_kill_exceeded_timer,
psi_term_exceeded_timer, psi_term_exceeded_timer,
x0, x0,
@ -2401,7 +2305,7 @@ def implement_corrective_action(
post_oom_delay_exceeded = is_post_oom_delay_exceeded() post_oom_delay_exceeded = is_post_oom_delay_exceeded()
if not post_oom_delay_exceeded: if not post_oom_delay_exceeded:
log(separator_out) log(separator_out)
return psi_t0 return None
time0 = monotonic() time0 = monotonic()
@ -2461,7 +2365,7 @@ def implement_corrective_action(
log(separator_out) log(separator_out)
return psi_t0 return None
if fff is None: if fff is None:
@ -2471,27 +2375,27 @@ def implement_corrective_action(
log(separator_out) log(separator_out)
return psi_t0 return None
pid, victim_badness, name, victim_id = fff pid, victim_badness, name, victim_id = fff
post_oom_delay_exceeded = is_post_oom_delay_exceeded() post_oom_delay_exceeded = is_post_oom_delay_exceeded()
if not post_oom_delay_exceeded: if not post_oom_delay_exceeded:
log(separator_out) log(separator_out)
return psi_t0 return None
log('Recheck memory levels...') log('Recheck memory levels...')
(masf_threshold, masf_info, mem_available, hard_threshold_min_swap_kb, (masf_threshold, masf_info, mem_available, _, _, swap_free, _
soft_threshold_min_swap_kb, swap_free, swap_total) = check_mem_swap_ex() ) = check_mem_swap_ex()
if zram_checking_enabled: if zram_checking_enabled:
zram_threshold, zram_info, mem_used_zram = check_zram_ex() zram_threshold, zram_info, _ = check_zram_ex()
if CHECK_PSI: if CHECK_PSI:
(psi_threshold, psi_info, psi_t0, psi_kill_exceeded_timer, (psi_threshold, psi_info, psi_kill_exceeded_timer,
psi_term_exceeded_timer, x0) = check_psi_ex( psi_term_exceeded_timer, x0) = check_psi_ex(
psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
mem_available) mem_available)
if (masf_threshold is SIGKILL or zram_threshold is SIGKILL or if (masf_threshold is SIGKILL or zram_threshold is SIGKILL or
@ -2527,7 +2431,7 @@ def implement_corrective_action(
else: else:
log('Thresholds is not exceeded now') log('Thresholds is not exceeded now')
log(separator_out) log(separator_out)
return psi_t0 return None
for i in mem_info_list: for i in mem_info_list:
log(i) log(i)
@ -2535,7 +2439,7 @@ def implement_corrective_action(
if new_threshold is None or new_threshold == 'WARN': if new_threshold is None or new_threshold == 'WARN':
log('Thresholds is not exceeded now') log('Thresholds is not exceeded now')
log(separator_out) log(separator_out)
return psi_t0 return None
threshold = new_threshold threshold = new_threshold
@ -2560,7 +2464,7 @@ def implement_corrective_action(
log(separator_out) log(separator_out)
return psi_t0 return None
if victim_badness >= min_badness: if victim_badness >= min_badness:
@ -2573,7 +2477,7 @@ def implement_corrective_action(
log(separator_out) log(separator_out)
return psi_t0 return None
log_meminfo() log_meminfo()
@ -2626,7 +2530,7 @@ def implement_corrective_action(
post_oom_delay_exceeded = is_post_oom_delay_exceeded() post_oom_delay_exceeded = is_post_oom_delay_exceeded()
if not post_oom_delay_exceeded: if not post_oom_delay_exceeded:
log(separator_out) log(separator_out)
return psi_t0 return None
if soft_match: if soft_match:
@ -2684,18 +2588,9 @@ def implement_corrective_action(
print_stat_dict() print_stat_dict()
except FileNotFoundError:
vwd = True
key = 'Cannot send a signal: FileNotFoundError'
update_stat_dict(key)
print_stat_dict()
log(key)
except ProcessLookupError: except ProcessLookupError:
vwd = True vwd = True
key = 'Cannot send a signal: ProcessLookupError' key = 'Selected process died before corrective action'
update_stat_dict(key) update_stat_dict(key)
print_stat_dict() print_stat_dict()
log(key) log(key)
@ -2707,8 +2602,7 @@ def implement_corrective_action(
update_stat_dict(key) update_stat_dict(key)
print_stat_dict() print_stat_dict()
log('Sleep {}s'.format(post_soft_action_delay)) log('Sleep {}s'.format(post_soft_action_delay))
sleep(post_soft_action_delay) sleep(10)
# do not send signal twice!
if not vwd: if not vwd:
if victim_id not in v_dict: if victim_id not in v_dict:
@ -2759,7 +2653,7 @@ def implement_corrective_action(
sleep(post_zombie_delay) sleep(post_zombie_delay)
break break
mem_available, swap_total, swap_free = check_mem_and_swap() mem_available, _, swap_free = check_mem_and_swap()
ma_mib = int(mem_available) / 1024.0 ma_mib = int(mem_available) / 1024.0
sf_mib = int(swap_free) / 1024.0 sf_mib = int(swap_free) / 1024.0
log('Memory status after implementing a corrective act' log('Memory status after implementing a corrective act'
@ -2806,7 +2700,7 @@ def implement_corrective_action(
log(separator_out) log(separator_out)
return psi_t0 return None
def sleep_after_check_mem(): def sleep_after_check_mem():
@ -3133,8 +3027,6 @@ SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE']) SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE'])
conf_err_mess = 'Invalid config. Exit.'
sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP] sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP]
sig_dict = { sig_dict = {
@ -3149,11 +3041,7 @@ self_pid = str(os.getpid())
self_uid = os.geteuid() self_uid = os.geteuid()
if self_uid == 0: root = bool(self_uid == 0)
root = True
else:
root = False
last_action_dict = dict() last_action_dict = dict()
@ -3295,19 +3183,11 @@ except ValueError:
config = os.path.abspath(config) config = os.path.abspath(config)
print('Starting nohang with the config: {}'.format(config)) print('Starting nohang with config {}'.format(config))
# separator_in = '>>' + '=' * 75 + '>>' separator_in = '>>=== STARTING implement_corrective_action() ====>>'
# separator_out = '<<' + '=' * 75 + '<<' separator_out = '<<=== FINISHING implement_corrective_action() ===<<'
# separator_in = '>>=== implement_corrective_action() ==================>>'
# separator_out = '<<=== exit from implement_corrective_action() ========<<'
separator_in = '>----- implement_corrective_action() ------------------>'
separator_out = '<----- exit from implement_corrective_action() --------<'
############################################################################### ###############################################################################
@ -3493,20 +3373,10 @@ try:
badness_adj_re_environ_list.append((badness_adj, reg_exp)) badness_adj_re_environ_list.append((badness_adj, reg_exp))
except PermissionError: except (PermissionError, UnicodeDecodeError, IsADirectoryError, IndexError,
errprint('PermissionError', conf_err_mess) FileNotFoundError) as e:
exit(1) errprint(e)
except UnicodeDecodeError: errprint('Invalid config. Exit.')
errprint('UnicodeDecodeError', conf_err_mess)
exit(1)
except IsADirectoryError:
errprint('IsADirectoryError', conf_err_mess)
exit(1)
except IndexError:
errprint('IndexError', conf_err_mess)
exit(1)
except FileNotFoundError:
errprint('FileNotFoundError', conf_err_mess)
exit(1) exit(1)
@ -3644,10 +3514,7 @@ except Exception as e:
log('WARNING: PSI metrics are not provided by the kernel: {}'.format( log('WARNING: PSI metrics are not provided by the kernel: {}'.format(
e)) e))
if PSI_KERNEL_OK and psi_checking_enabled: CHECK_PSI = bool(PSI_KERNEL_OK and psi_checking_enabled)
CHECK_PSI = True
else:
CHECK_PSI = False
zram_checking_enabled = conf_parse_bool('zram_checking_enabled') zram_checking_enabled = conf_parse_bool('zram_checking_enabled')
@ -3873,10 +3740,7 @@ else:
if 'warning_exe' in config_dict: if 'warning_exe' in config_dict:
warning_exe = config_dict['warning_exe'] warning_exe = config_dict['warning_exe']
if warning_exe != '': check_warning_exe = bool(warning_exe != '')
check_warning_exe = True
else:
check_warning_exe = False
else: else:
missing_config_key('warning_exe') missing_config_key('warning_exe')
@ -3929,12 +3793,7 @@ else:
over_sleep = min_sleep over_sleep = min_sleep
sensitivity_test_time = over_sleep / 4 sensitivity_test_time = over_sleep / 4
stable_sleep = bool(max_sleep == min_sleep)
if max_sleep == min_sleep:
stable_sleep = True
else:
stable_sleep = False
if print_proc_table_flag: if print_proc_table_flag:
@ -4047,7 +3906,6 @@ mem_info = None
psi_kill_exceeded_timer = psi_term_exceeded_timer = -0.0001 psi_kill_exceeded_timer = psi_term_exceeded_timer = -0.0001
psi_t0 = monotonic()
psi_threshold = zram_threshold = zram_info = psi_info = None psi_threshold = zram_threshold = zram_info = psi_info = None
@ -4133,39 +3991,30 @@ while True:
zram_threshold, zram_info, mem_used_zram = check_zram_ex() zram_threshold, zram_info, mem_used_zram = check_zram_ex()
if CHECK_PSI: if CHECK_PSI:
(psi_threshold, psi_info, psi_t0, psi_kill_exceeded_timer, (psi_threshold, psi_info, psi_kill_exceeded_timer,
psi_term_exceeded_timer, x0) = check_psi_ex( psi_term_exceeded_timer, x0) = check_psi_ex(
psi_t0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0, psi_kill_exceeded_timer, psi_term_exceeded_timer, x0,
mem_available) mem_available)
if print_mem_check_results: if print_mem_check_results:
if CHECK_PSI: if CHECK_PSI:
psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics) psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
if monotonic() - psi_t0 >= psi_post_action_delay: psi_post_action_delay_exceeded = bool(
psi_post_action_delay_exceeded = True monotonic() >= psi_post_action_delay)
else:
psi_post_action_delay_exceeded = False
if print_mem_check_results: if print_mem_check_results:
psi_avg_string = 'PSI: {} | '.format( psi_avg_string = 'PSI: {} | '.format(
str(psi_avg_value).rjust(6)) str(psi_avg_value).rjust(6))
wt1 = monotonic() wt1 = monotonic()
delta = (mem_available + swap_free) - new_mem delta = (mem_available + swap_free) - new_mem
t_cycle = wt1 - wt2 t_cycle = wt1 - wt2
report_delta = wt1 - report0 report_delta = wt1 - report0
if report_delta >= min_mem_report_interval: if report_delta >= min_mem_report_interval:
mem_report = True mem_report = True
new_mem = mem_available + swap_free new_mem = mem_available + swap_free
report0 = wt1 report0 = wt1
else: else:
mem_report = False mem_report = False
@ -4175,8 +4024,7 @@ while True:
speed = delta / 1024.0 / report_delta speed = delta / 1024.0 / report_delta
speed_info = ' | dMem: {} M/s'.format( speed_info = ' | dMem: {} M/s'.format(
str(round(speed)).rjust(5) str(round(speed)).rjust(5))
)
# Calculate 'swap-column' width # Calculate 'swap-column' width
swap_len = len(str(round(swap_total / 1024.0))) swap_len = len(str(round(swap_total / 1024.0)))
@ -4187,9 +4035,7 @@ while True:
psi_avg_string, psi_avg_string,
human(mem_available, mem_len), human(mem_available, mem_len),
just_percent_mem(mem_available / mem_total), just_percent_mem(mem_available / mem_total),
speed_info speed_info))
)
)
elif swap_total > 0 and mem_used_zram == 0: elif swap_total > 0 and mem_used_zram == 0:
log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format( log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format(
@ -4198,9 +4044,7 @@ while True:
just_percent_mem(mem_available / mem_total), just_percent_mem(mem_available / mem_total),
human(swap_free, swap_len), human(swap_free, swap_len),
just_percent_swap(swap_free / (swap_total + 0.1)), just_percent_swap(swap_free / (swap_total + 0.1)),
speed_info speed_info))
)
)
else: else:
log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem' log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem'
@ -4212,9 +4056,7 @@ while True:
just_percent_swap(swap_free / (swap_total + 0.1)), just_percent_swap(swap_free / (swap_total + 0.1)),
human(mem_used_zram, mem_len), human(mem_used_zram, mem_len),
just_percent_mem(mem_used_zram / mem_total), just_percent_mem(mem_used_zram / mem_total),
speed_info speed_info))
)
)
if (masf_threshold == SIGKILL or zram_threshold == SIGKILL or if (masf_threshold == SIGKILL or zram_threshold == SIGKILL or
psi_threshold == SIGKILL): psi_threshold == SIGKILL):
@ -4231,10 +4073,9 @@ while True:
if psi_info is not None: if psi_info is not None:
mem_info_list.append(psi_info) mem_info_list.append(psi_info)
psi_t0 = implement_corrective_action( implement_corrective_action(
threshold, threshold,
mem_info_list, mem_info_list,
psi_t0,
psi_kill_exceeded_timer, psi_kill_exceeded_timer,
psi_term_exceeded_timer, psi_term_exceeded_timer,
x0, psi_threshold, zram_threshold, zram_info, psi_info) x0, psi_threshold, zram_threshold, zram_info, psi_info)
@ -4256,10 +4097,9 @@ while True:
mem_info_list.append(psi_info) mem_info_list.append(psi_info)
psi_t0 = implement_corrective_action( implement_corrective_action(
threshold, threshold,
mem_info_list, mem_info_list,
psi_t0,
psi_kill_exceeded_timer, psi_kill_exceeded_timer,
psi_term_exceeded_timer, psi_term_exceeded_timer,
x0, psi_threshold, zram_threshold, zram_info, psi_info) x0, psi_threshold, zram_threshold, zram_info, psi_info)