fix alerts
This commit is contained in:
parent
9f438726b4
commit
75f05959fc
51
nohang
51
nohang
@ -344,7 +344,7 @@ def log(*msg):
|
||||
sleep(0.01)
|
||||
if separate_log:
|
||||
try:
|
||||
info(*msg)
|
||||
logging.info(*msg)
|
||||
except OSError:
|
||||
sleep(0.01)
|
||||
|
||||
@ -835,16 +835,22 @@ def check_zram():
|
||||
return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
|
||||
|
||||
|
||||
|
||||
|
||||
def format_time(t):
|
||||
"""
|
||||
"""
|
||||
|
||||
t = int(t)
|
||||
|
||||
if t < 60:
|
||||
return '{} sec'.format(t)
|
||||
elif t >= 60 and t < 3600:
|
||||
|
||||
elif (t < 3600 and t >= 60):
|
||||
m = t // 60
|
||||
s = t % 60
|
||||
return '{} min {} sec'.format(m, s)
|
||||
|
||||
else:
|
||||
h = t // 3600
|
||||
s0 = t - h * 3600
|
||||
@ -853,6 +859,8 @@ def format_time(t):
|
||||
return '{} h {} min {} sec'.format(h, m, s)
|
||||
|
||||
|
||||
|
||||
|
||||
def string_to_float_convert_test(string):
|
||||
"""Try to interpret string values as floats."""
|
||||
try:
|
||||
@ -1002,11 +1010,6 @@ def send_notify(threshold, name, pid):
|
||||
pid: str process pid
|
||||
"""
|
||||
|
||||
# wait for memory release after corrective action
|
||||
# may be useful if free memory was about 0 immediately after
|
||||
# corrective action
|
||||
sleep(0.05)
|
||||
|
||||
title = 'Freeze prevention'
|
||||
body = '<b>{}</b> [{}] <b>{}</b>'.format(
|
||||
notify_sig_dict[threshold],
|
||||
@ -1237,9 +1240,14 @@ def find_victim_info(pid, victim_badness, name):
|
||||
state = line.split('\t')[1].rstrip()
|
||||
continue
|
||||
|
||||
|
||||
"""
|
||||
if n is ppid_index:
|
||||
ppid = line.split('\t')[1]
|
||||
# ppid = line.split('\t')[1]
|
||||
continue
|
||||
"""
|
||||
|
||||
|
||||
|
||||
if n is uid_index:
|
||||
uid = line.split('\t')[2]
|
||||
@ -1299,8 +1307,13 @@ def find_victim_info(pid, victim_badness, name):
|
||||
if i is state_index:
|
||||
state = f_list[i].split('\t')[1].rstrip()
|
||||
|
||||
|
||||
"""
|
||||
if i is ppid_index:
|
||||
ppid = f_list[i].split('\t')[1]
|
||||
pass
|
||||
# ppid = f_list[i].split('\t')[1]
|
||||
"""
|
||||
|
||||
|
||||
if i is uid_index:
|
||||
uid = f_list[i].split('\t')[2]
|
||||
@ -1901,10 +1914,14 @@ def implement_corrective_action(
|
||||
pid)).replace('$SERVICE', service)
|
||||
exit_status = exe(cmd)
|
||||
|
||||
"""
|
||||
if exit_status == 0:
|
||||
success = True
|
||||
else:
|
||||
success = False
|
||||
"""
|
||||
|
||||
|
||||
|
||||
response_time = time() - time0
|
||||
|
||||
@ -1937,18 +1954,16 @@ def implement_corrective_action(
|
||||
|
||||
except FileNotFoundError:
|
||||
vwd = True
|
||||
success = False
|
||||
# success = False
|
||||
response_time = time() - time0
|
||||
send_result = 'no such process; response time: {} ms'.format(
|
||||
round(response_time * 1000))
|
||||
# send_result = 'no such process; response time: {} ms'.format(round(response_time * 1000))
|
||||
key = 'The victim died in the search process: ' \
|
||||
'FileNotFoundError'
|
||||
except ProcessLookupError:
|
||||
vwd = True
|
||||
success = False
|
||||
# success = False
|
||||
response_time = time() - time0
|
||||
send_result = 'no such process; response time: {} ms'.format(
|
||||
round(response_time * 1000))
|
||||
# send_result = 'no such process; response time: {} ms'.format(round(response_time * 1000))
|
||||
key = 'The victim died in the search process: ' \
|
||||
'ProcessLookupError'
|
||||
|
||||
@ -1974,7 +1989,7 @@ def implement_corrective_action(
|
||||
|
||||
# print(v_dict)
|
||||
|
||||
response_time = time() - time0
|
||||
# response_time = time() - time0
|
||||
|
||||
# log('success: ' + str(success))
|
||||
# log('victim will die: ' + str(vwd))
|
||||
@ -2934,8 +2949,6 @@ separate_log = conf_parse_bool('separate_log')
|
||||
if separate_log:
|
||||
|
||||
import logging
|
||||
from logging import basicConfig
|
||||
from logging import info
|
||||
|
||||
log_dir = '/var/log/nohang'
|
||||
|
||||
@ -2957,7 +2970,7 @@ if separate_log:
|
||||
print('ERROR: log PermissionError')
|
||||
|
||||
try:
|
||||
basicConfig(
|
||||
logging.basicConfig(
|
||||
filename=logfile,
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s: %(message)s")
|
||||
|
@ -119,7 +119,7 @@ except Exception:
|
||||
|
||||
|
||||
try:
|
||||
from os import listdir, path, remove
|
||||
from os import listdir, path
|
||||
from subprocess import Popen, TimeoutExpired
|
||||
from sys import argv
|
||||
except OSError:
|
||||
|
6
oom-sort
6
oom-sort
@ -45,13 +45,13 @@ def pid_to_status_units(pid):
|
||||
if i is 1:
|
||||
name = f_list[0].split('\t')[1]
|
||||
|
||||
if i is uid_index:
|
||||
if i == uid_index:
|
||||
uid = f_list[i].split('\t')[2]
|
||||
|
||||
if i is vm_rss_index:
|
||||
if i == vm_rss_index:
|
||||
vm_rss = f_list[i].split('\t')[1][:-3]
|
||||
|
||||
if i is vm_swap_index:
|
||||
if i == vm_swap_index:
|
||||
vm_swap = f_list[i].split('\t')[1][:-3]
|
||||
|
||||
return name, uid, vm_rss, vm_swap
|
||||
|
31
psi-monitor
31
psi-monitor
@ -3,19 +3,7 @@
|
||||
from ctypes import CDLL
|
||||
from time import sleep
|
||||
from sys import argv
|
||||
|
||||
"""
|
||||
Execute the command
|
||||
find /sys/fs/cgroup -name memory.pressure
|
||||
to find available memory.pressure files (except /proc/pressure/memory).
|
||||
(relevant for cgroup2)
|
||||
"""
|
||||
|
||||
if len(argv) > 1:
|
||||
psi_path = argv[1]
|
||||
else:
|
||||
psi_path = '/proc/pressure/memory'
|
||||
|
||||
import os
|
||||
|
||||
def mlockall():
|
||||
|
||||
@ -33,16 +21,13 @@ def mlockall():
|
||||
MCL_CURRENT | MCL_FUTURE
|
||||
)
|
||||
if result != 0:
|
||||
print('WARNING: cannot lock all memory')
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
mlockall()
|
||||
|
||||
|
||||
def psi_path_to_metrics(psi_path):
|
||||
|
||||
with open(psi_path) as f:
|
||||
@ -62,8 +47,18 @@ def psi_path_to_metrics(psi_path):
|
||||
full_avg10, full_avg60, full_avg300)
|
||||
|
||||
|
||||
print('Path to PSI file: {}\n'.format(psi_path))
|
||||
if len(argv) > 1:
|
||||
psi_path = argv[1]
|
||||
else:
|
||||
psi_path = '/proc/pressure/memory'
|
||||
|
||||
if not os.path.exists(psi_path):
|
||||
print('PSI path does not exist. Exit.')
|
||||
exit()
|
||||
|
||||
mlockall()
|
||||
|
||||
print('Path to PSI file: {}\n'.format(psi_path))
|
||||
|
||||
print(' avg10 avg60 avg300 avg10 avg60 avg300')
|
||||
|
||||
|
62
psi-top
62
psi-top
@ -1,45 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from ctypes import CDLL
|
||||
from time import sleep, time
|
||||
import os
|
||||
|
||||
"""
|
||||
Execute the command
|
||||
find /sys/fs/cgroup -name memory.pressure
|
||||
to find available memory.pressure files (except /proc/pressure/memory).
|
||||
(relevant for cgroup2)
|
||||
"""
|
||||
|
||||
psi_path = '/proc/pressure/memory'
|
||||
|
||||
def mlockall():
|
||||
|
||||
MCL_CURRENT = 1
|
||||
MCL_FUTURE = 2
|
||||
MCL_ONFAULT = 4
|
||||
|
||||
libc = CDLL('libc.so.6', use_errno=True)
|
||||
|
||||
result = libc.mlockall(
|
||||
MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
|
||||
)
|
||||
if result != 0:
|
||||
result = libc.mlockall(
|
||||
MCL_CURRENT | MCL_FUTURE
|
||||
)
|
||||
if result != 0:
|
||||
print('WARNING: cannot lock all memory')
|
||||
else:
|
||||
pass
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
mlockall()
|
||||
|
||||
t0 = time()
|
||||
|
||||
def psi_path_to_metrics(psi_path):
|
||||
|
||||
with open(psi_path) as f:
|
||||
@ -59,7 +23,6 @@ def psi_path_to_metrics(psi_path):
|
||||
full_avg10, full_avg60, full_avg300)
|
||||
|
||||
|
||||
|
||||
def cgroup2_root():
|
||||
"""
|
||||
"""
|
||||
@ -93,7 +56,19 @@ def psi_path_to_cgroup2(path):
|
||||
|
||||
i = cgroup2_root()
|
||||
|
||||
print('cgroup2 root dir:', i)
|
||||
if i is None:
|
||||
print('cgroup2 not mounted')
|
||||
else:
|
||||
print('cgroup2 root dir:', i)
|
||||
|
||||
|
||||
psi_support = os.path.exists(psi_path)
|
||||
|
||||
if not psi_support:
|
||||
print('PSI is not supported, /proc/pressure/memory does not exist. Exit.')
|
||||
exit(1)
|
||||
|
||||
|
||||
if i is not None:
|
||||
y = get_psi_mem_files(i)
|
||||
for path in y:
|
||||
@ -105,14 +80,16 @@ print(' avg10 avg60 avg300 avg10 avg60 avg300 cgroup2')
|
||||
|
||||
print(' ----- ----- ------ ----- ----- ------ ---------')
|
||||
|
||||
(some_avg10, some_avg60, some_avg300, full_avg10, full_avg60, full_avg300) = psi_path_to_metrics('/proc/pressure/memory')
|
||||
(some_avg10, some_avg60, some_avg300, full_avg10, full_avg60, full_avg300
|
||||
) = psi_path_to_metrics('/proc/pressure/memory')
|
||||
|
||||
print('some {} {} {} | full {} {} {} {}'.format(
|
||||
some_avg10.rjust(6),
|
||||
some_avg60.rjust(6),
|
||||
some_avg300.rjust(6),
|
||||
full_avg10.rjust(6),
|
||||
full_avg60.rjust(6),
|
||||
full_avg300.rjust(6), '[SYSTEM]'))
|
||||
full_avg300.rjust(6), '[SYSTEM_WIDE]'))
|
||||
|
||||
|
||||
for psi_path in path_list:
|
||||
@ -126,8 +103,3 @@ for psi_path in path_list:
|
||||
full_avg10.rjust(6),
|
||||
full_avg60.rjust(6),
|
||||
full_avg300.rjust(6), psi_path_to_cgroup2(psi_path)))
|
||||
|
||||
|
||||
print(time() - t0)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user