fix alerts

This commit is contained in:
Alexey Avramov 2019-07-18 22:57:41 +09:00
parent 9f438726b4
commit 75f05959fc
5 changed files with 66 additions and 86 deletions

51
nohang
View File

@ -344,7 +344,7 @@ def log(*msg):
sleep(0.01)
if separate_log:
try:
info(*msg)
logging.info(*msg)
except OSError:
sleep(0.01)
@ -835,16 +835,22 @@ def check_zram():
return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
def format_time(t):
"""
"""
t = int(t)
if t < 60:
return '{} sec'.format(t)
elif t >= 60 and t < 3600:
elif (t < 3600 and t >= 60):
m = t // 60
s = t % 60
return '{} min {} sec'.format(m, s)
else:
h = t // 3600
s0 = t - h * 3600
@ -853,6 +859,8 @@ def format_time(t):
return '{} h {} min {} sec'.format(h, m, s)
def string_to_float_convert_test(string):
"""Try to interprete string values as floats."""
try:
@ -1002,11 +1010,6 @@ def send_notify(threshold, name, pid):
pid: str process pid
"""
# wait for memory release after corrective action
# may be useful if free memory was about 0 immediately after
# corrective action
sleep(0.05)
title = 'Freeze prevention'
body = '<b>{}</b> [{}] <b>{}</b>'.format(
notify_sig_dict[threshold],
@ -1237,9 +1240,14 @@ def find_victim_info(pid, victim_badness, name):
state = line.split('\t')[1].rstrip()
continue
"""
if n is ppid_index:
ppid = line.split('\t')[1]
# ppid = line.split('\t')[1]
continue
"""
if n is uid_index:
uid = line.split('\t')[2]
@ -1299,8 +1307,13 @@ def find_victim_info(pid, victim_badness, name):
if i is state_index:
state = f_list[i].split('\t')[1].rstrip()
"""
if i is ppid_index:
ppid = f_list[i].split('\t')[1]
pass
# ppid = f_list[i].split('\t')[1]
"""
if i is uid_index:
uid = f_list[i].split('\t')[2]
@ -1901,10 +1914,14 @@ def implement_corrective_action(
pid)).replace('$SERVICE', service)
exit_status = exe(cmd)
"""
if exit_status == 0:
success = True
else:
success = False
"""
response_time = time() - time0
@ -1937,18 +1954,16 @@ def implement_corrective_action(
except FileNotFoundError:
vwd = True
success = False
# success = False
response_time = time() - time0
send_result = 'no such process; response time: {} ms'.format(
round(response_time * 1000))
# send_result = 'no such process; response time: {} ms'.format(round(response_time * 1000))
key = 'The victim died in the search process: ' \
'FileNotFoundError'
except ProcessLookupError:
vwd = True
success = False
# success = False
response_time = time() - time0
send_result = 'no such process; response time: {} ms'.format(
round(response_time * 1000))
# send_result = 'no such process; response time: {} ms'.format(round(response_time * 1000))
key = 'The victim died in the search process: ' \
'ProcessLookupError'
@ -1974,7 +1989,7 @@ def implement_corrective_action(
# print(v_dict)
response_time = time() - time0
# response_time = time() - time0
# log('success: ' + str(success))
# log('victim will die: ' + str(vwd))
@ -2934,8 +2949,6 @@ separate_log = conf_parse_bool('separate_log')
if separate_log:
import logging
from logging import basicConfig
from logging import info
log_dir = '/var/log/nohang'
@ -2957,7 +2970,7 @@ if separate_log:
print('ERROR: log PermissionError')
try:
basicConfig(
logging.basicConfig(
filename=logfile,
level=logging.INFO,
format="%(asctime)s: %(message)s")

View File

@ -119,7 +119,7 @@ except Exception:
try:
from os import listdir, path, remove
from os import listdir, path
from subprocess import Popen, TimeoutExpired
from sys import argv
except OSError:

View File

@ -45,13 +45,13 @@ def pid_to_status_units(pid):
if i is 1:
name = f_list[0].split('\t')[1]
if i is uid_index:
if i == uid_index:
uid = f_list[i].split('\t')[2]
if i is vm_rss_index:
if i == vm_rss_index:
vm_rss = f_list[i].split('\t')[1][:-3]
if i is vm_swap_index:
if i == vm_swap_index:
vm_swap = f_list[i].split('\t')[1][:-3]
return name, uid, vm_rss, vm_swap

View File

@ -3,19 +3,7 @@
from ctypes import CDLL
from time import sleep
from sys import argv
"""
Execute the command
find /sys/fs/cgroup -name memory.pressure
to find available memory.pressure files (except /proc/pressure/memory).
(relevant for cgroup2)
"""
if len(argv) > 1:
psi_path = argv[1]
else:
psi_path = '/proc/pressure/memory'
import os
def mlockall():
@ -33,16 +21,13 @@ def mlockall():
MCL_CURRENT | MCL_FUTURE
)
if result != 0:
print('WARNING: cannot lock all memory')
pass
else:
pass
else:
pass
mlockall()
def psi_path_to_metrics(psi_path):
with open(psi_path) as f:
@ -62,8 +47,18 @@ def psi_path_to_metrics(psi_path):
full_avg10, full_avg60, full_avg300)
print('Path to PSI file: {}\n'.format(psi_path))
if len(argv) > 1:
psi_path = argv[1]
else:
psi_path = '/proc/pressure/memory'
if not os.path.exists(psi_path):
print('PSI path does not exist. Exit.')
exit()
mlockall()
print('Path to PSI file: {}\n'.format(psi_path))
print(' avg10 avg60 avg300 avg10 avg60 avg300')

60
psi-top
View File

@ -1,45 +1,9 @@
#!/usr/bin/env python3
from ctypes import CDLL
from time import sleep, time
import os
"""
Execute the command
find /sys/fs/cgroup -name memory.pressure
to find available memory.pressure files (except /proc/pressure/memory).
(relevant for cgroup2)
"""
psi_path = '/proc/pressure/memory'
def mlockall():
MCL_CURRENT = 1
MCL_FUTURE = 2
MCL_ONFAULT = 4
libc = CDLL('libc.so.6', use_errno=True)
result = libc.mlockall(
MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
)
if result != 0:
result = libc.mlockall(
MCL_CURRENT | MCL_FUTURE
)
if result != 0:
print('WARNING: cannot lock all memory')
else:
pass
else:
pass
mlockall()
t0 = time()
def psi_path_to_metrics(psi_path):
with open(psi_path) as f:
@ -59,7 +23,6 @@ def psi_path_to_metrics(psi_path):
full_avg10, full_avg60, full_avg300)
def cgroup2_root():
"""
"""
@ -93,7 +56,19 @@ def psi_path_to_cgroup2(path):
i = cgroup2_root()
if i is None:
print('cgroup2 not mounted')
else:
print('cgroup2 root dir:', i)
psi_support = os.path.exists(psi_path)
if not psi_support:
print('PSI is not supported, /proc/pressure/memory does not exist. Exit.')
exit(1)
if i is not None:
y = get_psi_mem_files(i)
for path in y:
@ -105,14 +80,16 @@ print(' avg10 avg60 avg300 avg10 avg60 avg300 cgroup2')
print(' ----- ----- ------ ----- ----- ------ ---------')
(some_avg10, some_avg60, some_avg300, full_avg10, full_avg60, full_avg300) = psi_path_to_metrics('/proc/pressure/memory')
(some_avg10, some_avg60, some_avg300, full_avg10, full_avg60, full_avg300
) = psi_path_to_metrics('/proc/pressure/memory')
print('some {} {} {} | full {} {} {} {}'.format(
some_avg10.rjust(6),
some_avg60.rjust(6),
some_avg300.rjust(6),
full_avg10.rjust(6),
full_avg60.rjust(6),
full_avg300.rjust(6), '[SYSTEM]'))
full_avg300.rjust(6), '[SYSTEM_WIDE]'))
for psi_path in path_list:
@ -126,8 +103,3 @@ for psi_path in path_list:
full_avg10.rjust(6),
full_avg60.rjust(6),
full_avg300.rjust(6), psi_path_to_cgroup2(psi_path)))
print(time() - t0)