Add checking kmsg for OOM events

This commit is contained in:
Alexey Avramov 2020-08-10 00:17:41 +09:00
parent ad2ff5d81d
commit 2500c6cef7
6 changed files with 385 additions and 40 deletions

View File

@ -1,24 +1,39 @@
This is the configuration file of the nohang daemon. ## This is the configuration file of the nohang daemon.
The configuration includes the following sections:
1. Common zram settings ## The configuration includes the following sections:
2. Common PSI settings ## 0. Check kernel messages for OOM events
3. Poll rate ## 1. Common zram settings
4. Warnings and notifications ## 2. Common PSI settings
5. Soft (SIGTERM) threshold ## 3. Poll rate
6. Hard (SIGKILL) threshold ## 4. Warnings and notifications
7. Customize victim selection: adjusting badness of processes ## 5. Soft (SIGTERM) threshold
8. Customize soft corrective actions ## 6. Hard (SIGKILL) threshold
9. Misc settings ## 7. Customize victim selection: adjusting badness of processes
10. Verbosity, debug, logging ## 8. Customize soft corrective actions
## 9. Misc settings
## 10. Verbosity, debug, logging
WARNING! ## WARNING!
- Lines starting with #, tabs and whitespace characters are comments. ## - Lines starting with #, tabs and whitespace characters are comments.
- Lines starting with @ contain optional parameters that may be repeated. ## - Lines starting with @ contain optional parameters that may be repeated.
- All values are case sensitive. ## - All values are case sensitive.
- nohang doesn't forbid you to shoot yourself in the foot. Be careful! ## - nohang doesn't forbid you to shoot yourself in the foot. Be careful!
- Restart the daemon after editing the file to apply the new settings. ## - Restart the daemon after editing the file to apply the new settings.
- You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang-desktop.conf ## - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang.conf
## To find config keys descriptions see man(8) nohang
###############################################################################
## 0. Check kernel messages for OOM events
# @check_kmsg
## Type: boolean
## Comment/uncomment to disable/enable checking kmsg for OOM events
# @debug_kmsg
## Type: boolean
## Comment/uncomment to disable/enable debug checking kmsg
############################################################################### ###############################################################################

View File

@ -1,6 +1,7 @@
## This is the configuration file of the nohang daemon. ## This is the configuration file of the nohang daemon.
## The configuration includes the following sections: ## The configuration includes the following sections:
## 0. Check kernel messages for OOM events
## 1. Common zram settings ## 1. Common zram settings
## 2. Common PSI settings ## 2. Common PSI settings
## 3. Poll rate ## 3. Poll rate
@ -24,6 +25,18 @@
############################################################################### ###############################################################################
## 0. Check kernel messages for OOM events
# @check_kmsg
## Type: boolean
## Comment/uncomment to disable/enable checking kmsg for OOM events
# @debug_kmsg
## Type: boolean
## Comment/uncomment to disable/enable debug checking kmsg
###############################################################################
## 1. Common zram settings ## 1. Common zram settings
zram_checking_enabled = False zram_checking_enabled = False

View File

@ -1,4 +1,39 @@
This is nohang config file. ## This is the configuration file of the nohang daemon.
## The configuration includes the following sections:
## 0. Check kernel messages for OOM events
## 1. Common zram settings
## 2. Common PSI settings
## 3. Poll rate
## 4. Warnings and notifications
## 5. Soft (SIGTERM) threshold
## 6. Hard (SIGKILL) threshold
## 7. Customize victim selection: adjusting badness of processes
## 8. Customize soft corrective actions
## 9. Misc settings
## 10. Verbosity, debug, logging
## WARNING!
## - Lines starting with #, tabs and whitespace characters are comments.
## - Lines starting with @ contain optional parameters that may be repeated.
## - All values are case sensitive.
## - nohang doesn't forbid you to shoot yourself in the foot. Be careful!
## - Restart the daemon after editing the file to apply the new settings.
## - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang.conf
## To find config keys descriptions see man(8) nohang
###############################################################################
## 0. Check kernel messages for OOM events
# @check_kmsg
## Type: boolean
## Comment/uncomment to disable/enable checking kmsg for OOM events
# @debug_kmsg
## Type: boolean
## Comment/uncomment to disable/enable debug checking kmsg
############################################################################### ###############################################################################

View File

@ -228,7 +228,7 @@ def exe(cmd):
proc.wait(timeout=exe_timeout) proc.wait(timeout=exe_timeout)
exit_status = proc.poll() exit_status = proc.poll()
t4 = monotonic() t4 = monotonic()
log('Command-{} execution completed in {} sec; exit status' log('Command-{} execution completed in {}s; exit status'
': {}'.format(cmd_num, round(t4 - t3, 3), exit_status)) ': {}'.format(cmd_num, round(t4 - t3, 3), exit_status))
except TimeoutExpired: except TimeoutExpired:
proc.kill() proc.kill()
@ -352,7 +352,7 @@ def pop(cmd):
cmd_num = cmd_num_dict['cmd_num'] cmd_num = cmd_num_dict['cmd_num']
if swap_total == 0: if swap_total == 0:
wait_time = 10 wait_time = 15
else: else:
wait_time = 30 wait_time = 30
@ -375,7 +375,7 @@ def pop(cmd):
t4 = monotonic() t4 = monotonic()
if debug_gui_notifications: if debug_gui_notifications:
log('Command-{} execution completed in {} sec; exit status' log('Command-{} execution completed in {}s; exit status'
': {}'.format(cmd_num, round(t4 - t3, 3), err)) ': {}'.format(cmd_num, round(t4 - t3, 3), err))
except TimeoutExpired: except TimeoutExpired:
@ -537,6 +537,10 @@ def send_notify_etc(pid, name, command):
def check_config(): def check_config():
""" """
""" """
log('\n0. Check kernel messages for OOM events')
log(' @check_kmsg: <{}>'.format(check_kmsg))
log(' @debug_kmsg: <{}>'.format(debug_kmsg))
log('\n1. Common zram settings') log('\n1. Common zram settings')
log(' zram_checking_enabled: {}'.format(zram_checking_enabled)) log(' zram_checking_enabled: {}'.format(zram_checking_enabled))
@ -2352,6 +2356,32 @@ def log_meminfo():
)) ))
def is_post_oom_delay_exceeded():
"""
"""
oom_t = oom_dict['t']
if oom_t is not None:
post_oom_t = monotonic() - oom_t
if post_oom_t < post_oom_delay:
log('Time since OOM: {}s; post OOM delay ({}s) is not exceed'
'ed'.format(round(post_oom_t, 3), post_oom_delay))
if debug_sleep:
log('Sleep {}s'.format(over_sleep))
sleep(over_sleep)
return False
else:
return True
else:
return True
def implement_corrective_action( def implement_corrective_action(
threshold, threshold,
mem_info_list, mem_info_list,
@ -2368,6 +2398,11 @@ def implement_corrective_action(
debug_corrective_action = True debug_corrective_action = True
post_oom_delay_exceeded = is_post_oom_delay_exceeded()
if not post_oom_delay_exceeded:
log(separator_out)
return psi_t0
time0 = monotonic() time0 = monotonic()
nu = [] nu = []
@ -2440,6 +2475,11 @@ def implement_corrective_action(
pid, victim_badness, name, victim_id = fff pid, victim_badness, name, victim_id = fff
post_oom_delay_exceeded = is_post_oom_delay_exceeded()
if not post_oom_delay_exceeded:
log(separator_out)
return psi_t0
log('Recheck memory levels...') log('Recheck memory levels...')
(masf_threshold, masf_info, mem_available, hard_threshold_min_swap_kb, (masf_threshold, masf_info, mem_available, hard_threshold_min_swap_kb,
@ -2583,6 +2623,11 @@ def implement_corrective_action(
start_action = monotonic() start_action = monotonic()
post_oom_delay_exceeded = is_post_oom_delay_exceeded()
if not post_oom_delay_exceeded:
log(separator_out)
return psi_t0
if soft_match: if soft_match:
cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name( cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name(
@ -2880,6 +2925,182 @@ def calculate_percent(arg_key):
return mem_min_kb, mem_min_mb, mem_min_percent return mem_min_kb, mem_min_mb, mem_min_percent
def is_kmsg_ok():
"""
"""
m_test0 = monotonic()
test_string = 'nohang: clock calibration: {}\n'.format(m_test0)
try:
write(kmsg_path, test_string)
except Exception as e:
log(e)
return None
try:
with open(kmsg_path):
pass
except Exception as e:
log(e)
return None
return m_test0, test_string
def check_kmsg_fn():
"""
Checking kernel messages for OOM events.
It catches lines like follow:
3,591,248095206,-;Out of memory: Kill process 1061 (tail) score 917 or sacr
ifice child
3,1699,7208057400,-;Out of memory: Killed process 9277 (tail) total-vm:7837
24kB, anon-rss:566796kB, file-rss:1508kB, shmem-rss:0kB, UID:1000
3,728,398101038,-;Memory cgroup out of memory: Killed process 2497 (tail) t
otal-vm:6468944kB, anon-rss:3013188kB, file-rss:1676kB, shmem-rss:0kB, UID
: 1000 pgtables:12300kB oom_score_adj:0
4,2928853,54035939870,-;tail invoked oom-killer: gfp_mask=0x100cca(GFP_HIGH
USER_MOVABLE), order=0, oom_score_adj=0
6,2928994,54035940084,-;Tasks in /user.slice/user-1000.slice/session-235.sc
ope are going to be killed due to memory.oom.group set
6,2929028,54036119432,-;oom_reaper: reaped process 15512 (tail), now anon-r
ss:0kB, file-rss:0kB, shmem-rss:0kB
"""
broken_pipe = False
start_t0 = monotonic()
counter = 0
with open(kmsg_path) as f:
while True:
d = monotonic() - start_t0
if d > kmsg_start_timeout:
log('kmsg: cannot start in {}s'.format(
kmsg_start_timeout))
return None
try:
s = f.readline()
counter += 1
if broken_pipe:
log('kmsg: BrokenPipeError occurred, some messages lost')
broken_pipe = False
except BrokenPipeError:
broken_pipe = True
if debug_kmsg:
log('debug kmsg: BrokenPipeError, sleep {}s'.format(
sleep_at_broken_pipe))
sleep(sleep_at_broken_pipe)
continue
if test_string in s:
if debug_kmsg:
log('debug kmsg: {}'.format(s[:-1]))
kmsg_mono0 = float(s.split(',')[2]) / 1000000
kmsg_t_delta = kmsg_mono0 - m_test0
if debug_kmsg:
log('debug kmsg: kmsg_t_delta (kmsg_mono - test_mono)'
': {}'.format(round(kmsg_t_delta, 3)))
log('Checking kmsg for OOM events has started in {}s;'
' {} lines parsed'.format(round(d, 3), counter))
break
while True:
try:
s = f.readline()
if broken_pipe:
log('kmsg: BrokenPipeError occurred, some messages lost')
broken_pipe = False
except BrokenPipeError:
if debug_kmsg:
log('debug kmsg: BrokenPipeError, sleep {}s'.format(
sleep_at_broken_pipe))
broken_pipe = True
sleep(sleep_at_broken_pipe)
continue
if (s[:2] == '3,' and 'ut of memory: Kill' in s):
if debug_kmsg:
log('debug kmsg: {}'.format(s[:-1]))
k_mono = float((s.split(',')[2])) / 1000000
krm = k_mono - kmsg_t_delta
oom_dict['t'] = krm
last_action_dict['t'] = krm
kmsg = s[:-1].rpartition(';')[2]
log('kmsg: {}'.format(kmsg))
update_stat_dict('kmsg: Out of memory: Kill')
continue
if s[:2] == '6,':
if ';oom_reaper: reaped process ' in s:
if debug_kmsg:
log('debug kmsg: {}'.format(s[:-1]))
k_mono = float((s.split(',')[2])) / 1000000
krm = k_mono - kmsg_t_delta
last_action_dict['t'] = None
kmsg = s[:-1].rpartition(';')[2]
log('kmsg: {}'.format(kmsg))
update_stat_dict('kmsg: oom_reaper: reaped process')
continue
if 'killed due to memory.oom.group set' in s:
if debug_kmsg:
log('debug kmsg: {}'.format(s[:-1]))
k_mono = float((s.split(',')[2])) / 1000000
krm = k_mono - kmsg_t_delta
oom_dict['t'] = krm
last_action_dict['t'] = krm
kmsg = s[:-1].rpartition(';')[2]
log('kmsg: {}'.format(kmsg))
update_stat_dict('killed due to memory.oom.group set')
continue
if (s[:2] == '4,' and ' invoked oom-killer: ' in s):
if debug_kmsg:
log('debug kmsg: {}'.format(s[:-1]))
k_mono = float((s.split(',')[2])) / 1000000
krm = k_mono - kmsg_t_delta
oom_dict['t'] = krm
last_action_dict['t'] = krm
kmsg = s[:-1].rpartition(';')[2]
log('kmsg: {}'.format(kmsg))
update_stat_dict('kmsg: invoked oom-killer')
print_stat_dict()
if post_action_gui_notifications:
send_notification(
'kmsg: Out of memory!', 'invoked oom-killer')
continue
############################################################################### ###############################################################################
@ -3077,8 +3298,16 @@ config = os.path.abspath(config)
print('Starting nohang with the config: {}'.format(config)) print('Starting nohang with the config: {}'.format(config))
separator_in = '>>' + '=' * 75 + '>>' # separator_in = '>>' + '=' * 75 + '>>'
separator_out = '<<' + '=' * 75 + '<<' # separator_out = '<<' + '=' * 75 + '<<'
# separator_in = '>>=== implement_corrective_action() ==================>>'
# separator_out = '<<=== exit from implement_corrective_action() ========<<'
separator_in = '>----- implement_corrective_action() ------------------>'
separator_out = '<----- exit from implement_corrective_action() --------<'
############################################################################### ###############################################################################
@ -3108,6 +3337,11 @@ soft_actions_list = []
# separator for optional parameters (that starts with @) # separator for optional parameters (that starts with @)
opt_separator = '///' opt_separator = '///'
check_kmsg = False
debug_kmsg = False
# stupid conf parsing, it needs refactoring # stupid conf parsing, it needs refactoring
try: try:
with open(config) as f: with open(config) as f:
@ -3123,6 +3357,16 @@ try:
etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1') etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1')
etc2_2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V2') etc2_2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V2')
if line.startswith('@check_kmsg'):
line = line.rstrip()
if line == '@check_kmsg':
check_kmsg = True
if line.startswith('@debug_kmsg'):
line = line.rstrip()
if line == '@debug_kmsg':
debug_kmsg = True
if (not a and not b and not c and not d and not etc and if (not a and not b and not c and not d and not etc and
not etc2 and not etc2_2): not etc2 and not etc2_2):
a = line.partition('=') a = line.partition('=')
@ -3698,7 +3942,8 @@ if print_proc_table_flag:
func_print_proc_table() func_print_proc_table()
if (low_memory_warnings_enabled or if (check_kmsg or
low_memory_warnings_enabled or
post_action_gui_notifications or post_action_gui_notifications or
check_warning_exe or check_warning_exe or
soft_actions or soft_actions or
@ -3831,6 +4076,27 @@ arcstats_path = '/proc/spl/kstat/zfs/arcstats'
ZFS = os.path.exists(arcstats_path) ZFS = os.path.exists(arcstats_path)
kmsg_path = '/dev/kmsg'
post_oom_delay = 1.0
kmsg_start_timeout = 10.0
sleep_at_broken_pipe = 0.1
oom_dict = dict()
oom_dict['t'] = None
if check_kmsg:
kmsg_test = is_kmsg_ok()
if kmsg_test is not None:
m_test0, test_string = kmsg_test
start_thread(check_kmsg_fn)
if ZFS: if ZFS:
log('WARNING: ZFS found. Available memory will not be calculated ' log('WARNING: ZFS found. Available memory will not be calculated '
'correctly (issue#89)') 'correctly (issue#89)')

View File

@ -14,7 +14,7 @@ RestartSec=0
CPUSchedulingResetOnFork=true CPUSchedulingResetOnFork=true
RestrictRealtime=yes RestrictRealtime=yes
TasksMax=20 TasksMax=25
MemoryMax=100M MemoryMax=100M
MemorySwapMax=100M MemorySwapMax=100M
@ -25,11 +25,13 @@ InaccessiblePaths=/home /root
ProtectKernelTunables=true ProtectKernelTunables=true
ProtectKernelModules=true ProtectKernelModules=true
ProtectControlGroups=true ProtectControlGroups=true
PrivateDevices=true ProtectHostname=true
PrivateTmp=true
MemoryDenyWriteExecute=yes MemoryDenyWriteExecute=yes
RestrictNamespaces=yes RestrictNamespaces=yes
LockPersonality=yes LockPersonality=yes
PrivateTmp=true
DeviceAllow=/dev/kmsg rw
DevicePolicy=closed
# Capabilities whitelist: # Capabilities whitelist:
# CAP_KILL is required to send signals # CAP_KILL is required to send signals
@ -37,11 +39,17 @@ LockPersonality=yes
# CAP_SYS_PTRACE is required to check /proc/[pid]/exe realpathes # CAP_SYS_PTRACE is required to check /proc/[pid]/exe realpathes
# CAP_DAC_READ_SEARCH is required to read /proc/[pid]/environ files # CAP_DAC_READ_SEARCH is required to read /proc/[pid]/environ files
# CAP_DAC_OVERRIDE fixes #94 # CAP_DAC_OVERRIDE fixes #94
# CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE are required to send GUI notifications # CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE
CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE # are required to send GUI notifications
# CAP_SYSLOG is required to check /dev/kmsg for OOM events
# `PrivateNetwork=true` breaks GUI notifications on oldstable distros (Debian 8, CentOS 7, Linux Mint 18) CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE \
# On modern distros you can set PrivateNetwork=true for security reasons CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID \
CAP_SYS_RESOURCE CAP_SYSLOG
# `PrivateNetwork=true` breaks GUI notifications on oldstable distros
# (Debian 8, CentOS 7, Linux Mint 18). On modern distros you can set
# PrivateNetwork=true for security reasons.
#PrivateNetwork=true #PrivateNetwork=true
# Set realtime CPU scheduling policy if you want # Set realtime CPU scheduling policy if you want

View File

@ -14,7 +14,7 @@ RestartSec=0
CPUSchedulingResetOnFork=true CPUSchedulingResetOnFork=true
RestrictRealtime=yes RestrictRealtime=yes
TasksMax=20 TasksMax=25
MemoryMax=100M MemoryMax=100M
MemorySwapMax=100M MemorySwapMax=100M
@ -25,11 +25,13 @@ InaccessiblePaths=/home /root
ProtectKernelTunables=true ProtectKernelTunables=true
ProtectKernelModules=true ProtectKernelModules=true
ProtectControlGroups=true ProtectControlGroups=true
PrivateDevices=true ProtectHostname=true
PrivateTmp=true
MemoryDenyWriteExecute=yes MemoryDenyWriteExecute=yes
RestrictNamespaces=yes RestrictNamespaces=yes
LockPersonality=yes LockPersonality=yes
PrivateTmp=true
DeviceAllow=/dev/kmsg rw
DevicePolicy=closed
# Capabilities whitelist: # Capabilities whitelist:
# CAP_KILL is required to send signals # CAP_KILL is required to send signals
@ -37,11 +39,17 @@ LockPersonality=yes
# CAP_SYS_PTRACE is required to check /proc/[pid]/exe realpathes # CAP_SYS_PTRACE is required to check /proc/[pid]/exe realpathes
# CAP_DAC_READ_SEARCH is required to read /proc/[pid]/environ files # CAP_DAC_READ_SEARCH is required to read /proc/[pid]/environ files
# CAP_DAC_OVERRIDE fixes #94 # CAP_DAC_OVERRIDE fixes #94
# CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE are required to send GUI notifications # CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE
CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE # are required to send GUI notifications
# CAP_SYSLOG is required to check /dev/kmsg for OOM events
# `PrivateNetwork=true` breaks GUI notifications on oldstable distros (Debian 8, CentOS 7, Linux Mint 18) CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE \
# On modern distros you can set PrivateNetwork=true for security reasons CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID \
CAP_SYS_RESOURCE CAP_SYSLOG
# `PrivateNetwork=true` breaks GUI notifications on oldstable distros
# (Debian 8, CentOS 7, Linux Mint 18). On modern distros you can set
# PrivateNetwork=true for security reasons.
#PrivateNetwork=true #PrivateNetwork=true
# Set realtime CPU scheduling policy if you want # Set realtime CPU scheduling policy if you want