From 2500c6cef7d1a10cddb47b8976335242af77c7ea Mon Sep 17 00:00:00 2001 From: Alexey Avramov Date: Mon, 10 Aug 2020 00:17:41 +0900 Subject: [PATCH] Add checking kmsg for OOM events --- conf/nohang/nohang-desktop.conf.in | 53 ++++-- conf/nohang/nohang.conf.in | 13 ++ conf/nohang/test.conf | 37 +++- src/nohang | 278 ++++++++++++++++++++++++++++- systemd/nohang-desktop.service.in | 22 ++- systemd/nohang.service.in | 22 ++- 6 files changed, 385 insertions(+), 40 deletions(-) diff --git a/conf/nohang/nohang-desktop.conf.in b/conf/nohang/nohang-desktop.conf.in index 9eda940..ad3e785 100644 --- a/conf/nohang/nohang-desktop.conf.in +++ b/conf/nohang/nohang-desktop.conf.in @@ -1,24 +1,39 @@ - This is the configuration file of the nohang daemon. - The configuration includes the following sections: +## This is the configuration file of the nohang daemon. - 1. Common zram settings - 2. Common PSI settings - 3. Poll rate - 4. Warnings and notifications - 5. Soft (SIGTERM) threshold - 6. Hard (SIGKILL) threshold - 7. Customize victim selection: adjusting badness of processes - 8. Customize soft corrective actions - 9. Misc settings - 10. Verbosity, debug, logging +## The configuration includes the following sections: +## 0. Check kernel messages for OOM events +## 1. Common zram settings +## 2. Common PSI settings +## 3. Poll rate +## 4. Warnings and notifications +## 5. Soft (SIGTERM) threshold +## 6. Hard (SIGKILL) threshold +## 7. Customize victim selection: adjusting badness of processes +## 8. Customize soft corrective actions +## 9. Misc settings +## 10. Verbosity, debug, logging - WARNING! - - Lines starting with #, tabs and whitespace characters are comments. - - Lines starting with @ contain optional parameters that may be repeated. - - All values are case sensitive. - - nohang doesn't forbid you to shoot yourself in the foot. Be careful! - - Restart the daemon after editing the file to apply the new settings. - - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang-desktop.conf +## WARNING! +## - Lines starting with #, tabs and whitespace characters are comments. +## - Lines starting with @ contain optional parameters that may be repeated. +## - All values are case sensitive. +## - nohang doesn't forbid you to shoot yourself in the foot. Be careful! +## - Restart the daemon after editing the file to apply the new settings. +## - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang.conf + +## To find config keys descriptions see man(8) nohang + +############################################################################### + +## 0. Check kernel messages for OOM events + +# @check_kmsg +## Type: boolean +## Comment/uncomment to disable/enable checking kmsg for OOM events + +# @debug_kmsg +## Type: boolean +## Comment/uncomment to disable/enable debug checking kmsg ############################################################################### diff --git a/conf/nohang/nohang.conf.in b/conf/nohang/nohang.conf.in index e05147d..0d5a3af 100644 --- a/conf/nohang/nohang.conf.in +++ b/conf/nohang/nohang.conf.in @@ -1,6 +1,7 @@ ## This is the configuration file of the nohang daemon. ## The configuration includes the following sections: +## 0. Check kernel messages for OOM events ## 1. Common zram settings ## 2. Common PSI settings ## 3. Poll rate @@ -24,6 +25,18 @@ ############################################################################### +## 0. Check kernel messages for OOM events + +# @check_kmsg +## Type: boolean +## Comment/uncomment to disable/enable checking kmsg for OOM events + +# @debug_kmsg +## Type: boolean +## Comment/uncomment to disable/enable debug checking kmsg + +############################################################################### + ## 1. Common zram settings zram_checking_enabled = False diff --git a/conf/nohang/test.conf b/conf/nohang/test.conf index eaaffbd..18a89ba 100644 --- a/conf/nohang/test.conf +++ b/conf/nohang/test.conf @@ -1,4 +1,39 @@ - This is nohang config file. +## This is the configuration file of the nohang daemon. + +## The configuration includes the following sections: +## 0. Check kernel messages for OOM events +## 1. Common zram settings +## 2. Common PSI settings +## 3. Poll rate +## 4. Warnings and notifications +## 5. Soft (SIGTERM) threshold +## 6. Hard (SIGKILL) threshold +## 7. Customize victim selection: adjusting badness of processes +## 8. Customize soft corrective actions +## 9. Misc settings +## 10. Verbosity, debug, logging + +## WARNING! +## - Lines starting with #, tabs and whitespace characters are comments. +## - Lines starting with @ contain optional parameters that may be repeated. +## - All values are case sensitive. +## - nohang doesn't forbid you to shoot yourself in the foot. Be careful! +## - Restart the daemon after editing the file to apply the new settings. +## - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang.conf + +## To find config keys descriptions see man(8) nohang + +############################################################################### + +## 0. Check kernel messages for OOM events + +# @check_kmsg +## Type: boolean +## Comment/uncomment to disable/enable checking kmsg for OOM events + +# @debug_kmsg +## Type: boolean +## Comment/uncomment to disable/enable debug checking kmsg ############################################################################### diff --git a/src/nohang b/src/nohang index 268b7a0..74c3673 100755 --- a/src/nohang +++ b/src/nohang @@ -228,7 +228,7 @@ def exe(cmd): proc.wait(timeout=exe_timeout) exit_status = proc.poll() t4 = monotonic() - log('Command-{} execution completed in {} sec; exit status' + log('Command-{} execution completed in {}s; exit status' ': {}'.format(cmd_num, round(t4 - t3, 3), exit_status)) except TimeoutExpired: proc.kill() @@ -352,7 +352,7 @@ def pop(cmd): cmd_num = cmd_num_dict['cmd_num'] if swap_total == 0: - wait_time = 10 + wait_time = 15 else: wait_time = 30 @@ -375,7 +375,7 @@ def pop(cmd): t4 = monotonic() if debug_gui_notifications: - log('Command-{} execution completed in {} sec; exit status' + log('Command-{} execution completed in {}s; exit status' ': {}'.format(cmd_num, round(t4 - t3, 3), err)) except TimeoutExpired: @@ -537,6 +537,10 @@ def send_notify_etc(pid, name, command): def check_config(): """ """ + log('\n0. Check kernel messages for OOM events') + log(' @check_kmsg: <{}>'.format(check_kmsg)) + log(' @debug_kmsg: <{}>'.format(debug_kmsg)) + log('\n1. Common zram settings') log(' zram_checking_enabled: {}'.format(zram_checking_enabled)) @@ -2352,6 +2356,32 @@ def log_meminfo(): )) +def is_post_oom_delay_exceeded(): + """ + """ + oom_t = oom_dict['t'] + + if oom_t is not None: + + post_oom_t = monotonic() - oom_t + + if post_oom_t < post_oom_delay: + + log('Time since OOM: {}s; post OOM delay ({}s) is not exceed' + 'ed'.format(round(post_oom_t, 3), post_oom_delay)) + + if debug_sleep: + log('Sleep {}s'.format(over_sleep)) + sleep(over_sleep) + + return False + else: + return True + + else: + return True + + def implement_corrective_action( threshold, mem_info_list, @@ -2368,6 +2398,11 @@ def implement_corrective_action( debug_corrective_action = True + post_oom_delay_exceeded = is_post_oom_delay_exceeded() + if not post_oom_delay_exceeded: + log(separator_out) + return psi_t0 + time0 = monotonic() nu = [] @@ -2440,6 +2475,11 @@ def implement_corrective_action( pid, victim_badness, name, victim_id = fff + post_oom_delay_exceeded = is_post_oom_delay_exceeded() + if not post_oom_delay_exceeded: + log(separator_out) + return psi_t0 + log('Recheck memory levels...') (masf_threshold, masf_info, mem_available, hard_threshold_min_swap_kb, @@ -2583,6 +2623,11 @@ def implement_corrective_action( start_action = monotonic() + post_oom_delay_exceeded = is_post_oom_delay_exceeded() + if not post_oom_delay_exceeded: + log(separator_out) + return psi_t0 + if soft_match: cmd = command.replace('$PID', pid).replace('$NAME', pid_to_name( @@ -2880,6 +2925,182 @@ def calculate_percent(arg_key): return mem_min_kb, mem_min_mb, mem_min_percent +def is_kmsg_ok(): + """ + """ + m_test0 = monotonic() + test_string = 'nohang: clock calibration: {}\n'.format(m_test0) + try: + write(kmsg_path, test_string) + except Exception as e: + log(e) + return None + try: + with open(kmsg_path): + pass + except Exception as e: + log(e) + return None + return m_test0, test_string + + +def check_kmsg_fn(): + """ + Checking kernel messages for OOM events. + + It catches lines like follow: + + 3,591,248095206,-;Out of memory: Kill process 1061 (tail) score 917 or sacr + ifice child + + 3,1699,7208057400,-;Out of memory: Killed process 9277 (tail) total-vm:7837 + 24kB, anon-rss:566796kB, file-rss:1508kB, shmem-rss:0kB, UID:1000 + + 3,728,398101038,-;Memory cgroup out of memory: Killed process 2497 (tail) t + otal-vm:6468944kB, anon-rss:3013188kB, file-rss:1676kB, shmem-rss:0kB, UID + : 1000 pgtables:12300kB oom_score_adj:0 + + 4,2928853,54035939870,-;tail invoked oom-killer: gfp_mask=0x100cca(GFP_HIGH + USER_MOVABLE), order=0, oom_score_adj=0 + + 6,2928994,54035940084,-;Tasks in /user.slice/user-1000.slice/session-235.sc + ope are going to be killed due to memory.oom.group set + + 6,2929028,54036119432,-;oom_reaper: reaped process 15512 (tail), now anon-r + ss:0kB, file-rss:0kB, shmem-rss:0kB + + """ + broken_pipe = False + start_t0 = monotonic() + counter = 0 + + with open(kmsg_path) as f: + + while True: + + d = monotonic() - start_t0 + if d > kmsg_start_timeout: + log('kmsg: cannot start in {}s'.format( + kmsg_start_timeout)) + return None + + try: + s = f.readline() + counter += 1 + if broken_pipe: + log('kmsg: BrokenPipeError occurred, some messages lost') + broken_pipe = False + except BrokenPipeError: + broken_pipe = True + if debug_kmsg: + log('debug kmsg: BrokenPipeError, sleep {}s'.format( + sleep_at_broken_pipe)) + sleep(sleep_at_broken_pipe) + continue + + if test_string in s: + + if debug_kmsg: + log('debug kmsg: {}'.format(s[:-1])) + + kmsg_mono0 = float(s.split(',')[2]) / 1000000 + kmsg_t_delta = kmsg_mono0 - m_test0 + + if debug_kmsg: + log('debug kmsg: kmsg_t_delta (kmsg_mono - test_mono)' + ': {}'.format(round(kmsg_t_delta, 3))) + + log('Checking kmsg for OOM events has started in {}s;' + ' {} lines parsed'.format(round(d, 3), counter)) + + break + + while True: + + try: + s = f.readline() + if broken_pipe: + log('kmsg: BrokenPipeError occurred, some messages lost') + broken_pipe = False + except BrokenPipeError: + if debug_kmsg: + log('debug kmsg: BrokenPipeError, sleep {}s'.format( + sleep_at_broken_pipe)) + broken_pipe = True + sleep(sleep_at_broken_pipe) + continue + + if (s[:2] == '3,' and 'ut of memory: Kill' in s): + + if debug_kmsg: + log('debug kmsg: {}'.format(s[:-1])) + + k_mono = float((s.split(',')[2])) / 1000000 + krm = k_mono - kmsg_t_delta + oom_dict['t'] = krm + last_action_dict['t'] = krm + kmsg = s[:-1].rpartition(';')[2] + log('kmsg: {}'.format(kmsg)) + + update_stat_dict('kmsg: Out of memory: Kill') + + continue + + if s[:2] == '6,': + + if ';oom_reaper: reaped process ' in s: + + if debug_kmsg: + log('debug kmsg: {}'.format(s[:-1])) + + k_mono = float((s.split(',')[2])) / 1000000 + krm = k_mono - kmsg_t_delta + last_action_dict['t'] = None + kmsg = s[:-1].rpartition(';')[2] + log('kmsg: {}'.format(kmsg)) + + update_stat_dict('kmsg: oom_reaper: reaped process') + + continue + + if 'killed due to memory.oom.group set' in s: + + if debug_kmsg: + log('debug kmsg: {}'.format(s[:-1])) + + k_mono = float((s.split(',')[2])) / 1000000 + krm = k_mono - kmsg_t_delta + oom_dict['t'] = krm + last_action_dict['t'] = krm + kmsg = s[:-1].rpartition(';')[2] + log('kmsg: {}'.format(kmsg)) + + update_stat_dict('killed due to memory.oom.group set') + + continue + + if (s[:2] == '4,' and ' invoked oom-killer: ' in s): + + if debug_kmsg: + log('debug kmsg: {}'.format(s[:-1])) + + k_mono = float((s.split(',')[2])) / 1000000 + krm = k_mono - kmsg_t_delta + oom_dict['t'] = krm + last_action_dict['t'] = krm + kmsg = s[:-1].rpartition(';')[2] + log('kmsg: {}'.format(kmsg)) + + update_stat_dict('kmsg: invoked oom-killer') + print_stat_dict() + + if post_action_gui_notifications: + send_notification( + 'kmsg: Out of memory!', 'invoked oom-killer') + + continue + + ############################################################################### @@ -3077,8 +3298,16 @@ config = os.path.abspath(config) print('Starting nohang with the config: {}'.format(config)) -separator_in = '>>' + '=' * 75 + '>>' -separator_out = '<<' + '=' * 75 + '<<' +# separator_in = '>>' + '=' * 75 + '>>' +# separator_out = '<<' + '=' * 75 + '<<' + + +# separator_in = '>>=== implement_corrective_action() ==================>>' +# separator_out = '<<=== exit from implement_corrective_action() ========<<' + + +separator_in = '>----- implement_corrective_action() ------------------>' +separator_out = '<----- exit from implement_corrective_action() --------<' ############################################################################### @@ -3108,6 +3337,11 @@ soft_actions_list = [] # separator for optional parameters (that starts with @) opt_separator = '///' + +check_kmsg = False +debug_kmsg = False + + # stupid conf parsing, it needs refactoring try: with open(config) as f: @@ -3123,6 +3357,16 @@ try: etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1') etc2_2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V2') + if line.startswith('@check_kmsg'): + line = line.rstrip() + if line == '@check_kmsg': + check_kmsg = True + + if line.startswith('@debug_kmsg'): + line = line.rstrip() + if line == '@debug_kmsg': + debug_kmsg = True + if (not a and not b and not c and not d and not etc and not etc2 and not etc2_2): a = line.partition('=') @@ -3698,7 +3942,8 @@ if print_proc_table_flag: func_print_proc_table() -if (low_memory_warnings_enabled or +if (check_kmsg or + low_memory_warnings_enabled or post_action_gui_notifications or check_warning_exe or soft_actions or @@ -3831,6 +4076,27 @@ arcstats_path = '/proc/spl/kstat/zfs/arcstats' ZFS = os.path.exists(arcstats_path) + +kmsg_path = '/dev/kmsg' + +post_oom_delay = 1.0 + +kmsg_start_timeout = 10.0 + +sleep_at_broken_pipe = 0.1 + +oom_dict = dict() + +oom_dict['t'] = None + + +if check_kmsg: + kmsg_test = is_kmsg_ok() + if kmsg_test is not None: + m_test0, test_string = kmsg_test + start_thread(check_kmsg_fn) + + if ZFS: log('WARNING: ZFS found. Available memory will not be calculated ' 'correctly (issue#89)') diff --git a/systemd/nohang-desktop.service.in b/systemd/nohang-desktop.service.in index 78016eb..aea929f 100644 --- a/systemd/nohang-desktop.service.in +++ b/systemd/nohang-desktop.service.in @@ -14,7 +14,7 @@ RestartSec=0 CPUSchedulingResetOnFork=true RestrictRealtime=yes -TasksMax=20 +TasksMax=25 MemoryMax=100M MemorySwapMax=100M @@ -25,11 +25,13 @@ InaccessiblePaths=/home /root ProtectKernelTunables=true ProtectKernelModules=true ProtectControlGroups=true -PrivateDevices=true -PrivateTmp=true +ProtectHostname=true MemoryDenyWriteExecute=yes RestrictNamespaces=yes LockPersonality=yes +PrivateTmp=true +DeviceAllow=/dev/kmsg rw +DevicePolicy=closed # Capabilities whitelist: # CAP_KILL is required to send signals @@ -37,11 +39,17 @@ LockPersonality=yes # CAP_SYS_PTRACE is required to check /proc/[pid]/exe realpathes # CAP_DAC_READ_SEARCH is required to read /proc/[pid]/environ files # CAP_DAC_OVERRIDE fixes #94 -# CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE are required to send GUI notifications -CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE +# CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE +# are required to send GUI notifications +# CAP_SYSLOG is required to check /dev/kmsg for OOM events -# `PrivateNetwork=true` breaks GUI notifications on oldstable distros (Debian 8, CentOS 7, Linux Mint 18) -# On modern distros you can set PrivateNetwork=true for security reasons +CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE \ +CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID \ +CAP_SYS_RESOURCE CAP_SYSLOG + +# `PrivateNetwork=true` breaks GUI notifications on oldstable distros +# (Debian 8, CentOS 7, Linux Mint 18). On modern distros you can set +# PrivateNetwork=true for security reasons. #PrivateNetwork=true # Set realtime CPU scheduling policy if you want diff --git a/systemd/nohang.service.in b/systemd/nohang.service.in index 189c9ea..e44ae74 100644 --- a/systemd/nohang.service.in +++ b/systemd/nohang.service.in @@ -14,7 +14,7 @@ RestartSec=0 CPUSchedulingResetOnFork=true RestrictRealtime=yes -TasksMax=20 +TasksMax=25 MemoryMax=100M MemorySwapMax=100M @@ -25,11 +25,13 @@ InaccessiblePaths=/home /root ProtectKernelTunables=true ProtectKernelModules=true ProtectControlGroups=true -PrivateDevices=true -PrivateTmp=true +ProtectHostname=true MemoryDenyWriteExecute=yes RestrictNamespaces=yes LockPersonality=yes +PrivateTmp=true +DeviceAllow=/dev/kmsg rw +DevicePolicy=closed # Capabilities whitelist: # CAP_KILL is required to send signals @@ -37,11 +39,17 @@ LockPersonality=yes # CAP_SYS_PTRACE is required to check /proc/[pid]/exe realpathes # CAP_DAC_READ_SEARCH is required to read /proc/[pid]/environ files # CAP_DAC_OVERRIDE fixes #94 -# CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE are required to send GUI notifications -CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE +# CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE +# are required to send GUI notifications +# CAP_SYSLOG is required to check /dev/kmsg for OOM events -# `PrivateNetwork=true` breaks GUI notifications on oldstable distros (Debian 8, CentOS 7, Linux Mint 18) -# On modern distros you can set PrivateNetwork=true for security reasons +CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE \ +CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID \ +CAP_SYS_RESOURCE CAP_SYSLOG + +# `PrivateNetwork=true` breaks GUI notifications on oldstable distros +# (Debian 8, CentOS 7, Linux Mint 18). On modern distros you can set +# PrivateNetwork=true for security reasons. #PrivateNetwork=true # Set realtime CPU scheduling policy if you want