diff --git a/README.md b/README.md
index 86db404..ceceaa8 100644
--- a/README.md
+++ b/README.md
@@ -176,7 +176,7 @@ See also `man journalctl`.
## Known problems
- Awful documentation
-- Slowly starting, slowly looking for a victim, especially when using swapspace
+- Slow to start and slow to find a victim, especially when using swapspace (although the current speed should be sufficient for more than 95% of all cases, IMHO)
- It is written in an interpreted language and is actually a prototype
## Contribution
@@ -194,8 +194,11 @@ Please create [issues](https://github.com/hakavlad/nohang/issues). Use cases, fe
- Display `UID`, `oom_score`, `oom_score_adj`, `VmSize`, `RssAnon`, `RssFile`, `RssShmem` and `cmdline` of the victim in corrective action reports
- Print in terminal with colors
- Print statistics on corrective actions after each corrective action
- - Optimize limiting `oom_score_adj`: now it can works without UID=0
- - Optimize GUI warnings: find env without run `ps` and `env`
+ - Improve poll rate algorithm
+ - Improve limiting `oom_score_adj`: it now works without UID=0
+ - Improve GUI warnings:
+ - Find env without running `ps` and `env`
+ - Handle all timeouts when notify-send starts
- Fix conf parsing: use of `line.partition('=')` instead of `line.split('=')`
- Add `PSI` support (using `/proc/pressure/memory`, need Linux 4.20+)
- Add `oom-sort`
diff --git a/nohang b/nohang
index a2221aa..fb74e67 100755
--- a/nohang
+++ b/nohang
@@ -21,10 +21,15 @@ self_pid = str(os.getpid())
self_uid = os.geteuid()
if self_uid == 0:
root = True
+else:
+ root = False
-wait_time = 2
-cache_time = 30
-cache_path = '/dev/shm/nohang_env_cache'
+wait_time = 14
+
+max_sleep_time = 2
+min_sleep_time = 0.1
+
+notify_helper_path = '/usr/bin/nohang_notify_helper'
psi_path = '/proc/pressure/memory'
psi_support = os.path.exists(psi_path)
@@ -119,79 +124,6 @@ def format_time(t):
return '{} h {} min {} sec'.format(h, m, s)
-def re_pid_environ(pid):
- """
- read environ of 1 process
- returns tuple with USER, DBUS, DISPLAY like follow:
- ('user', 'DISPLAY=:0',
- 'DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus')
- returns None if these vars is not in /proc/[pid]/environ
- """
- display_env = 'DISPLAY='
- dbus_env = 'DBUS_SESSION_BUS_ADDRESS='
- user_env = 'USER='
- try:
- env = str(rline1('/proc/' + pid + '/environ'))
- if display_env in env and dbus_env in env and user_env in env:
- env_list = env.split('\x00')
-
- # iterating over a list of process environment variables
- for i in env_list:
- if i.startswith(user_env):
- user = i
- continue
-
- if i.startswith(display_env):
- display = i[:10]
- continue
-
- if i.startswith(dbus_env):
- #if ',guid=' in i:
- # return None
- dbus = i
- continue
-
- if i.startswith('HOME='):
- # exclude Display Manager's user
- if i.startswith('HOME=/var'):
- return None
-
- env = user.partition('USER=')[2], display, dbus
- return env
-
- except FileNotFoundError:
- return None
- except ProcessLookupError:
- return None
-
-
-def root_notify_env():
- """return set(user, display, dbus)"""
- unsorted_envs_list = []
- # iterates over processes, find processes with suitable env
- for pid in os.listdir('/proc'):
- if pid[0].isdecimal() is False:
- continue
- one_env = re_pid_environ(pid)
- unsorted_envs_list.append(one_env)
- env = set(unsorted_envs_list)
- env.discard(None)
-
- new_env = []
- end = []
- for i in env:
- #print(i)
- key = i[0] + i[1]
- #print(key)
- if key not in end:
- end.append(key)
- new_env.append(i)
- else:
- continue
- #print(new_env)
- return new_env
-
-
def string_to_float_convert_test(string):
"""Try to interprete string values as floats."""
try:
@@ -351,9 +283,28 @@ def pid_to_uid(pid):
return line.split('\t')[1]
+def notify_send_wait(title, body):
+ with Popen(['notify-send', '--icon=dialog-warning', title, body]) as proc:
+ try:
+ proc.wait(timeout=wait_time)
+ except TimeoutExpired:
+ proc.kill()
+ print('TimeoutExpired: notify-send {} {}'.format(title, body))
+
+
+def notify_helper(title, body):
+ with Popen([notify_helper_path, title, body]) as proc:
+ try:
+ proc.wait(timeout=wait_time)
+ except TimeoutExpired:
+ proc.kill()
+ print('TimeoutExpired: nohang_notify_helper {} {}'.format(title, body))
+
+
def send_notify_warn():
"""
Look for process with maximum 'badness' and warn user with notification.
+ (implement Low memory warnings)
"""
# find process with max badness
fat_tuple = fattest()
@@ -378,23 +329,10 @@ def send_notify_warn():
if root: # If nohang was started by root
# send notification to all active users with special script
-
- # теперь можно напрямую уведомлять из кэша если он не устарел
-
- Popen([
- '/usr/bin/nohang_notify_low_mem',
- '--mem', low_mem_percent,
- '--pid', pid,
- '--name', name
- ])
-
+ notify_helper(title, body)
else: # Or by regular user
# send notification to user that runs this nohang
- try:
- Popen(['notify-send', '--icon=dialog-warning',
- '{}'.format(title), '{}'.format(body)]).wait(wait_time)
- except TimeoutExpired:
- print('TimeoutExpired: ' + 'notify low mem')
+ notify_send_wait(title, body)
def send_notify(signal, name, pid):
@@ -413,26 +351,10 @@ def send_notify(signal, name, pid):
'&', '*'))
if root:
# send notification to all active users with notify-send
- b = root_notify_env()
- if len(b) > 0:
- for i in b:
- username, display_env, dbus_env = i[0], i[1], i[2]
- #if '1000' in dbus_env:
- # continue
- #print(username, display_env, dbus_env)
- try:
- Popen(['sudo', '-u', username, 'env', display_env,
- dbus_env, 'notify-send', '--icon=dialog-warning',
- '{}'.format(title), '{}'.format(body)]).wait(wait_time)
- except TimeoutExpired:
- print('TimeoutExpired: ' + 'notify send signal')
+ notify_helper(title, body)
else:
# send notification to user that runs this nohang
- try:
- Popen(['notify-send', '--icon=dialog-warning',
- '{}'.format(title), '{}'.format(body)]).wait(wait_time)
- except TimeoutExpired:
- print('TimeoutExpired: ' + 'notify send signal')
+ notify_send_wait(title, body)
def send_notify_etc(pid, name, command):
@@ -448,20 +370,10 @@ def send_notify_etc(pid, name, command):
pid, name.replace('&', '*'), command.replace('&', '*'))
if root:
# send notification to all active users with notify-send
- b = root_notify_env()
- if len(b) > 0:
- for i in b:
- username, display_env, dbus_env = i[0], i[1], i[2]
- try:
- Popen(['sudo', '-u', username, 'env', display_env,
- dbus_env, 'notify-send', '--icon=dialog-warning',
- '{}'.format(title), '{}'.format(body)]).wait(wait_time)
- except TimeoutExpired:
- print('TimeoutExpired: notify run command')
+ notify_send_wait(title, body)
else:
# send notification to user that runs this nohang
- Popen(['notify-send', '--icon=dialog-warning', '{}'.format(title), '{}'
- .format(body)])
+ notify_send_wait(title, body)
def sleep_after_send_signal(signal):
@@ -789,9 +701,20 @@ def find_victim_and_send_signal(signal):
def sleep_after_check_mem():
"""Specify sleep times depends on rates and avialable memory."""
- t_mem = mem_available / rate_mem
- t_swap = swap_free / rate_swap
- t_zram = (mem_total - mem_used_zram) / rate_zram
+
+ if mem_min_sigkill_kb < mem_min_sigterm_kb:
+ mem_point = mem_available - mem_min_sigterm_kb
+ else:
+ mem_point = mem_available - mem_min_sigkill_kb
+
+ if swap_min_sigkill_kb < swap_min_sigterm_kb:
+ swap_point = swap_free - swap_min_sigterm_kb
+ else:
+ swap_point = swap_free - swap_min_sigkill_kb
+
+ t_mem = mem_point / rate_mem
+ t_swap = swap_point / rate_swap
+ t_zram = (mem_total * 0.9 - mem_used_zram) / rate_zram
t_mem_swap = t_mem + t_swap
t_mem_zram = t_mem + t_zram
@@ -801,17 +724,20 @@ def sleep_after_check_mem():
else:
t = t_mem_zram
- max_sleep_time = 1
if t > max_sleep_time:
- t = 1
+ t = max_sleep_time
+ elif t < min_sleep_time:
+ t = min_sleep_time
+ else:
+ pass
try:
if print_sleep_periods:
- print('sleep', round(t, 2),
- ' (t_mem={}, t_swap={}, t_zram={})'.format(
- round(t_mem, 2),
- round(t_swap, 2),
- round(t_zram, 2)))
+ print('sleep', round(t, 2))
+ # ' (t_mem={}, t_swap={}, t_zram={})'.format(
+ #round(t_mem, 2),
+ #round(t_swap, 2),
+ #round(t_zram, 2)))
stdout.flush()
sleep(t)
except KeyboardInterrupt:
@@ -1425,68 +1351,10 @@ print('Startup time:',
print('Monitoring started!')
-
-def save_env_cache():
- z = '{}\n'.format(int(time()))
- a = root_notify_env()
- # print(a)
- for i in a:
- z = z + '{}\x00{}\x00{}\n'.format(i[0], i[1], i[2])
- write(cache_path, z)
- os.chmod(cache_path, 0000)
- return a
-
-
-def read_env_cache():
- x, y = [], []
- try:
- with open(cache_path) as f:
- for n, line in enumerate(f):
- if n is 0:
- t = line[:-1]
- y.append(t)
- continue
- if n > 0:
- x.append(line[:-1].split('\x00'))
- except FileNotFoundError:
- return None
- y.append(x)
- return y
-
-
-def root_env_cache():
- cache = read_env_cache()
- if cache is None:
- print('cache not found, get new env and cache it')
- return save_env_cache()
- delta_t = time() - int(cache[0])
- if delta_t > cache_time:
- print('cache time: {}, delta: {}, '
- 'get new env and cache it'.format(
- cache_time, round(delta_t)))
- save_env_cache()
- return root_notify_env()
- else:
- print('cache time: {}, delta: {}, '
- 'get cached env'.format(
- cache_time, round(delta_t)))
- return cache[1]
-
-
-t1 = time()
-# root_env_cache()
-t2 = time()
-# print(t2 - t1)
-
-
stdout.flush()
-# exit()
-
-
sigterm_psi = sigterm_psi_avg10
sigkill_psi = sigkill_psi_avg10
-# avg_min_time = 4
psi_min_sleep_time_after_action = psi_avg10_sleep_time
@@ -1494,12 +1362,9 @@ psi_min_sleep_time_after_action = psi_avg10_sleep_time
if psi_support and not ignore_psi:
- # ta0 = time()
- # a0 = psi_mem_some_avg_total()
kill_psi_t0 = time() + psi_avg10_sleep_time
term_psi_t0 = time() + psi_avg10_sleep_time
-
avg_value = ''
while True:
diff --git a/nohang.conf b/nohang.conf
index 8fdf791..be454b2 100644
--- a/nohang.conf
+++ b/nohang.conf
@@ -56,7 +56,7 @@ swap_min_sigkill = 5 %
usual hang level, not recommended to set very high.
Can be specified in % and M. Valid values are floating-point
- numbers from the range [0; 100] %.
+ numbers from the range [0; 90] %.
zram_max_sigterm = 50 %
zram_max_sigkill = 55 %
@@ -93,8 +93,8 @@ psi_avg10_sleep_time = 60
Valid values are positive floating-point numbers.
-rate_mem = 6
-rate_swap = 3
+rate_mem = 4
+rate_swap = 2
rate_zram = 1
See also https://github.com/rfjakob/earlyoom/issues/61
@@ -261,7 +261,7 @@ gui_notifications = True
Enable GUI notifications about the low level of available memory.
Valid values are True and False.
-gui_low_memory_warnings = True
+gui_low_memory_warnings = False
Минимальное время между отправками уведомлений в секундах.
Valid values are floating-point numbers from the range [1; 300].
@@ -304,5 +304,5 @@ print_mem_check_results = True
Print sleep periods between memory checks.
Valid values are True and False.
-print_sleep_periods = False
+print_sleep_periods = True
diff --git a/nohang_notify_low_mem b/nohang_notify_helper
similarity index 64%
rename from nohang_notify_low_mem
rename to nohang_notify_helper
index 72fdfa0..b40e381 100755
--- a/nohang_notify_low_mem
+++ b/nohang_notify_helper
@@ -1,53 +1,17 @@
#!/usr/bin/env python3
+#
+# Usage:
+# ./nohang_notify_helper "title" "body"
-# nohang_notify_low_mem --mem '14% 12%' --name 'stress' --pid '6666'
-
-# need UID=0
-
-# output:
-# Low memory: 14% 12%
-# Fattest process: 6666, stress
-
-# need to remove this slow and fat parser
-from argparse import ArgumentParser
-
+from sys import argv
from os import listdir
from subprocess import Popen, TimeoutExpired
-wait_time = 10
+if len(argv) < 2 or argv[1] == "-h" or argv[1] == "--help":
+ print('Usage: ./nohang_notify_helper "title" "body"')
+ exit(1)
-parser = ArgumentParser()
-
-parser.add_argument(
- '--mem',
- help="""available memory percent (15%, for example)""",
- default=None,
- type=str
-)
-
-parser.add_argument(
- '--pid',
- help="""pid""",
- default=None,
- type=str
-)
-
-parser.add_argument(
- '--name',
- help="""process name""",
- default=None,
- type=str
-)
-
-args = parser.parse_args()
-
-pid = args.pid
-name = args.name
-mem = args.mem
-
-title = 'Low memory: {}'.format(mem)
-
-body = 'Fattest process: {}, {}'.format(pid, name)
+wait_time = 12
display_env = 'DISPLAY='
dbus_env = 'DBUS_SESSION_BUS_ADDRESS='
@@ -69,9 +33,6 @@ def re_pid_environ(pid):
'DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus')
returns None if these vars is not in /proc/[pid]/environ
"""
- display_env = 'DISPLAY='
- dbus_env = 'DBUS_SESSION_BUS_ADDRESS='
- user_env = 'USER='
try:
env = str(rline1('/proc/' + pid + '/environ'))
if display_env in env and dbus_env in env and user_env in env:
@@ -88,8 +49,6 @@ def re_pid_environ(pid):
continue
if i.startswith(dbus_env):
- #if ',guid=' in i:
- # return None
dbus = i
continue
@@ -119,27 +78,27 @@ def root_notify_env():
env = set(unsorted_envs_list)
env.discard(None)
+ # deduplicate dbus
new_env = []
end = []
for i in env:
- #print(i)
key = i[0] + i[1]
- #print(key)
if key not in end:
end.append(key)
new_env.append(i)
else:
continue
- #print(new_env)
+
return new_env
-b = root_notify_env()
+list_with_envs = root_notify_env()
+
# if somebody logged in with GUI
-if len(b) > 0:
+if len(list_with_envs) > 0:
# iterating over logged-in users
- for i in b:
+ for i in list_with_envs:
username, display_env, dbus_env = i[0], i[1], i[2]
display_tuple = display_env.partition('=')
dbus_tuple = dbus_env.partition('=')
@@ -147,16 +106,16 @@ if len(b) > 0:
dbus_key, dbus_value = dbus_tuple[0], dbus_tuple[2]
with Popen(['sudo', '-u', username,
- 'notify-send', '--icon=dialog-warning',
- '{}'.format(title), '{}'.format(body)
- ], env={
+ 'notify-send', '--icon=dialog-warning',
+ argv[1], argv[2]
+ ], env={
display_key: display_value,
dbus_key: dbus_value
- }) as proc:
- try:
- proc.wait(timeout=wait_time)
- except TimeoutExpired:
- proc.kill()
- print('TimeoutExpired: notify' + username)
+ }) as proc:
+ try:
+ proc.wait(timeout=wait_time)
+ except TimeoutExpired:
+ proc.kill()
+ print('TimeoutExpired: notify' + username)
else:
- print('Low memory warnings: nobody logged in with GUI. Nothing to do.')
+ print('Nobody logged-in with GUI. Nothing to do.')