diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5386441..af2e4ee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,7 +31,6 @@
- [x] `oom-sort`
- [x] `psi-top`
- [x] `psi-monitor`
- - [x] `i-memhog`
- [x] Improve poll rate algorithm
- [x] Fixed Makefile for installation on CentOS 7 (remove gzip `-k` option).
- [x] Added `max_post_sigterm_victim_lifetime` option: send SIGKILL to the victim if it doesn't respond to SIGTERM for a certain time
diff --git a/Makefile b/Makefile
index d3fb60f..aa629df 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,6 @@ install:
install -m0755 ./oom-sort $(DESTDIR)/$(PREFIX)/usr/bin/oom-sort
install -m0755 ./psi-top $(DESTDIR)/$(PREFIX)/usr/bin/psi-top
install -m0755 ./psi-monitor $(DESTDIR)/$(PREFIX)/usr/bin/psi-monitor
- install -m0755 ./i-memhog $(DESTDIR)/$(PREFIX)/usr/bin/i-memhog
install -d $(DESTDIR)/$(PREFIX)/etc/nohang
-git describe --tags --long --dirty > ./version
@@ -41,7 +40,6 @@ uninstall:
rm -fv $(PREFIX)/usr/bin/oom-sort
rm -fv $(PREFIX)/usr/bin/psi-top
rm -fv $(PREFIX)/usr/bin/psi-monitor
- rm -fv $(PREFIX)/usr/bin/i-memhog
rm -fv $(PREFIX)/usr/share/man/man1/nohang.1.gz
rm -fv $(PREFIX)/usr/share/man/man1/oom-sort.1.gz
rm -fv $(PREFIX)/lib/systemd/system/nohang.service
diff --git a/README.md b/README.md
index 6f9e408..040b162 100644
--- a/README.md
+++ b/README.md
@@ -292,7 +292,7 @@ See also `man journalctl`.
You can also enable `separate_log` in the config to logging in `/var/log/nohang/nohang.log`.
-## Additional tools: oom-sort, psi-top, psi-monitor, i-memhog
+## Additional tools: oom-sort, psi-top, psi-monitor
### oom-sort
@@ -503,10 +503,6 @@ some 0.29 7.58 14.58 | full 0.28 6.92 13.24
```
-### i-memhog
-
-`i-memhog` is an interactive memory hog for testing purposes.
-
## Contribution
Use cases, feature requests and any questions are [welcome](https://github.com/hakavlad/nohang/issues).
diff --git a/i-memhog b/i-memhog
deleted file mode 100755
index 198461a..0000000
--- a/i-memhog
+++ /dev/null
@@ -1,315 +0,0 @@
-#!/usr/bin/env python3
-
-
-from signal import signal, SIGTERM
-from time import sleep
-from sys import exit
-import os
-
-
-# чек общей доступной, для lim2avail
-def total_mem_available():
-
- with open('/proc/meminfo') as file:
- mem_list = file.readlines()
-
- mem_available = meminfo_num(mem_list, mem_available_index)
- swap_free = meminfo_num(mem_list, swap_free_index)
-
- return round((swap_free + mem_available) / 1024) # MiB
-
-
-# добитие байтами рандома
-def terminal():
- ex = []
- while True:
- try:
- ex.append(os.urandom(1))
- except MemoryError:
- continue
-
-
-def meminfo_num(mem_list, index):
- return int(mem_list[index].split(':')[1].split(' ')[-2])
-
-
-# выдача основных показателей meminfo, KiB
-def mem_check_main():
-
- with open('/proc/meminfo') as file:
- mem_list = file.readlines()
-
- mem_available = meminfo_num(mem_list, mem_available_index)
- swap_total = meminfo_num(mem_list, swap_total_index)
- swap_free = meminfo_num(mem_list, swap_free_index)
-
- return mem_available, swap_total, swap_free
-
-
-def signal_handler(signum, frame):
- print('Got signal {}'.format(signum))
- # sleep(1)
- # exit()
-
-
-def meminfo():
-
- # получаем сырой mem_list
- with open('/proc/meminfo') as file:
- mem_list = file.readlines()
-
- # получаем список названий позиций: MemTotal etc
- mem_list_names = []
- for s in mem_list:
- mem_list_names.append(s.split(':')[0])
-
- # ищем MemAvailable, обрабатываем исключение
- try:
- mem_available_index = mem_list_names.index('MemAvailable')
- except ValueError:
- print("Your Linux kernel is too old (3.14+ requied), bye!")
- # исключение для ядер < 3.14, не определяющих MemAvailable
- exit()
-
- # ищем позиции SwapTotl и SwapFree
- swap_total_index = mem_list_names.index('SwapTotal')
- swap_free_index = mem_list_names.index('SwapFree')
-
- buffers_index = mem_list_names.index('Buffers')
- cached_index = mem_list_names.index('Cached')
- active_index = mem_list_names.index('Active')
- inactive_index = mem_list_names.index('Inactive')
- shmem_index = mem_list_names.index('Shmem')
-
- # ищем значение MemTotal в KiB
- mem_total = int(mem_list[0].split(':')[1].split(' ')[-2])
-
- return mem_total, mem_available_index, swap_total_index, swap_free_index, buffers_index, cached_index, active_index, inactive_index, shmem_index
-
-
-meminfo_tuple = meminfo()
-
-mem_total = meminfo_tuple[0]
-mem_available_index = meminfo_tuple[1]
-swap_total_index = meminfo_tuple[2]
-swap_free_index = meminfo_tuple[3]
-
-buffers_index = meminfo_tuple[4]
-cached_index = meminfo_tuple[5]
-active_index = meminfo_tuple[6]
-inactive_index = meminfo_tuple[7]
-shmem_index = meminfo_tuple[8]
-
-
-# печать показателей на этапах работы
-def print_mem():
-
- mem_tup = mem_check_main()
-
- mem_available = mem_tup[0]
- swap_total = mem_tup[1]
- swap_free = mem_tup[2]
-
- print(
- 'MemAvailable: ',
- round(
- mem_available /
- 1024 /
- 1024,
- 3),
- 'GiB,',
- round(
- mem_available /
- 1024),
- 'MiB,',
- round(
- mem_available /
- mem_total *
- 100,
- 1),
- '%')
-
- if swap_total != 0:
- print(
- 'SwapFree: ',
- round(
- swap_free /
- 1024 /
- 1024,
- 3),
- 'GiB,',
- round(
- swap_free /
- 1024),
- 'MiB,',
- round(
- swap_free /
- swap_total *
- 100,
- 1),
- '%')
- print('Total Free: ',
- round((mem_available + swap_free) / 1024 / 1024,
- 3),
- 'GiB,',
- round((mem_available + swap_free) / 1024),
- 'MiB,',
- round((mem_available + swap_free) / (mem_total + swap_total) * 100,
- 1),
- '%')
- else:
- print(
- 'Swap disabled'
- )
-
-
-# бесконечный жор
-def inf():
-
- print(
- 'Вводите целые неотрицательные числа. Чем больше, тем быстрее потребление памяти.\n1000 same обеспечивает потребление на уровне полтора гиг в секунду,\nurandom работает на скорости максимум 170 M/s'
- )
- same = input("same: ")
- urandom = input("urandom: ")
-
- expanding_list = []
-
- print(
- 'Процесс неограниченного потребления пошёл... Press Ctrl + C for exit'
- )
-
- while True:
- try:
- expanding_list.append(os.urandom(int(urandom)))
- expanding_list.append('#' * int(same))
- except MemoryError:
- print('MemoryError, start побайтовая добивалка!')
- terminal()
-
-
-def selfterm():
- os.kill(os.getpid(), signal.SIGTERM)
-
-
-# жор числп гиг
-def lim():
-
- expanding_list = []
-
- n = input('На сколько гигабайт уменьшить доступную память?\n: ')
-
- print('Погнали тратить ' + n + ' гиг...')
-
- i = 0
-
- while True:
-
- i += 1
-
- try:
- expanding_list.append(os.urandom(int(100)))
- expanding_list.append('#' * int(300))
- except MemoryError:
- print('MemoryError!')
- break
- if i > 2020202 * int(n):
- print('DONE')
- break
-
- return expanding_list
-
-
-# жор до остатка мегабайт
-def lim2avail():
-
- expanding_list = []
-
- n = input(
- 'Сколько мегабайт общей доступной памяти (MemAvailable + SwapFree) оставить?\nВведите целое положительное число: '
- )
-
- # проверка на целое положительное
- if n.isdigit():
- n = int(n)
- else:
- print(
- 'Вы ввели не целое положительное число'
- )
- return 0
-
- if n == 0:
- print(
- 'Вы ввели не целое положительное число'
- )
- return 0
-
- print(
- 'Погнали уменьшать доступную память до уровня ниже ' +
- str(n) +
- ' MiB...')
-
- while True:
- try:
- expanding_list.append(os.urandom(5000))
- expanding_list.append('#' * 5000)
- except MemoryError:
- print('MemoryError!')
- break
- if total_mem_available() <= n:
- print('DONE')
- break
-
- return expanding_list
-
-
-print('WARNING: эта прога способна потратить память и повесить систему, будьте осторожны.')
-print('При ее работе следите за показателями памяти.')
-
-
-print('Ignore SIGTERM? (y|n)')
-
-sss = input(': ')
-
-if sss == 'y':
- signal(SIGTERM, signal_handler)
- print('The SIGTERM signal will be ignored')
-else:
- print('The SIGTERM signal will NOT be ignored')
-
-
-ex_list = []
-
-try:
- while True:
-
- print()
- print_mem()
- print()
- print('Выберите вариант из списка ниже')
- print('8 или i или I - запустить бесконечное потребление, предложив выбрать скорость потребления и энтропию')
- print('7 или l или L - запустить ограниченное потребление заданного числа гигов')
- print('6 или a или A - жрать память пока количество доступной памяти не опустится ниже заданного')
- print('0 или с или С - очистить накопления при их наличии')
- print('q или любой другой символ - выход (можно просто нажать Enter)')
-
- li = input(': ')
-
- if li is 'l' or li is 'L' or li is '7':
- x = lim()
- ex_list.append(x)
- elif li is 'i' or li is 'I' or li is '8':
- inf()
- elif li is 'c' or li is 'C' or li is '0':
- ex_list = []
- x = 0
- y = 0
- elif li is '6' or li is 'a' or li is 'A':
- y = lim2avail()
- ex_list.append(y)
- else:
- exit()
-
-except KeyboardInterrupt:
- print()
- print_mem()
- selfterm()
diff --git a/trash/FAQ.ru.md b/trash/FAQ.ru.md
deleted file mode 100644
index 1dda627..0000000
--- a/trash/FAQ.ru.md
+++ /dev/null
@@ -1,46 +0,0 @@
-
-
-
-
-
-
-Fedora 21 (KDE) Гуглил долго, про oom-killer много чего прочитал. Но не понял, как его активировать. whereis его не нашёл, в репах его не нашёл. Если это встроенное средство - почему у меня не запускается и где его конфиг? Мне нужно тупо, чтобы если оперативка исчерпалась - прибило последнюю вкладку хрома. Либо вообще killall chrome тупо и всё. Как проще всего сделать?
-https://www.linux.org.ru/forum/desktop/11511840
-
-
-
-OOM Killer под себя
-
-Здравствуйте!
-Задача такова, надо (курсовая работа по системному программированию) пропатчить ядро с изменением oom killer'a, т.е.
-1)требуется создать некоторый список с названиями процессов, которые "мудрый киллер" будет удалять в первую очередь или наоборот оставлять.
-2)требуется создать свою систему расчета приоритетов для удаления процессов.
-Я НЕ продвинутый линуксоид, поэтому прошу помощи у общественности. С чего начать и как приступить к выполнению?
-https://www.linux.org.ru/forum/development/3555574
-
-
-
-
-
-Товарищи, можете посоветовать какую-нибудь стандартную программу, которая, в какой-то определённо ситуации, жрала память, что приводило бы к запуску Killer`a.
-Чтобы потом можно было на ней тестить курсач)
-https://www.linux.org.ru/forum/development/3555574?cid=3570489
-
-
-Post-mortem по логам OOM-killer.
-Суть такова, в определенный момент на машине случилось нечто, в результате чего кончилась физическая память и ОС залезла в своп с головой. Производительность упала до потери всякого отклика от машины. OOM-killer включился, но, похоже, занимался убийством исключительно невиновных и никому не мешающих процессов.
-Можно ли как-то по логам отследить, кто сожрал всю память? Хотя бы имена павших жертвами киллера?
-Или сразу самому писать скрипт, сбрасывающий на диск ps aux, для следующего такого случая?
-https://www.linux.org.ru/forum/admin/5693261
-
-
-Проблема в том что у меня на VDS мало оперативки и она занята полностью пытаясь подключиться сервер не может выделить памяти и обрывает соединение. что прописать чтобы ссх постоянно висел в процессах с выделенным скажем полумегабайтным или сколько там ему достаточно лимитом?
-https://www.linux.org.ru/forum/general/5399920?cid=5399954
-
-
-
-
-
-
-
-
diff --git a/trash/isascii b/trash/isascii
deleted file mode 100755
index a897776..0000000
--- a/trash/isascii
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python3
-"""Check file for non-ascii lines."""
-
-from sys import argv
-
-path = argv[1]
-
-print('Path:', path)
-
-
-def isascii(string):
- try:
- string_ascii = string.encode('ascii')
- return True
- except UnicodeEncodeError:
- return False
-
-
-def check_file():
- num = 0
- with open(path) as f:
- for n, line in enumerate(f):
- res = isascii(line)
- if res:
- continue
- else:
- print('Line {} is non-ascii:'.format(n + 1))
- print(line.strip('\n'))
- num += 1
- continue
- print('Found {} non-ascii lines'.format(num))
-
-
-check_file()
diff --git a/trash/memco.py b/trash/memco.py
deleted file mode 100644
index c17a6b5..0000000
--- a/trash/memco.py
+++ /dev/null
@@ -1,276 +0,0 @@
-# memdler common
-
-import os
-import glob
-import signal
-import subprocess
-from glob import glob
-from time import sleep
-
-
-# k = mem_total_used / (zram own size)
-k = 0.0042
-
-
-def meminfo():
-
- # получаем сырой mem_list
- with open('/proc/meminfo') as file:
- mem_list = file.readlines()
-
- # получаем список названий позиций: MemTotal etc
- mem_list_names = []
- for s in mem_list:
- mem_list_names.append(s.split(':')[0])
-
- # ищем MemAvailable, обрабатываем исключение
- try:
- mem_available_index = mem_list_names.index('MemAvailable')
- except ValueError:
- print("Your Linux kernel is too old (3.14+ requied), bye!")
- # исключение для ядер < 3.14, не определяющих MemAvailable
- exit()
-
- # ищем позиции SwapTotl и SwapFree
- swap_total_index = mem_list_names.index('SwapTotal')
- swap_free_index = mem_list_names.index('SwapFree')
-
- buffers_index = mem_list_names.index('Buffers')
- cached_index = mem_list_names.index('Cached')
- active_index = mem_list_names.index('Active')
- inactive_index = mem_list_names.index('Inactive')
- shmem_index = mem_list_names.index('Shmem')
-
- # ищем значение MemTotal в KiB
- mem_total = int(mem_list[0].split(':')[1].split(' ')[-2])
-
- return mem_total, mem_available_index, swap_total_index, swap_free_index, buffers_index, cached_index, active_index, inactive_index, shmem_index
-
-
-meminfo_tuple = meminfo()
-
-mem_total = meminfo_tuple[0]
-mem_available_index = meminfo_tuple[1]
-swap_total_index = meminfo_tuple[2]
-swap_free_index = meminfo_tuple[3]
-
-buffers_index = meminfo_tuple[4]
-cached_index = meminfo_tuple[5]
-active_index = meminfo_tuple[6]
-inactive_index = meminfo_tuple[7]
-shmem_index = meminfo_tuple[8]
-
-
-def meminfo_num(mem_list, index):
- return int(mem_list[index].split(':')[1].split(' ')[-2])
-
-
-# выдача основных показателей meminfo, KiB
-def mem_check_main():
-
- with open('/proc/meminfo') as file:
- mem_list = file.readlines()
-
- mem_available = meminfo_num(mem_list, mem_available_index)
- swap_total = meminfo_num(mem_list, swap_total_index)
- swap_free = meminfo_num(mem_list, swap_free_index)
-
- return mem_available, swap_total, swap_free
-
-
-# читать не весь файл, а нужный срез от 0 до 20, например
-def mem_check_full():
-
- with open('/proc/meminfo') as file:
- mem_list = file.readlines()
-
- mem_available = meminfo_num(mem_list, mem_available_index)
- swap_total = meminfo_num(mem_list, swap_total_index)
- swap_free = meminfo_num(mem_list, swap_free_index)
-
- buffers = meminfo_num(mem_list, buffers_index)
- cached = meminfo_num(mem_list, cached_index)
- active = meminfo_num(mem_list, active_index)
- inactive = meminfo_num(mem_list, inactive_index)
- shmem = meminfo_num(mem_list, shmem_index)
-
- return mem_available, swap_total, swap_free, buffers, cached, active, inactive, shmem
-
-
-# чек общей доступной, для lim2avail
-def total_mem_available():
-
- with open('/proc/meminfo') as file:
- mem_list = file.readlines()
-
- mem_available = meminfo_num(mem_list, mem_available_index)
- swap_free = meminfo_num(mem_list, swap_free_index)
-
- return round((swap_free + mem_available) / 1024) # MiB
-
-
-# добитие байтами рандома
-def terminal():
- ex = []
- while True:
- try:
- ex.append(os.urandom(1))
- except MemoryError:
- continue
-
-
-# перевод дроби в проценты
-def percent(num):
- a = str(round(num * 100, 1)).split('.')
- a0 = a[0].rjust(3, ' ')
- a1 = a[1]
- return '{}.{}'.format(a0, a1)
-
-
-def human(num):
- return str(round(num / 1024.0)).rjust(8, ' ')
-
-
-
-# B -> GiB
-def humanz(num):
- a = str(round(num / 1073741824, 3))
- a0 = a.split('.')[0].rjust(4, ' ')
- a1 = a.split('.')[1]
- if len(a1) == 1:
- a1 += '00'
- if len(a1) == 2:
- a1 += '0'
- return '{}.{}'.format(a0, a1)
-
-
-
-movie_dict = {
- '+----': '-+---',
- '-+---': '--+--',
- '--+--': '---+-',
- '---+-': '----+',
- '----+': '+----'
- }
-
-
-def config_parser(config):
- if os.path.exists(config):
- try:
- with open(config) as f:
- name_value_dict = dict()
- for line in f:
- a = line.startswith('#')
- b = line.startswith('\n')
- c = line.startswith('\t')
- d = line.startswith(' ')
- if not a and not b and not c and not d:
- a = line.split('=')
- name_value_dict[a[0].strip()] = a[1].strip()
- return name_value_dict
- except PermissionError:
- print('config: permission error')
- else:
- print('config does not exists')
-
-
-
-
-def swaps_raw(part_string):
- '''анализ строки свопс, возврат кортежа с значениями'''
- part_string_list = part_string.split('\t')
- part_name = part_string_list[0].split(' ')[0]
-
- part_size = int(part_string_list[-3])
- part_used = int(part_string_list[-2])
- part_prio = int(part_string_list[-1])
-
- return part_name, part_size, part_used, part_prio
-
-
-
-# возвращает disksize и mem_used_total по zram id
-def zram_stat(zram_id):
- with open('/sys/block/zram' + zram_id + '/disksize') as file:
- disksize = file.readlines()[0][:-1]
- if os.path.exists('/sys/block/zram' + zram_id + '/mm_stat'):
- with open('/sys/block/zram' + zram_id + '/mm_stat') as file:
- mm_stat = file.readlines()[0][:-1].split(' ')
- mm_stat_list = []
- for i in mm_stat:
- if i != '':
- mm_stat_list.append(i)
- mem_used_total = mm_stat_list[2]
- else:
- with open('/sys/block/zram' + zram_id + '/mem_used_total') as file:
- mem_used_total = file.readlines()[0][:-1]
- return disksize, mem_used_total
-
-
-
-
-
-
-
-
-# termer(signal.SIGKILL)
-# process terminator
-# функция поиска жиробаса и его убийства
-def terminator(signal):
-
- subdirs = glob('/proc/*/')
- subdirs.remove('/proc/self/')
- subdirs.remove('/proc/thread-self/')
-
- pid_list = []
- name_list = []
- oom_score_list = []
-
- for subdir in subdirs:
-
- try:
-
- with open(subdir + 'status') as file:
- status = file.readlines()
-
- pid_list.append(status[5].split(':')[1][1:-1])
- name_list.append(status[0].split(':')[1][1:-1])
-
- except Exception:
- pass
-
- try:
-
- with open(subdir + 'oom_score') as file:
- oom_score = file.readlines()
-
- oom_score_list.append(int(oom_score[0][0:-1]))
-
- except Exception:
- pass
-
- max_oom_score = sorted(oom_score_list)[-1]
- n = oom_score_list.index(max_oom_score)
- s = sorted(oom_score_list)
- s.reverse()
-
- if signal == signal.SIGTERM:
- print('\nTRY TO TERM {}, Pid {}\n'.format(name_list[n], pid_list[n]))
- else:
- print('\nTRY TO KILL {}, Pid {}\n'.format(name_list[n], pid_list[n]))
-
- try:
- os.kill(int(pid_list[n]), signal)
- except ProcessLookupError:
- print('No such process')
-
-
-
-
-def selfterm():
- os.kill(os.getpid(), signal.SIGTERM)
-
-
-
-
-
diff --git a/trash/memleak/install.sh b/trash/memleak/install.sh
deleted file mode 100755
index 0f765ee..0000000
--- a/trash/memleak/install.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-cp ./memleak /usr/sbin/memleak
-cp ./memleak.service /lib/systemd/system/memleak.service
-systemctl daemon-reload
diff --git a/trash/memleak/memleak b/trash/memleak/memleak
deleted file mode 100755
index e26ee7f..0000000
--- a/trash/memleak/memleak
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-
-from os import system
-from time import sleep
-
-x = []
-
-while True:
- x.append('#' * 99999)
- sleep(0.1)
- system('sleep 9999 &')
-
diff --git a/trash/memleak/memleak.service b/trash/memleak/memleak.service
deleted file mode 100644
index 0a7f3c0..0000000
--- a/trash/memleak/memleak.service
+++ /dev/null
@@ -1,9 +0,0 @@
-[Unit]
-Description=Memory leak daemon
-After=sysinit.target
-
-[Service]
-ExecStart=/usr/sbin/memleak
-
-[Install]
-WantedBy=multi-user.target
diff --git a/trash/mm b/trash/mm
deleted file mode 100755
index 792e953..0000000
--- a/trash/mm
+++ /dev/null
@@ -1,315 +0,0 @@
-#!/usr/bin/env python3
-
-# ms-monitor/
-
-from memco import *
-import time
-
-# once or 1, log or 2, inplace or 3
-mode = '3'
-
-# период цикла печати
-period = 0.2
-
-# параметры визуализации
-used = '$'
-free = '~'
-len_visual = 14
-
-# нахождение и печать параметров, возвращает показатели и принимает показатели для нахождения дельт
-def printer(old_list):
-
- mem_tup = mem_check_main()
-
- mem_available = mem_tup[0]
- swap_total = mem_tup[1]
- swap_free = mem_tup[2]
-
- tn = time.time()
- delta = tn - old_list[4]
-
- mem_busy = mem_total - mem_available
- swap_busy = swap_total - swap_free
-
- mem_swap_total = mem_total + swap_total
- mem_swap_free = mem_available + swap_free
- mem_swap_busy = mem_busy + swap_busy
-
- delta_mem = (mem_busy - old_list[0]) / delta
- delta_swap = (swap_busy - old_list[1]) / delta
- delta_all = (mem_swap_busy - old_list[2]) / delta
-
- if swap_total == 0:
-
-#1###################################################################################
-
- # печать без свопа
-
- mem_visual = (
- used * round(mem_busy / mem_total * len_visual)
- ).ljust(len_visual, free)
-
- print(
- ' MEM'
- )
-
- print(
- 'TOTAL {}'.format(
- human(mem_total),
- )
- )
- print(
- 'N/A {} {}'.format(
- human(mem_busy),
- percent(mem_busy / mem_total),
- )
- )
- print(
- 'AVAIL {} {}'.format(
- human(mem_available),
- percent(mem_available / mem_total),
- )
- )
- print(
- 'DELTA {}'.format(
- human(delta_mem),
- )
- )
- print(
- '{} {}'.format(
- old_list[3], mem_visual
- )
- )
-
-
-#2###################################################################################
-
- else:
-
- with open('/proc/swaps') as file:
- swaps_list = file.readlines()[1:]
-
- zram_id_list = []
-
- disk_swap_size = 0
- disk_swap_used = 0
- zram_swap_size = 0
- zram_swap_used = 0
-
- for i in swaps_list:
-
- x = swaps_raw(i)
-
- if x[0].startswith('/dev/zram'):
-
- zram_swap_size += int(x[1])
- zram_swap_used += int(x[2])
-
- zram_id_list.append(x[0][9:])
-
- else:
-
- disk_swap_size += int(x[1])
- disk_swap_used += int(x[2])
-
- if zram_swap_size == 0:
-
-#3###################################################################################
-
- # печать своп без зрам
-
- mem_visual = (
- used * round(mem_busy / mem_total * len_visual)
- ).ljust(len_visual, free)
- swap_visual = (
- used * round(swap_busy / swap_total * len_visual)
- ).ljust(len_visual, free)
- mem_swap_visual = (
- used * round(mem_swap_busy / mem_swap_total * len_visual)
- ).ljust(len_visual, free)
-
- print(
- ' MEM SWAP MEM + SWAP'
- )
-
- print(
- 'TOTAL {} {} {}'.format(
- human(mem_total),
- human(swap_total),
- human(mem_swap_total),
- )
- )
- print(
- 'N/A {} {} {} {} {} {}'.format(
- human(mem_busy),
- percent(mem_busy / mem_total),
- human(swap_busy),
- percent(swap_busy / swap_total),
- human(mem_swap_busy),
- percent(mem_swap_busy / mem_swap_total),
- )
- )
- print(
- 'AVAIL {} {} {} {} {} {}'.format(
- human(mem_available),
- percent(mem_available / mem_total),
- human(swap_free),
- percent(swap_free / swap_total),
- human(mem_swap_free),
- percent(mem_swap_free / mem_swap_total),
- )
- )
- print(
- 'DELTA {} {} {}'.format(
- human(delta_mem),
- human(delta_swap),
- human(delta_all)
- )
- )
- print(
- '{} {} {} {}'.format(
- old_list[3],
- mem_visual,
- swap_visual,
- mem_swap_visual,
- )
- )
- print()
-
-#4###################################################################################
-
- else:
-
- # суммируем показатели из всех свопов в зрам
-
- disksize_sum = 0
- mem_used_total_sum = 0
-
- for i in zram_id_list:
- s = zram_stat(i) # кортеж из disksize и mem_used_total для данного zram id
- disksize_sum += int(s[0])
- mem_used_total_sum += int(s[1])
-
- # находим показатели для ZRAM
- full = disksize_sum * k + mem_used_total_sum
- profit = zram_swap_used - (full / 1024)
- cr_real = round(zram_swap_used * 1024 / mem_used_total_sum, 2)
-
-#5###################################################################################
-
- # печать своп + зрам
-
- mem_visual = (
- used * round(mem_busy / mem_total * len_visual)
- ).ljust(len_visual, free)
- swap_visual = (
- used * round(swap_busy / swap_total * len_visual)
- ).ljust(len_visual, free)
- mem_swap_visual = (
- used * round(mem_swap_busy / mem_swap_total * len_visual)
- ).ljust(len_visual, free)
- zram_visual = (
- used * round(full / 1024 / mem_total * 18)
- ).ljust(18, free)
-
- print(
- ' MEM SWAP MEM + SWAP ZRAM SWAP'
- )
-
- print(
- 'TOTAL {} {} {} PROFIT {} M'.format(
- human(mem_total),
- human(swap_total),
- human(mem_swap_total),
- human(profit)
- )
- )
- print(
- 'N/A {} {} {} {} {} {} CR {}'.format(
- human(mem_busy),
- percent(mem_busy / mem_total),
- human(swap_busy),
- percent(swap_busy / swap_total),
- human(mem_swap_busy),
- percent(mem_swap_busy / mem_swap_total),
- str(cr_real).rjust(7, ' ')
- )
- )
- print(
- 'AVAIL {} {} {} {} {} {} FULL/MT {} %'.format(
- human(mem_available),
- percent(mem_available / mem_total),
- human(swap_free),
- percent(swap_free / swap_total),
- human(mem_swap_free),
- percent(mem_swap_free / mem_swap_total),
- percent(full / 1024 / mem_total)
- )
- )
- print(
- 'DELTA {} {} {}'.format(
- human(delta_mem),
- human(delta_swap),
- human(delta_all)
- )
- )
- print(
- '{} {} {} {} {}'.format(
- old_list[3],
- mem_visual,
- swap_visual,
- mem_swap_visual,
- zram_visual
- )
- )
- print()
-
-#6###################################################################################
-
- # печать по партициям
-
- print('FILENAME USED SIZE PRIORITY')
-
- for i in swaps_list:
- x = swaps_raw(i)
- print(
- '{} {} G {} % {} G {}'.format(
- str(x[0]).ljust(26, ' '),
- human(x[2]),
- percent(x[2] / x[1]), human(x[1]),
- str(x[3]).rjust(10, ' ')
- )
- )
-
- return [mem_busy, swap_busy, mem_swap_busy, movie_dict[(old_list[3])], tn]
-
-
-
-
-
-
-try:
-
- delta = [0, 0, 0, '+----', 0]
-
- if mode == 'log' or mode == '2':
-
- while True:
- delta = printer(delta)
- sleep(period)
-
- elif mode == 'inplace' or mode == '3':
-
- while True:
- print("\033c")
- delta = printer(delta)
- sleep(period)
-
- else:
-
- delta = printer(delta)
-
-except KeyboardInterrupt:
- print()
- exit()
-
diff --git a/trash/n10 b/trash/n10
deleted file mode 100755
index ada86b7..0000000
--- a/trash/n10
+++ /dev/null
@@ -1,3020 +0,0 @@
-#!/usr/bin/env python3
-"""A daemon that prevents OOM in Linux systems."""
-
-import os
-from ctypes import CDLL
-from time import sleep, time
-from operator import itemgetter
-from sys import stdout, stderr, argv, exit, version
-from re import search
-from sre_constants import error as invalid_re
-from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP
-
-
-start_time = time()
-
-
-help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG]
-
-optional arguments:
- -h, --help show this help message and exit
- -v, --version print version
- -t, --test print some tests
- -p, --print-proc-table
- print table of processes with their badness values
- -c CONFIG, --config CONFIG
- path to the config file, default values:
- ./nohang.conf, /etc/nohang/nohang.conf"""
-
-
-SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
-
-SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE'])
-
-conf_err_mess = 'Invalid config. Exit.'
-
-sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP]
-
-sig_dict = {
- SIGKILL: 'SIGKILL',
- SIGINT: 'SIGINT',
- SIGQUIT: 'SIGQUIT',
- SIGHUP: 'SIGHUP',
- SIGTERM: 'SIGTERM'
-}
-
-self_pid = str(os.getpid())
-
-self_uid = os.geteuid()
-
-if self_uid == 0:
- root = True
-else:
- root = False
-
-
-if os.path.exists('./nohang_notify_helper'):
- notify_helper_path = './nohang_notify_helper'
-else:
- notify_helper_path = '/usr/sbin/nohang_notify_helper'
-
-
-victim_dict = dict()
-
-
-
-victim_id = None
-actions_time_dict = dict()
-actions_time_dict['action_handled'] = [time(), victim_id]
-# print(actions_time_dict)
-
-
-
-# will store corrective actions stat
-stat_dict = dict()
-
-
-separate_log = False # will be overwritten after parse config
-
-
-def find_cgroup_indexes():
- """ Find cgroup-line positions in /proc/*/cgroup file.
- """
-
- cgroup_v1_index = None
- cgroup_v2_index = None
-
- with open('/proc/self/cgroup') as f:
- for index, line in enumerate(f):
- if ':name=' in line:
- cgroup_v1_index = index
- if line.startswith('0::'):
- cgroup_v2_index = index
-
- return cgroup_v1_index, cgroup_v2_index
-
-
-cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes()
-
-
-##########################################################################
-
-# define functions
-
-'''
-def self_rss():
- """
- """
- return pid_to_status(self_pid)[5]
-
-
-def print_self_rss():
- """
- """
- log('Self RSS: {} MiB'.format(self_rss()))
-'''
-
-
-def pid_to_rss(pid):
- try:
- rss = int(rline1(
- '/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE
- except IndexError:
- rss = None
- except FileNotFoundError:
- rss = None
- except ProcessLookupError:
- rss = None
- return rss
-
-
-def pid_to_vm_size(pid):
- try:
- vm_size = int(rline1(
- '/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE
- except IndexError:
- vm_size = None
- except FileNotFoundError:
- vm_size = None
- except ProcessLookupError:
- vm_size = None
- return vm_size
-
-
-
-
-
-
-
-
-def signal_handler(signum, frame):
- """
- """
- for i in sig_list:
- signal(i, signal_handler_inner)
- log('Signal handler called with the {} signal '.format(
- sig_dict[signum]))
- update_stat_dict_and_print(None)
- log('Exit')
- exit()
-
-
-def signal_handler_inner(signum, frame):
- """
- """
- log('Signal handler called with the {} signal (ignored) '.format(
- sig_dict[signum]))
-
-
-def exe(cmd):
- """
- """
- log('Execute the command: {}'.format(cmd))
- t0 = time()
- write_self_oom_score_adj(self_oom_score_adj_max)
- err = os.system(cmd)
- write_self_oom_score_adj(self_oom_score_adj_min)
- dt = time() - t0
- log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3)))
- return err
-
-
-def write(path, string):
- """
- """
- with open(path, 'w') as f:
- f.write(string)
-
-
-def write_self_oom_score_adj(new_value):
- """
- """
- if root:
- write('/proc/self/oom_score_adj', new_value)
-
-
-self_oom_score_adj_min = '-600'
-self_oom_score_adj_max = '-6'
-
-
-write_self_oom_score_adj(self_oom_score_adj_min)
-
-
-def valid_re(reg_exp):
- """Validate regular expression.
- """
- try:
- search(reg_exp, '')
- except invalid_re:
- log('Invalid config: invalid regexp: {}'.format(reg_exp))
- exit(1)
-
-
-def func_print_proc_table():
- """
- """
- print_proc_table = True
- find_victim(print_proc_table)
- exit()
-
-
-def log(*msg):
- """
- """
- try:
- print(*msg)
- except OSError:
- sleep(0.01)
- if separate_log:
- try:
- info(*msg)
- except OSError:
- sleep(0.01)
-
-
-def print_version():
- """
- """
- try:
- v = rline1('/etc/nohang/version')
- except FileNotFoundError:
- v = None
- if v is None:
- print('Nohang unknown version')
- else:
- print('Nohang ' + v)
- exit()
-
-
-def test():
- """
- """
- print('\n(This option is not ready to use!)\n')
-
- print(version)
- print(argv)
-
- hr = '=================================='
- print(hr)
- print("uptime()")
- print(uptime())
-
- print(hr)
- print("os.uname()")
- print(os.uname())
-
- print(hr)
- print("pid_to_starttime('self')")
- print(pid_to_starttime('self'))
-
- print(hr)
- print("get_victim_id('self')")
- print(get_victim_id('self'))
-
- print(hr)
- print("errprint('test')")
- print(errprint('test'))
-
- print(hr)
- print("mlockall()")
- print(mlockall())
-
- print(hr)
- print("pid_to_state('2')")
- print(pid_to_state('2'))
-
- exit()
-
-
-def pid_to_cgroup_v1(pid):
- """
- """
- cgroup_v1 = ''
- try:
- with open('/proc/' + pid + '/cgroup') as f:
- for index, line in enumerate(f):
- if index == cgroup_v1_index:
- cgroup_v1 = '/' + line.partition('/')[2][:-1]
- return cgroup_v1
- except FileNotFoundError:
- return ''
-
-
-def pid_to_cgroup_v2(pid):
- """
- """
- cgroup_v2 = ''
- try:
- with open('/proc/' + pid + '/cgroup') as f:
- for index, line in enumerate(f):
- if index == cgroup_v2_index:
- cgroup_v2 = line[3:-1]
- return cgroup_v2
- except FileNotFoundError:
- return ''
-
-
-def pid_to_starttime(pid):
- """ handle FNF error!
- """
- try:
- starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[
- 2].split(' ')[20]
-
- except UnicodeDecodeError:
- # print('LOL')
- with open('/proc/' + pid + '/stat', 'rb') as f:
- starttime = f.read().decode('utf-8', 'ignore').rpartition(
- ')')[2].split(' ')[20]
-
- return float(starttime) / SC_CLK_TCK
-
-
-def get_victim_id(pid):
- """victim_id is starttime + pid"""
- try:
- return rline1('/proc/' + pid + '/stat').rpartition(
- ')')[2].split(' ')[20] + '_pid' + pid
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
-
-
-def pid_to_state(pid):
- """ Handle FNF error! (BTW it already handled in find_victim_info())
- """
- return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1]
-
-
-def pid_to_name(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/comm', 'rb') as f:
- return f.read().decode('utf-8', 'ignore')[:-1]
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
-
-
-def pid_to_ppid(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/status') as f:
- for n, line in enumerate(f):
- if n is ppid_index:
- return line.split('\t')[1].strip()
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
- except UnicodeDecodeError:
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
- for i in range(len(f_list)):
- if i is ppid_index:
- return f_list[i].split('\t')[1]
-
-
-def pid_to_ancestry(pid, max_ancestry_depth=1):
- """
- """
- if max_ancestry_depth == 1:
- ppid = pid_to_ppid(pid)
- pname = pid_to_name(ppid)
- return '\n PPID: {} ({})'.format(ppid, pname)
- if max_ancestry_depth == 0:
- return ''
- anc_list = []
- for i in range(max_ancestry_depth):
- ppid = pid_to_ppid(pid)
- pname = pid_to_name(ppid)
- anc_list.append((ppid, pname))
- if ppid == '1':
- break
- pid = ppid
- a = ''
- for i in anc_list:
- a = a + ' <= PID {} ({})'.format(i[0], i[1])
- return '\n Ancestry: ' + a[4:]
-
-
-def pid_to_cmdline(pid):
- """
- Get process cmdline by pid.
-
- pid: str pid of required process
- returns string cmdline
- """
- try:
- with open('/proc/' + pid + '/cmdline') as f:
- return f.read().replace('\x00', ' ').rstrip()
- except FileNotFoundError:
- return ''
-
-
-def pid_to_environ(pid):
- """
- Get process environ by pid.
-
- pid: str pid of required process
- returns string environ
- """
- try:
- with open('/proc/' + pid + '/environ') as f:
- return f.read().replace('\x00', ' ').rstrip()
- except FileNotFoundError:
- return ''
-
-
-def pid_to_realpath(pid):
- try:
- return os.path.realpath('/proc/' + pid + '/exe')
- except FileNotFoundError:
- return ''
-
-
-def pid_to_uid(pid):
- """return euid"""
- try:
- with open('/proc/' + pid + '/status') as f:
- for n, line in enumerate(f):
- if n is uid_index:
- return line.split('\t')[2]
- except UnicodeDecodeError:
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
- return f_list[uid_index].split('\t')[2]
- except FileNotFoundError:
- return ''
-
-
-def pid_to_badness(pid):
- """Find and modify badness (if it needs)."""
-
- try:
-
- oom_score = int(rline1('/proc/' + pid + '/oom_score'))
- badness = oom_score
-
- if decrease_oom_score_adj:
- oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
- if badness > oom_score_adj_max and oom_score_adj > 0:
- badness = badness - oom_score_adj + oom_score_adj_max
-
- if regex_matching:
- name = pid_to_name(pid)
- for re_tup in processname_re_list:
- if search(re_tup[1], name) is not None:
- badness += int(re_tup[0])
-
- if re_match_cgroup_v1:
- cgroup_v1 = pid_to_cgroup_v1(pid)
- for re_tup in cgroup_v1_re_list:
- if search(re_tup[1], cgroup_v1) is not None:
- badness += int(re_tup[0])
-
- if re_match_cgroup_v2:
- cgroup_v2 = pid_to_cgroup_v2(pid)
- for re_tup in cgroup_v2_re_list:
- if search(re_tup[1], cgroup_v2) is not None:
- badness += int(re_tup[0])
-
- if re_match_realpath:
- realpath = pid_to_realpath(pid)
- for re_tup in realpath_re_list:
- if search(re_tup[1], realpath) is not None:
- badness += int(re_tup[0])
-
- if re_match_cmdline:
- cmdline = pid_to_cmdline(pid)
- for re_tup in cmdline_re_list:
- if search(re_tup[1], cmdline) is not None:
- badness += int(re_tup[0])
-
- if re_match_environ:
- environ = pid_to_environ(pid)
- for re_tup in environ_re_list:
- if search(re_tup[1], environ) is not None:
- badness += int(re_tup[0])
-
- if re_match_uid:
- uid = pid_to_uid(pid)
- for re_tup in uid_re_list:
- if search(re_tup[1], uid) is not None:
- badness += int(re_tup[0])
-
- if forbid_negative_badness:
- if badness < 0:
- badness = 0
-
- return badness, oom_score
-
- except FileNotFoundError:
- return None, None
- except ProcessLookupError:
- return None, None
-
-
-def pid_to_status(pid):
- """
- """
-
- try:
-
- with open('/proc/' + pid + '/status') as f:
-
- for n, line in enumerate(f):
-
- if n is 0:
- name = line.split('\t')[1][:-1]
-
- if n is state_index:
- state = line.split('\t')[1][0]
- continue
-
- if n is ppid_index:
- ppid = line.split('\t')[1][:-1]
- continue
-
- if n is uid_index:
- uid = line.split('\t')[2]
- continue
-
- if n is vm_size_index:
- vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_rss_index:
- vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_swap_index:
- vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
- break
-
- return name, state, ppid, uid, vm_size, vm_rss, vm_swap
-
- except UnicodeDecodeError:
- return pid_to_status_unicode(pid)
-
- except FileNotFoundError:
- return None
-
- except ProcessLookupError:
- return None
-
- except ValueError:
- return None
-
-
-def pid_to_status_unicode(pid):
- """
- """
- try:
-
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
-
- for i in range(len(f_list)):
-
- if i is 0:
- name = f_list[i].split('\t')[1]
-
- if i is state_index:
- state = f_list[i].split('\t')[1][0]
-
- if i is ppid_index:
- ppid = f_list[i].split('\t')[1]
-
- if i is uid_index:
- uid = f_list[i].split('\t')[2]
-
- if i is vm_size_index:
- vm_size = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_rss_index:
- vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_swap_index:
- vm_swap = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- return name, state, ppid, uid, vm_size, vm_rss, vm_swap
-
- except FileNotFoundError:
- return None
-
- except ProcessLookupError:
- return None
-
- except ValueError:
- return None
-
-
-##########################################################################
-
-
-def uptime():
- """
- """
- return float(rline1('/proc/uptime').split(' ')[0])
-
-
-def errprint(*text):
- """
- """
- print(*text, file=stderr, flush=True)
-
-
-def mlockall():
- """Lock all memory to prevent swapping nohang process."""
-
- MCL_CURRENT = 1
- MCL_FUTURE = 2
- MCL_ONFAULT = 4
-
- libc = CDLL('libc.so.6', use_errno=True)
-
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
- )
- if result != 0:
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE
- )
- if result != 0:
- log('WARNING: cannot lock all memory')
- else:
- pass
- # log('All memory locked with MCL_CURRENT | MCL_FUTURE')
- else:
- pass
- # log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
-
-
-def update_stat_dict_and_print(key):
- """
- """
-
- if key is not None:
-
- if key not in stat_dict:
-
- stat_dict.update({key: 1})
-
- else:
-
- new_value = stat_dict[key] + 1
- stat_dict.update({key: new_value})
-
- if print_total_stat:
-
- stats_msg = 'Total stat (what happened in the last {}):'.format(
- format_time(time() - start_time))
-
- for i in stat_dict:
- stats_msg += '\n {}: {}'.format(i, stat_dict[i])
-
- log(stats_msg)
-
-
-def find_psi_metrics_value(psi_path, psi_metrics):
- """
- """
-
- if psi_support:
-
- if psi_metrics == 'some_avg10':
- return float(rline1(psi_path).split(' ')[1].split('=')[1])
- if psi_metrics == 'some_avg60':
- return float(rline1(psi_path).split(' ')[2].split('=')[1])
- if psi_metrics == 'some_avg300':
- return float(rline1(psi_path).split(' ')[3].split('=')[1])
-
- if psi_metrics == 'full_avg10':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[1].split('=')[1])
- if psi_metrics == 'full_avg60':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[2].split('=')[1])
- if psi_metrics == 'full_avg300':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[3].split('=')[1])
-
-
-def check_mem_and_swap():
- """find mem_available, swap_total, swap_free"""
- with open('/proc/meminfo') as f:
- for n, line in enumerate(f):
- if n is 2:
- mem_available = int(line.split(':')[1][:-4])
- continue
- if n is swap_total_index:
- swap_total = int(line.split(':')[1][:-4])
- continue
- if n is swap_free_index:
- swap_free = int(line.split(':')[1][:-4])
- break
- return mem_available, swap_total, swap_free
-
-
-def check_zram():
- """find MemUsedZram"""
- disksize_sum = 0
- mem_used_total_sum = 0
-
- for dev in os.listdir('/sys/block'):
- if dev.startswith('zram'):
- stat = zram_stat(dev)
- disksize_sum += int(stat[0])
- mem_used_total_sum += int(stat[1])
-
- # Means that when setting zram disksize = 1 GiB available memory
- # decrease by 0.0042 GiB.
- # Found experimentally, requires clarification with different kernaels and
- # architectures.
- # On small disk drives (up to gigabyte) it can be more, up to 0.0045.
- # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should
- # be 0.001:
- # ("zram uses about 0.1% of the size of the disk"
- # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt),
- # but this statement contradicts the experimental data.
- # ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize
- # Found experimentally.
- ZRAM_DISKSIZE_FACTOR = 0.0042
-
- return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
-
-
-def format_time(t):
- """
- """
- t = int(t)
- if t < 60:
- return '{} sec'.format(t)
- elif t >= 60 and t < 3600:
- m = t // 60
- s = t % 60
- return '{} min {} sec'.format(m, s)
- else:
- h = t // 3600
- s0 = t - h * 3600
- m = s0 // 60
- s = s0 % 60
- return '{} h {} min {} sec'.format(h, m, s)
-
-
-def string_to_float_convert_test(string):
- """Try to interprete string values as floats."""
- try:
- return float(string)
- except ValueError:
- return None
-
-
-def string_to_int_convert_test(string):
- """Try to interpret string values as integers."""
- try:
- return int(string)
- except ValueError:
- return None
-
-
-def conf_parse_string(param):
- """
- Get string parameters from the config dict.
-
- param: config_dict key
- returns config_dict[param].strip()
- """
- if param in config_dict:
- return config_dict[param].strip()
- else:
- errprint('All the necessary parameters must be in the config')
- errprint('There is no "{}" parameter in the config'.format(param))
- exit(1)
-
-
-def conf_parse_bool(param):
- """
- Get bool parameters from the config_dict.
-
- param: config_dicst key
- returns bool
- """
- if param in config_dict:
- param_str = config_dict[param]
- if param_str == 'True':
- return True
- elif param_str == 'False':
- return False
- else:
- errprint('Invalid value of the "{}" parameter.'.format(param))
- errprint('Valid values are True and False.')
- errprint('Exit')
- exit(1)
- else:
- errprint('All the necessary parameters must be in the config')
- errprint('There is no "{}" parameter in the config'.format(param))
- exit(1)
-
-
-def rline1(path):
- """read 1st line from path."""
- try:
- with open(path) as f:
- for line in f:
- return line[:-1]
- except UnicodeDecodeError:
- with open(path, 'rb') as f:
- return f.read(999).decode(
- 'utf-8', 'ignore').split('\n')[0] # use partition()!
-
-
-def kib_to_mib(num):
- """Convert KiB values to MiB values."""
- return round(num / 1024.0)
-
-
-def percent(num):
- """Interprete num as percentage."""
- return round(num * 100, 1)
-
-
-def just_percent_mem(num):
- """convert num to percent and justify"""
- return str(round(num * 100, 1)).rjust(4, ' ')
-
-
-def just_percent_swap(num):
- """
- """
- return str(round(num * 100, 1)).rjust(5, ' ')
-
-
-def human(num, lenth):
- """Convert KiB values to MiB values with right alignment"""
- return str(round(num / 1024)).rjust(lenth, ' ')
-
-
-def zram_stat(zram_id):
- """
- Get zram state.
-
- zram_id: str zram block-device id
- returns bytes diskcize, str mem_used_total
- """
- try:
- disksize = rline1('/sys/block/' + zram_id + '/disksize')
- except FileNotFoundError:
- return '0', '0'
- if disksize == ['0\n']:
- return '0', '0'
- try:
- mm_stat = rline1('/sys/block/' + zram_id + '/mm_stat').split(' ')
- mm_stat_list = []
- for i in mm_stat:
- if i != '':
- mm_stat_list.append(i)
- mem_used_total = mm_stat_list[2]
- except FileNotFoundError:
- mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total')
- return disksize, mem_used_total # BYTES, str
-
-
-def send_notify_warn():
- """
- Look for process with maximum 'badness' and warn user with notification.
- (implement Low memory warnings)
- """
- log('Warning threshold exceeded')
-
- if check_warning_exe:
- exe(warning_exe)
-
- else:
-
- title = 'Low memory'
-
- body = 'MemAvail: {}%\nSwapFree: {}%'.format(
- round(mem_available / mem_total * 100),
- round(swap_free / (swap_total + 0.1) * 100)
- )
-
- send_notification(title, body)
-
-
-def send_notify(signal, name, pid):
- """
- Notificate about OOM Preventing.
-
- signal: key for notify_sig_dict
- name: str process name
- pid: str process pid
- """
-
- # wait for memory release after corrective action
- # may be useful if free memory was about 0 immediately after
- # corrective action
- sleep(0.05)
-
- title = 'Freeze prevention'
- body = '{} [{}] {}'.format(
- notify_sig_dict[signal],
- pid,
- name.replace(
- # symbol '&' can break notifications in some themes,
- # therefore it is replaced by '*'
- '&', '*'
- )
- )
-
- send_notification(title, body)
-
-
-def send_notify_etc(pid, name, command):
- """
- Notificate about OOM Preventing.
-
- command: str command that will be executed
- name: str process name
- pid: str process pid
- """
- title = 'Freeze prevention'
- body = 'Victim is [{}] {}\nExecute the co' \
- 'mmand:\n{}'.format(
- pid, name.replace('&', '*'), command.replace('&', '*'))
-
- send_notification(title, body)
-
-
-def send_notification(title, body):
- """
- """
- split_by = '#' * 16
-
- t000 = time()
-
- path_to_cache = '/dev/shm/nohang_notify_cache_uid{}_time{}'.format(
- str(self_uid), t000
- )
-
- text = '{}{}{}'.format(title, split_by, body)
-
- try:
- with open(path_to_cache, 'w') as f:
- f.write(text)
- os.chmod(path_to_cache, 0o600)
- except OSError:
- log('OSError while send notification '
- '(No space left on device: /dev/shm)')
- return None
-
- cmd = '{} --uid {} --time {} &'.format(notify_helper_path, self_uid, t000)
-
- exe(cmd)
-
-
-def get_pid_list():
- """
- Find pid list expect kthreads and zombies
- """
- pid_list = []
- for pid in os.listdir('/proc'):
- if os.path.exists('/proc/' + pid + '/exe') is True:
- pid_list.append(pid)
- return pid_list
-
-
-pid_list = get_pid_list()
-
-
-def get_non_decimal_pids():
- """
- """
- non_decimal_list = []
- for pid in pid_list:
- if pid[0].isdecimal() is False:
- non_decimal_list.append(pid)
- return non_decimal_list
-
-
-def find_victim(_print_proc_table):
- """
- Find the process with highest badness and its badness adjustment
- Return pid and badness
- """
-
- ft1 = time()
-
- pid_list = get_pid_list()
-
- pid_list.remove(self_pid)
-
- if '1' in pid_list:
- pid_list.remove('1')
-
- non_decimal_list = get_non_decimal_pids()
-
- for i in non_decimal_list:
- if i in pid_list:
- pid_list.remove(i)
-
- pid_badness_list = []
-
- if _print_proc_table:
-
- if extra_table_info == 'None':
- extra_table_title = ''
-
- elif extra_table_info == 'cgroup_v1':
- extra_table_title = 'CGroup_v1'
-
- elif extra_table_info == 'cgroup_v2':
- extra_table_title = 'CGroup_v2'
-
- elif extra_table_info == 'cmdline':
- extra_table_title = 'cmdline'
-
- elif extra_table_info == 'environ':
- extra_table_title = 'environ'
-
- elif extra_table_info == 'realpath':
- extra_table_title = 'realpath'
-
- elif extra_table_info == 'All':
- extra_table_title = '[CGroup] [CmdLine] [RealPath]'
- else:
- extra_table_title = ''
-
- hr = '#' * 115
-
- log(hr)
- log('# PID PPID badness oom_score oom_score_adj e'
- 'UID S VmSize VmRSS VmSwap Name {}'.format(
- extra_table_title))
- log('#------- ------- ------- --------- ------------- -------'
- '--- - ------ ----- ------ --------------- --------')
-
- for pid in pid_list:
-
- badness = pid_to_badness(pid)[0]
-
- if badness is None:
- continue
-
- if _print_proc_table:
-
- try:
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
- except FileNotFoundError:
- continue
-
- if pid_to_status(pid) is None:
- continue
- else:
- (name, state, ppid, uid, vm_size, vm_rss,
- vm_swap) = pid_to_status(pid)
-
- if extra_table_info == 'None':
- extra_table_line = ''
-
- elif extra_table_info == 'cgroup_v1':
- extra_table_line = pid_to_cgroup_v1(pid)
-
- elif extra_table_info == 'cgroup_v2':
- extra_table_line = pid_to_cgroup_v2(pid)
-
- elif extra_table_info == 'cmdline':
- extra_table_line = pid_to_cmdline(pid)
-
- elif extra_table_info == 'environ':
- extra_table_line = pid_to_environ(pid)
-
- elif extra_table_info == 'realpath':
- extra_table_line = pid_to_realpath(pid)
-
- elif extra_table_info == 'All':
- extra_table_line = '[CG: {}] [CL: {}] [RP: {}]'.format(
- pid_to_cgroup_v1(pid),
- pid_to_cmdline(pid),
- pid_to_realpath(pid)
- )
- else:
- extra_table_line = ''
-
- log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format(
- pid.rjust(7),
- ppid.rjust(7),
- str(badness).rjust(7),
- oom_score.rjust(9),
- oom_score_adj.rjust(13),
- uid.rjust(10),
- state,
- str(vm_size).rjust(6),
- str(vm_rss).rjust(5),
- str(vm_swap).rjust(6),
- name.ljust(15),
- extra_table_line
- )
- )
-
- pid_badness_list.append((pid, badness))
-
- real_proc_num = len(pid_badness_list)
-
- # Make list of (pid, badness) tuples, sorted by 'badness' values
- # print(pid_badness_list)
- pid_tuple_list = sorted(
- pid_badness_list,
- key=itemgetter(1),
- reverse=True
- )[0]
-
- pid = pid_tuple_list[0]
-
- # Get maximum 'badness' value
- victim_badness = pid_tuple_list[1]
- victim_name = pid_to_name(pid)
-
- if _print_proc_table:
- log(hr)
-
- log('Found {} processes with existing /proc/[pid]/exe'.format(
- real_proc_num))
-
- log(
- 'Process with highest badness (found in {} ms):\n PID: {}, Na'
- 'me: {}, badness: {}'.format(
- round((time() - ft1) * 1000),
- pid,
- victim_name,
- victim_badness
- )
- )
-
- return pid, victim_badness, victim_name
-
-
-def find_victim_info(pid, victim_badness, name):
- """
- """
- status0 = time()
-
- try:
-
- with open('/proc/' + pid + '/status') as f:
-
- for n, line in enumerate(f):
-
- if n is state_index:
- state = line.split('\t')[1].rstrip()
- continue
-
- if n is ppid_index:
- ppid = line.split('\t')[1]
- continue
-
- if n is uid_index:
- uid = line.split('\t')[2]
- continue
-
- if n is vm_size_index:
- vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_rss_index:
- vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if detailed_rss:
-
- if n is anon_index:
- anon_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is file_index:
- file_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is shmem_index:
- shmem_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_swap_index:
- vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
- break
-
- cmdline = pid_to_cmdline(pid)
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
-
- except FileNotFoundError:
- log('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
- except ProcessLookupError:
- log('The victim died in the search process: ProcessLookupError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ProcessLookupError')
- return None
- except UnicodeDecodeError:
-
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
-
- for i in range(len(f_list)):
-
- if i is state_index:
- state = f_list[i].split('\t')[1].rstrip()
-
- if i is ppid_index:
- ppid = f_list[i].split('\t')[1]
-
- if i is uid_index:
- uid = f_list[i].split('\t')[2]
-
- if i is vm_size_index:
- vm_size = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_rss_index:
- vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- if detailed_rss:
-
- if i is anon_index:
- anon_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is file_index:
- file_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is shmem_index:
- shmem_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_swap_index:
- vm_swap = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- cmdline = pid_to_cmdline(pid)
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
-
- except IndexError:
- log('The victim died in the search process: IndexError')
- update_stat_dict_and_print(
- 'The victim died in the search process: IndexError')
- return None
- except ValueError:
- log('The victim died in the search process: ValueError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ValueError')
- return None
- except FileNotFoundError:
- log('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
- except ProcessLookupError:
- log('The victim died in the search process: ProcessLookupError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ProcessLookupError')
- return None
-
- len_vm = len(str(vm_size))
-
- try:
- realpath = os.path.realpath('/proc/' + pid + '/exe')
- victim_lifetime = format_time(uptime() - pid_to_starttime(pid))
- victim_cgroup_v1 = pid_to_cgroup_v1(pid)
- victim_cgroup_v2 = pid_to_cgroup_v2(pid)
-
- except FileNotFoundError:
- print('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
-
- ancestry = pid_to_ancestry(pid, max_ancestry_depth)
-
- if detailed_rss:
- detailed_rss_info = ' (' \
- 'Anon: {} MiB, ' \
- 'File: {} MiB, ' \
- 'Shmem: {} MiB)'.format(
- anon_rss,
- file_rss,
- shmem_rss)
- else:
- detailed_rss_info = ''
-
- victim_info = 'Victim information (found in {} ms):' \
- '\n Name: {}' \
- '\n State: {}' \
- '\n PID: {}' \
- '{}' \
- '\n EUID: {}' \
- '\n badness: {}, ' \
- 'oom_score: {}, ' \
- 'oom_score_adj: {}' \
- '\n VmSize: {} MiB' \
- '\n VmRSS: {} MiB {}' \
- '\n VmSwap: {} MiB' \
- '\n CGroup_v1: {}' \
- '\n CGroup_v2: {}' \
- '\n Realpath: {}' \
- '\n Cmdline: {}' \
- '\n Lifetime: {}'.format(
- round((time() - status0) * 1000),
- name,
- state,
- pid,
- ancestry,
- uid,
- victim_badness,
- oom_score,
- oom_score_adj,
- vm_size,
- str(vm_rss).rjust(len_vm),
- detailed_rss_info,
- str(vm_swap).rjust(len_vm),
- victim_cgroup_v1,
- victim_cgroup_v2,
- realpath,
- cmdline,
- victim_lifetime)
-
- return victim_info
-
-
-
-
-
-
-
-
-
-
-
-
-def implement_corrective_action(signal):
- """
- Find victim with highest badness and send SIGTERM/SIGKILL
- """
-
-
- # выходим из фции, если для SIGTERM порога не превышено время min_delay_after_sigterm и спим в течение over_sleep
- if signal is SIGTERM:
-
- dt = time() - actions_time_dict['action_handled'][0]
-
- if dt < min_delay_after_sigterm:
- pass
- # print('min_delay_after_sigterm IS NOT EXCEEDED ({} < {})'.format(round(dt, 3), min_delay_after_sigterm))
-
- if print_sleep_periods:
- pass
- # log('Sleep {} sec [in implement_corrective_action()]'.format(over_sleep))
-
- sleep(over_sleep)
-
- return None # время задержки между действиями не истекло
- else:
- pass
- # print('min_delay_after_sigterm IS EXCEEDED, it is time to action')
-
-
-
-
- """
-
- При заходе в фцию проверяем права на сигтерм. Права на сигкилл у нас всегда есть
- (потому что идем дальше только после полн освободж памяти после смерти жертвы)
-
- actions_time_dict[action_handled] = time()
- actions_time_dict[veto] = True
-
- actions_time_dict['action_handled'] = [time(), victim_id]
-
-
-
- """
-
-
- # log(mem_info)
-
- pid, victim_badness, name = find_victim(print_proc_table)
-
- if victim_badness >= min_badness:
-
- if print_victim_info:
- victim_info = find_victim_info(pid, victim_badness, name)
- log(victim_info)
-
-
-
-
- # пороги могли превысиься за время поиска жертвы (поиск может занимать сотни миллисекунд)
- mem_available, swap_total, swap_free = check_mem_and_swap()
-
- ma_mib = int(mem_available) / 1024.0
- sf_mib = int(swap_free) / 1024.0
- log('Memory status before implementing a corrective act'
- 'ion:\n MemAvailable'
- ': {} MiB, SwapFree: {} MiB'.format(
- round(ma_mib, 1), round(sf_mib, 1)
- )
- )
-
- if (mem_available <= mem_min_sigkill_kb and
- swap_free <= swap_min_sigkill_kb):
- log('Hard threshold exceeded')
- signal = SIGKILL
-
-
-
- victim_id = get_victim_id(pid)
-
-
-
-
-
-
- # kill the victim if it doesn't respond to SIGTERM В ТЕЧЕНИЕ
- # ЗАДАННОГО ВРЕМЕНИ
-
- # переопределяем сигнал для старых жертв
- if signal is SIGTERM:
-
- if victim_id in victim_dict:
-
- dt = time() - victim_dict[victim_id]
-
- if dt > max_post_sigterm_victim_lifetime:
- pass
- # print('max_post_sigterm_victim_lifetime exceeded: the victim will get SIGKILL')
- signal = SIGKILL
-
-
-
-
-
-
-
-
-
-
-
- # matching with re to customize corrective actions
- soft_match = False
-
- if soft_actions and signal is SIGTERM:
- name = pid_to_name(pid)
- cgroup_v1 = pid_to_cgroup_v1(pid)
- service = ''
- cgroup_v1_tail = cgroup_v1.rpartition('/')[2]
- if cgroup_v1_tail.endswith('.service'):
- service = cgroup_v1_tail
- for i in soft_actions_list:
- unit = i[0]
- if unit == 'name':
- u = name
- else:
- u = cgroup_v1
- regexp = i[1]
- command = i[2]
- if search(regexp, u) is not None:
- log("Regexp '{}' matches with {} '{}'".format(
- regexp, unit, u))
- soft_match = True
- break
-
- if soft_match:
-
- # todo: make new func
- m = check_mem_and_swap()
- ma = int(m[0]) / 1024.0
- sf = int(m[2]) / 1024.0
- log('Memory status before implementing a corrective act'
- 'ion:\n MemAvailable'
- ': {} MiB, SwapFree: {} MiB'.format(
- round(ma, 1), round(sf, 1)
- )
- )
-
- cmd = command.replace(
- '$PID',
- pid).replace(
- '$NAME',
- pid_to_name(pid)).replace(
- '$SERVICE',
- service)
-
- exit_status = exe(cmd)
-
- exit_status = str(exit_status)
-
- response_time = time() - time0
-
- # тут надо, как и при дефолтном действии, проверять существование жертвы, ее реакцию на действие,
- # и время ее смерти в случае успеха, о обновление таймстемпов действия
-
- etc_info = 'Implement a corrective act' \
- 'ion:\n Run the command: {}' \
- '\n Exit status: {}; total response ' \
- 'time: {} ms'.format(
- cmd,
- exit_status,
- round(response_time * 1000))
-
- log(etc_info)
-
- key = "Run the command '{}'".format(cmd)
- update_stat_dict_and_print(key)
-
- if gui_notifications:
- send_notify_etc(
- pid,
- name,
- command.replace('$PID', pid).replace(
- '$NAME', pid_to_name(pid)))
-
-
-
-
-
-
-
-
- else:
-
- # обычное действие через сигнал
- try:
-
-
- os.kill(int(pid), signal)
- kill_timestamp = time()
- response_time = kill_timestamp - time0
-
-
-
-
-
-
-
- while True:
- exe_exists = os.path.exists('/proc/{}/exe'.format(pid))
- rss = pid_to_rss(pid)
- dt = time() - kill_timestamp
- # log('Victim VmRSS: {} KiB'.format(rss))
- if not exe_exists or rss == 0 or dt > 0.01:
- #print(dt)
- break
- sleep(0.001)
-
- if dt > 0.01:
- # log('Timer (value = 0.01 sec) expired; seems like the victim handles signal')
-
- actions_time_dict['action_handled'] = [time(), get_victim_id(pid)]
-
-
- if victim_id not in victim_dict: # хз как надо.
- victim_dict.update({victim_id: time()})
-
-
- # log('actions_time_dict', actions_time_dict)
- # log('victim_dict', victim_dict)
-
-
-
-
- else:
- log('Process exited (VmRSS = 0) in {} sec'.format(
- round(dt, 5)))
-
-
-
-
-
-
-
- if signal is SIGKILL or not exe_exists or rss == 0:
-
- while True:
- sleep(0.001)
- rss = pid_to_rss(pid) # рсс не важен когда путь не существует. Проверяй просто существование пид.
- if rss is None:
- break
- t1 = time()
- kill_duration = t1 - kill_timestamp
- log('The victim died in {} sec'.format(
- round(kill_duration, 3)))
-
-
- mem_available, swap_total, swap_free = check_mem_and_swap()
-
- ma_mib = int(mem_available) / 1024.0
- sf_mib = int(swap_free) / 1024.0
- log('Memory status after implementing a corrective act'
- 'ion:\n MemAvailable'
- ': {} MiB, SwapFree: {} MiB'.format(
- round(ma_mib, 1), round(sf_mib, 1)
- )
- )
-
-
-
-
-
-
-
- send_result = 'total response time: {} ms'.format(
- round(response_time * 1000))
-
- preventing_oom_message = 'Implement a corrective action:' \
- '\n Send {} to the victim; {}'.format(
- sig_dict[signal], send_result)
-
- key = 'Send {} to {}'.format(sig_dict[signal], name)
-
- if signal is SIGKILL and post_kill_exe != '':
-
- cmd = post_kill_exe.replace('$PID', pid).replace(
- '$NAME', pid_to_name(pid))
-
- log('Execute post_kill_exe')
-
- exe(cmd)
-
- if gui_notifications:
- send_notify(signal, name, pid)
-
- except FileNotFoundError:
- response_time = time() - time0
- send_result = 'no such process; response time: {} ms'.format(
- round(response_time * 1000))
- key = 'FileNotFoundError (the victim died in the se' \
- 'arch process): '
- except ProcessLookupError:
- response_time = time() - time0
- send_result = 'no such process; response time: {} ms'.format(
- round(response_time * 1000))
- key = 'ProcessLookupError (the victim died in the se' \
- 'arch process): '
-
- try:
- log(preventing_oom_message)
-
- except UnboundLocalError:
- preventing_oom_message = key
-
- update_stat_dict_and_print(key)
-
- else:
-
- response_time = time() - time0
- victim_badness_is_too_small = 'victim badness {} < min_b' \
- 'adness {}; nothing to do; response time: {} ms'.format(
- victim_badness,
- min_badness,
- round(response_time * 1000))
-
- log(victim_badness_is_too_small)
-
- # update stat_dict
- key = 'victim badness < min_badness'
- update_stat_dict_and_print(key)
-
- # тут надо поспать хорошенько. а может и счетчики поправить.
- # херню несу. во-первых, внезапно может кто-то появиться c блльшим бэднес.. Далее надо минимизировать аутпут спам.
- sleep(over_sleep)
-
-
- # обновлять время не на каждый кил, а только на килл той жертвы, которая не отвечала на софт экшн.
- # Вывод: ко времени действия прилагать также виктим айди.
-
- print('##################################################################')
-
-
-def sleep_after_check_mem():
- """Specify sleep times depends on rates and avialable memory."""
-
- if stable_sleep:
-
- if print_sleep_periods:
- log('Sleep {} sec'.format(min_sleep))
-
- sleep(min_sleep)
- return None
-
- if mem_min_sigkill_kb < mem_min_sigterm_kb:
- mem_point = mem_available - mem_min_sigterm_kb
- else:
- mem_point = mem_available - mem_min_sigkill_kb
-
- if swap_min_sigkill_kb < swap_min_sigterm_kb:
- swap_point = swap_free - swap_min_sigterm_kb
- else:
- swap_point = swap_free - swap_min_sigkill_kb
-
- if swap_point < 0:
- swap_point = 0
-
- if mem_point < 0:
- mem_point = 0
-
- t_mem = mem_point / rate_mem
- t_swap = swap_point / rate_swap
- t_zram = (mem_total * 0.8 - mem_used_zram) / rate_zram
- if t_zram < 0:
- t_zram = 0
-
- t_mem_swap = t_mem + t_swap
- t_mem_zram = t_mem + t_zram
-
- if t_mem_swap <= t_mem_zram:
- t = t_mem_swap
- else:
- t = t_mem_zram
-
- if t > max_sleep:
- t = max_sleep
- elif t < min_sleep:
- t = min_sleep
- else:
- pass
-
- if print_sleep_periods:
-
- log(
- 'Sleep {} sec (t_mem={}, t_swap={}, t_zram={})'.format(
- round(t, 2),
- round(t_mem, 2),
- round(t_swap, 2),
- round(t_zram, 2)
- )
- )
-
- try:
- stdout.flush()
- except OSError:
- pass
-
- sleep(t)
-
-
-def calculate_percent(arg_key):
- """
- parse conf dict
- Calculate mem_min_KEY_percent.
-
- Try use this one)
- arg_key: str key for config_dict
- returns int mem_min_percent or NoneType if got some error
- """
-
- if arg_key in config_dict:
- mem_min = config_dict[arg_key]
-
- if mem_min.endswith('%'):
- # truncate percents, so we have a number
- mem_min_percent = mem_min[:-1].strip()
- # then 'float test'
- mem_min_percent = string_to_float_convert_test(mem_min_percent)
- if mem_min_percent is None:
- errprint('Invalid {} value, not float\nExit'.format(arg_key))
- exit(1)
- # Final validations...
- if mem_min_percent < 0 or mem_min_percent > 100:
- errprint(
- '{}, as percents value, out of ran'
- 'ge [0; 100]\nExit'.format(arg_key))
- exit(1)
-
- # mem_min_sigterm_percent is clean and valid float percentage. Can
- # translate into Kb
- mem_min_kb = mem_min_percent / 100 * mem_total
- mem_min_mb = round(mem_min_kb / 1024)
-
- elif mem_min.endswith('M'):
- mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip())
- if mem_min_mb is None:
- errprint('Invalid {} value, not float\nExit'.format(arg_key))
- exit(1)
- mem_min_kb = mem_min_mb * 1024
- if mem_min_kb > mem_total:
- errprint(
- '{} value can not be greater then MemT'
- 'otal ({} MiB)\nExit'.format(
- arg_key, round(
- mem_total / 1024)))
- exit(1)
- mem_min_percent = mem_min_kb / mem_total * 100
-
- else:
- log('Invalid {} units in config.\n Exit'.format(arg_key))
- exit(1)
- mem_min_percent = None
-
- else:
- log('{} not in config\nExit'.format(arg_key))
- exit(1)
- mem_min_percent = None
-
- return mem_min_kb, mem_min_mb, mem_min_percent
-
-
-##########################################################################
-
-
-print_proc_table_flag = False
-
-if len(argv) == 1:
- if os.path.exists('./nohang.conf'):
- config = os.getcwd() + '/nohang.conf'
- else:
- config = '/etc/nohang/nohang.conf'
-
-elif len(argv) == 2:
- if argv[1] == '--help' or argv[1] == '-h':
- print(help_mess)
- exit()
- elif argv[1] == '--version' or argv[1] == '-v':
- print_version()
- elif argv[1] == '--test' or argv[1] == '-t':
- test()
- elif argv[1] == '--print-proc-table' or argv[1] == '-p':
- print_proc_table_flag = True
- if os.path.exists('./nohang.conf'):
- config = os.getcwd() + '/nohang.conf'
- else:
- config = '/etc/nohang/nohang.conf'
- else:
- errprint('Unknown option: {}'.format(argv[1]))
- exit(1)
-
-elif len(argv) == 3:
- if argv[1] == '--config' or argv[1] == '-c':
- config = argv[2]
- else:
- errprint('Unknown option: {}'.format(argv[1]))
- exit(1)
-
-else:
- errprint('Invalid CLI input: too many options')
- exit(1)
-
-
-##########################################################################
-
-
-# find mem_total
-# find positions of SwapFree and SwapTotal in /proc/meminfo
-
-with open('/proc/meminfo') as f:
- mem_list = f.readlines()
-
-mem_list_names = []
-for s in mem_list:
- mem_list_names.append(s.split(':')[0])
-
-if mem_list_names[2] != 'MemAvailable':
- errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied')
- # exit(1)
-
-swap_total_index = mem_list_names.index('SwapTotal')
-swap_free_index = swap_total_index + 1
-
-mem_total = int(mem_list[0].split(':')[1][:-4])
-
-# Get names from /proc/*/status to be able to get VmRSS and VmSwap values
-
-with open('/proc/self/status') as file:
- status_list = file.readlines()
-
-status_names = []
-for s in status_list:
- status_names.append(s.split(':')[0])
-
-ppid_index = status_names.index('PPid')
-vm_size_index = status_names.index('VmSize')
-vm_rss_index = status_names.index('VmRSS')
-vm_swap_index = status_names.index('VmSwap')
-uid_index = status_names.index('Uid')
-state_index = status_names.index('State')
-
-
-try:
- anon_index = status_names.index('RssAnon')
- file_index = status_names.index('RssFile')
- shmem_index = status_names.index('RssShmem')
- detailed_rss = True
- # print(detailed_rss, 'detailed_rss')
-except ValueError:
- detailed_rss = False
- # print('It is not Linux 4.5+')
-
-##########################################################################
-
-
-log('Config: ' + config)
-
-
-##########################################################################
-
-# parsing the config with obtaining the parameters dictionary
-
-# conf_parameters_dict
-# conf_restart_dict
-
-# dictionary with config options
-config_dict = dict()
-
-processname_re_list = []
-cmdline_re_list = []
-environ_re_list = []
-uid_re_list = []
-cgroup_v1_re_list = []
-cgroup_v2_re_list = []
-realpath_re_list = []
-
-soft_actions_list = []
-
-
-# separator for optional parameters (that starts with @)
-opt_separator = '///'
-
-
-# stupid conf parsing, need refactoring
-try:
- with open(config) as f:
-
- for line in f:
-
- a = line.startswith('#')
- b = line.startswith('\n')
- c = line.startswith('\t')
- d = line.startswith(' ')
-
- etc = line.startswith('@SOFT_ACTION_RE_NAME')
- etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1')
-
- if not a and not b and not c and not d and not etc and not etc2:
- a = line.partition('=')
-
- key = a[0].strip()
- value = a[2].strip()
-
- if key not in config_dict:
- config_dict[key] = value
- else:
- log('ERROR: config key duplication: {}'.format(key))
- exit(1)
-
- if etc:
-
- a = line.partition('@SOFT_ACTION_RE_NAME')[
- 2].partition(opt_separator)
-
- a1 = 'name'
-
- a2 = a[0].strip()
- valid_re(a2)
-
- a3 = a[2].strip()
-
- zzz = (a1, a2, a3)
-
- soft_actions_list.append(zzz)
-
- if etc2:
-
- a = line.partition('@SOFT_ACTION_RE_CGROUP_V1')[
- 2].partition(opt_separator)
-
- a1 = 'cgroup_v1'
-
- a2 = a[0].strip()
- valid_re(a2)
-
- a3 = a[2].strip()
-
- zzz = (a1, a2, a3)
-
- soft_actions_list.append(zzz)
-
- if line.startswith('@PROCESSNAME_RE'):
- a = line.partition(
- '@PROCESSNAME_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- processname_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@CMDLINE_RE'):
- a = line.partition(
- '@CMDLINE_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- cmdline_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@UID_RE'):
- a = line.partition(
- '@UID_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- uid_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@CGROUP_V1_RE'):
- a = line.partition(
- '@CGROUP_V1_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- cgroup_v1_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@CGROUP_V2_RE'):
- a = line.partition(
- '@CGROUP_V2_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- cgroup_v2_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@REALPATH_RE'):
- a = line.partition(
- '@REALPATH_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- realpath_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@ENVIRON_RE'):
- a = line.partition(
- '@ENVIRON_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- environ_re_list.append((badness_adj, reg_exp))
-
-
-except PermissionError:
- errprint('PermissionError', conf_err_mess)
- exit(1)
-except UnicodeDecodeError:
- errprint('UnicodeDecodeError', conf_err_mess)
- exit(1)
-except IsADirectoryError:
- errprint('IsADirectoryError', conf_err_mess)
- exit(1)
-except IndexError:
- errprint('IndexError', conf_err_mess)
- exit(1)
-except FileNotFoundError:
- errprint('FileNotFoundError', conf_err_mess)
- exit(1)
-
-
-if processname_re_list == []:
- regex_matching = False
-else:
- regex_matching = True
-
-
-if cmdline_re_list == []:
- re_match_cmdline = False
-else:
- re_match_cmdline = True
-
-
-if uid_re_list == []:
- re_match_uid = False
-else:
- re_match_uid = True
-
-
-if environ_re_list == []:
- re_match_environ = False
-else:
- re_match_environ = True
-
-
-if realpath_re_list == []:
- re_match_realpath = False
-else:
- re_match_realpath = True
-
-
-if cgroup_v1_re_list == []:
- re_match_cgroup_v1 = False
-else:
- re_match_cgroup_v1 = True
-
-if cgroup_v2_re_list == []:
- re_match_cgroup_v2 = False
-else:
- re_match_cgroup_v2 = True
-
-
-# print(processname_re_list)
-# print(cmdline_re_list)
-# print(uid_re_list)
-# print(environ_re_list)
-# print(realpath_re_list)
-# print(cgroup_v1_re_list)
-# print(cgroup_v2_re_list)
-
-# print(soft_actions_list)
-
-if soft_actions_list == []:
- soft_actions = False
-else:
- soft_actions = True
-
-# print('soft_actions:', soft_actions)
-
-##########################################################################
-
-
-# extracting parameters from the dictionary
-# check for all necessary parameters
-# validation of all parameters
-psi_debug = conf_parse_bool('psi_debug')
-print_total_stat = conf_parse_bool('print_total_stat')
-print_proc_table = conf_parse_bool('print_proc_table')
-forbid_negative_badness = conf_parse_bool('forbid_negative_badness')
-print_victim_info = conf_parse_bool('print_victim_info')
-print_config = conf_parse_bool('print_config')
-print_mem_check_results = conf_parse_bool('print_mem_check_results')
-print_sleep_periods = conf_parse_bool('print_sleep_periods')
-gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
-gui_notifications = conf_parse_bool('gui_notifications')
-decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
-ignore_psi = conf_parse_bool('ignore_psi')
-
-(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent
- ) = calculate_percent('mem_min_sigterm')
-
-(mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent
- ) = calculate_percent('mem_min_sigkill')
-
-(zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent
- ) = calculate_percent('zram_max_sigterm')
-
-(zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent
- ) = calculate_percent('zram_max_sigkill')
-
-(mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent
- ) = calculate_percent('mem_min_warnings')
-
-(zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent
- ) = calculate_percent('zram_max_warnings')
-
-
-if 'rate_mem' in config_dict:
- rate_mem = string_to_float_convert_test(config_dict['rate_mem'])
- if rate_mem is None:
- errprint('Invalid rate_mem value, not float\nExit')
- exit(1)
- if rate_mem <= 0:
- errprint('rate_mem MUST be > 0\nExit')
- exit(1)
-else:
- errprint('rate_mem not in config\nExit')
- exit(1)
-
-
-if 'rate_swap' in config_dict:
- rate_swap = string_to_float_convert_test(config_dict['rate_swap'])
- if rate_swap is None:
- errprint('Invalid rate_swap value, not float\nExit')
- exit(1)
- if rate_swap <= 0:
- errprint('rate_swap MUST be > 0\nExit')
- exit(1)
-else:
- errprint('rate_swap not in config\nExit')
- exit(1)
-
-
-if 'rate_zram' in config_dict:
- rate_zram = string_to_float_convert_test(config_dict['rate_zram'])
- if rate_zram is None:
- errprint('Invalid rate_zram value, not float\nExit')
- exit(1)
- if rate_zram <= 0:
- errprint('rate_zram MUST be > 0\nExit')
- exit(1)
-else:
- errprint('rate_zram not in config\nExit')
- exit(1)
-
-
-if 'swap_min_sigterm' in config_dict:
- swap_min_sigterm = config_dict['swap_min_sigterm']
-else:
- errprint('swap_min_sigterm not in config\nExit')
- exit(1)
-
-
-if 'swap_min_sigkill' in config_dict:
- swap_min_sigkill = config_dict['swap_min_sigkill']
-else:
- errprint('swap_min_sigkill not in config\nExit')
- exit(1)
-
-
-if 'min_delay_after_sigterm' in config_dict:
- min_delay_after_sigterm = string_to_float_convert_test(
- config_dict['min_delay_after_sigterm'])
- if min_delay_after_sigterm is None:
- errprint('Invalid min_delay_after_sigterm value, not float\nExit')
- exit(1)
- if min_delay_after_sigterm < 0:
- errprint('min_delay_after_sigterm must be positiv\nExit')
- exit(1)
-else:
- errprint('min_delay_after_sigterm not in config\nExit')
- exit(1)
-
-
-if 'psi_post_action_delay' in config_dict:
- psi_post_action_delay = string_to_float_convert_test(
- config_dict['psi_post_action_delay'])
- if psi_post_action_delay is None:
- errprint('Invalid psi_post_action_delay value, not float\nExit')
- exit(1)
- if psi_post_action_delay < 0:
- errprint('psi_post_action_delay must be positive\nExit')
- exit(1)
-else:
- errprint('psi_post_action_delay not in config\nExit')
- exit(1)
-
-
-if 'sigkill_psi_threshold' in config_dict:
- sigkill_psi_threshold = string_to_float_convert_test(
- config_dict['sigkill_psi_threshold'])
- if sigkill_psi_threshold is None:
- errprint('Invalid sigkill_psi_threshold value, not float\nExit')
- exit(1)
- if sigkill_psi_threshold < 0 or sigkill_psi_threshold > 100:
- errprint('sigkill_psi_threshold must be in the range [0; 100]\nExit')
- exit(1)
-else:
- errprint('sigkill_psi_threshold not in config\nExit')
- exit(1)
-
-
-if 'sigterm_psi_threshold' in config_dict:
- sigterm_psi_threshold = string_to_float_convert_test(
- config_dict['sigterm_psi_threshold'])
- if sigterm_psi_threshold is None:
- errprint('Invalid sigterm_psi_threshold value, not float\nExit')
- exit(1)
- if sigterm_psi_threshold < 0 or sigterm_psi_threshold > 100:
- errprint('sigterm_psi_threshold must be in the range [0; 100]\nExit')
- exit(1)
-else:
- errprint('sigterm_psi_threshold not in config\nExit')
- exit(1)
-
-
-if 'min_badness' in config_dict:
- min_badness = string_to_int_convert_test(
- config_dict['min_badness'])
- if min_badness is None:
- errprint('Invalid min_badness value, not integer\nExit')
- exit(1)
- if min_badness < 0 or min_badness > 1000:
- errprint('Invalud min_badness value\nExit')
- exit(1)
-else:
- errprint('min_badness not in config\nExit')
- exit(1)
-
-
-if 'oom_score_adj_max' in config_dict:
- oom_score_adj_max = string_to_int_convert_test(
- config_dict['oom_score_adj_max'])
- if oom_score_adj_max is None:
- errprint('Invalid oom_score_adj_max value, not integer\nExit')
- exit(1)
- if oom_score_adj_max < 0 or oom_score_adj_max > 1000:
- errprint('Invalid oom_score_adj_max value\nExit')
- exit(1)
-else:
- errprint('oom_score_adj_max not in config\nExit')
- exit(1)
-
-
-if 'min_time_between_warnings' in config_dict:
- min_time_between_warnings = string_to_float_convert_test(
- config_dict['min_time_between_warnings'])
- if min_time_between_warnings is None:
- errprint('Invalid min_time_between_warnings value, not float\nExit')
- exit(1)
- if min_time_between_warnings < 1 or min_time_between_warnings > 300:
- errprint('min_time_between_warnings value out of range [1; 300]\nExit')
- exit(1)
-else:
- errprint('min_time_between_warnings not in config\nExit')
- exit(1)
-
-
-if 'swap_min_warnings' in config_dict:
- swap_min_warnings = config_dict['swap_min_warnings']
-else:
- errprint('swap_min_warnings not in config\nExit')
- exit(1)
-
-
-if 'max_ancestry_depth' in config_dict:
- max_ancestry_depth = string_to_int_convert_test(
- config_dict['max_ancestry_depth'])
- if min_badness is None:
- errprint('Invalid max_ancestry_depth value, not integer\nExit')
- exit(1)
- if max_ancestry_depth < 1:
- errprint('Invalud max_ancestry_depth value\nExit')
- exit(1)
-else:
- errprint('max_ancestry_depth is not in config\nExit')
- exit(1)
-
-
-if 'max_post_sigterm_victim_lifetime' in config_dict:
- max_post_sigterm_victim_lifetime = string_to_float_convert_test(
- config_dict['max_post_sigterm_victim_lifetime'])
- if max_post_sigterm_victim_lifetime is None:
- errprint('Invalid max_post_sigterm_victim_lifetime val'
- 'ue, not float\nExit')
- exit(1)
- if max_post_sigterm_victim_lifetime < 0:
- errprint('max_post_sigterm_victim_lifetime must be non-n'
- 'egative number\nExit')
- exit(1)
-else:
- errprint('max_post_sigterm_victim_lifetime is not in config\nExit')
- exit(1)
-
-
-if 'post_kill_exe' in config_dict:
- post_kill_exe = config_dict['post_kill_exe']
-else:
- errprint('post_kill_exe is not in config\nExit')
- exit(1)
-
-
-if 'psi_path' in config_dict:
- psi_path = config_dict['psi_path']
-else:
- errprint('psi_path is not in config\nExit')
- exit(1)
-
-
-if 'psi_metrics' in config_dict:
- psi_metrics = config_dict['psi_metrics']
-else:
- errprint('psi_metrics is not in config\nExit')
- exit(1)
-
-
-if 'warning_exe' in config_dict:
- warning_exe = config_dict['warning_exe']
- if warning_exe != '':
- check_warning_exe = True
- else:
- check_warning_exe = False
-else:
- errprint('warning_exe is not in config\nExit')
- exit(1)
-
-
-if 'extra_table_info' in config_dict:
- extra_table_info = config_dict['extra_table_info']
- if (extra_table_info != 'None' and
- extra_table_info != 'cgroup_v1' and
- extra_table_info != 'cgroup_v2' and
- extra_table_info != 'cmdline' and
- extra_table_info != 'environ' and
- extra_table_info != 'realpath' and
- extra_table_info != 'All'):
-
- errprint('Invalid config: invalid extra_table_info value\nExit')
- exit(1)
-else:
- errprint('Invalid config: extra_table_info is not in config\nExit')
- exit(1)
-
-
-separate_log = conf_parse_bool('separate_log')
-
-if separate_log:
-
- import logging
- from logging import basicConfig
- from logging import info
-
- log_dir = '/var/log/nohang'
-
- try:
- os.mkdir(log_dir)
- except PermissionError:
- print('ERROR: can not create log dir')
- except FileExistsError:
- pass
-
- logfile = log_dir + '/nohang.log'
-
- try:
- with open(logfile, 'a') as f:
- pass
- except FileNotFoundError:
- print('ERROR: log FileNotFoundError')
- except PermissionError:
- print('ERROR: log PermissionError')
-
- try:
- basicConfig(
- filename=logfile,
- level=logging.INFO,
- format="%(asctime)s: %(message)s")
- except PermissionError:
- errprint('ERROR: Permission denied: {}'.format(logfile))
- except FileNotFoundError:
- errprint('ERROR: FileNotFoundError: {}'.format(logfile))
-
-
-if 'min_mem_report_interval' in config_dict:
- min_mem_report_interval = string_to_float_convert_test(
- config_dict['min_mem_report_interval'])
- if min_mem_report_interval is None:
- errprint('Invalid min_mem_report_interval value, not float\nExit')
- exit(1)
- if min_mem_report_interval < 0:
- errprint('min_mem_report_interval must be non-negative number\nExit')
- exit(1)
-else:
- errprint('min_mem_report_interval is not in config\nExit')
- exit(1)
-
-
-if 'max_sleep' in config_dict:
- max_sleep = string_to_float_convert_test(
- config_dict['max_sleep'])
- if max_sleep is None:
- errprint('Invalid max_sleep value, not float\nExit')
- exit(1)
- if max_sleep <= 0:
- errprint('max_sleep must be positive number\nExit')
- exit(1)
-else:
- errprint('max_sleep is not in config\nExit')
- exit(1)
-
-
-if 'min_sleep' in config_dict:
- min_sleep = string_to_float_convert_test(
- config_dict['min_sleep'])
- if min_sleep is None:
- errprint('Invalid min_sleep value, not float\nExit')
- exit(1)
- if min_sleep <= 0:
- errprint('min_sleep must be positive number\nExit')
- exit(1)
-else:
- errprint('min_sleep is not in config\nExit')
- exit(1)
-
-
-if 'over_sleep' in config_dict:
- over_sleep = string_to_float_convert_test(
- config_dict['over_sleep'])
- if over_sleep is None:
- errprint('Invalid over_sleep value, not float\nExit')
- exit(1)
- if over_sleep <= 0:
- errprint('over_sleep must be positive number\nExit')
- exit(1)
-else:
- errprint('over_sleep is not in config\nExit')
- exit(1)
-
-
-if max_sleep < min_sleep:
- errprint(
- 'max_sleep value must not exceed min_sleep value.\nExit'
- )
- exit(1)
-
-
-if min_sleep < over_sleep:
- errprint(
- 'min_sleep value must not exceed over_sleep value.\nExit'
- )
- exit(1)
-
-
-if max_sleep == min_sleep:
- stable_sleep = True
-else:
- stable_sleep = False
-
-
-if print_proc_table_flag:
-
- if not root:
- log('WARNING: effective UID != 0; euid={}; processes with other e'
- 'uids will be invisible for nohang'.format(self_uid))
-
- func_print_proc_table()
-
-
-##########################################################################
-
-
-psi_support = os.path.exists(psi_path)
-
-
-##########################################################################
-
-
-# Get KiB levels if it's possible.
-
-
-def get_swap_threshold_tuple(string):
- # re (Num %, True) or (Num KiB, False)
- """Returns KiB value if abs val was set in config, or tuple with %"""
- # return tuple with abs and bool: (abs %, True) or (abs MiB, False)
-
- if string.endswith('%'):
- valid = string_to_float_convert_test(string[:-1])
- if valid is None:
- errprint('somewhere swap unit is not float_%')
- exit(1)
-
- value = float(string[:-1].strip())
- if value < 0 or value > 100:
- errprint('invalid value, must be from the range[0; 100] %')
- exit(1)
-
- return value, True
-
- elif string.endswith('M'):
- valid = string_to_float_convert_test(string[:-1])
- if valid is None:
- errprint('somewhere swap unit is not float_M')
- exit(1)
-
- value = float(string[:-1].strip()) * 1024
- if value < 0:
- errprint('invalid unit in config (negative value)')
- exit(1)
-
- return value, False
-
- else:
- errprint(
- 'Invalid config file. There are invalid units somewhere\nExit')
- exit(1)
-
-
-swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm)
-swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill)
-swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings)
-
-
-swap_term_is_percent = swap_min_sigterm_tuple[1]
-if swap_term_is_percent:
- swap_min_sigterm_percent = swap_min_sigterm_tuple[0]
-else:
- swap_min_sigterm_kb = swap_min_sigterm_tuple[0]
-
-
-swap_kill_is_percent = swap_min_sigkill_tuple[1]
-if swap_kill_is_percent:
- swap_min_sigkill_percent = swap_min_sigkill_tuple[0]
-else:
- swap_min_sigkill_kb = swap_min_sigkill_tuple[0]
-
-
-swap_warn_is_percent = swap_min_warnings_tuple[1]
-if swap_warn_is_percent:
- swap_min_warnings_percent = swap_min_warnings_tuple[0]
-else:
- swap_min_warnings_kb = swap_min_warnings_tuple[0]
-
-
-##########################################################################
-
-# outdated section, need fixes
-
-if print_config:
-
- print(
- '\n1. Memory levels to respond to as an OOM threat\n[display'
- 'ing these options need fix]\n')
-
- print('mem_min_sigterm: {} MiB, {} %'.format(
- round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1)))
- print('mem_min_sigkill: {} MiB, {} %'.format(
- round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1)))
-
- print('swap_min_sigterm: {}'.format(swap_min_sigterm))
- print('swap_min_sigkill: {}'.format(swap_min_sigkill))
-
- print('zram_max_sigterm: {} MiB, {} %'.format(
- round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1)))
- print('zram_max_sigkill: {} MiB, {} %'.format(
- round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1)))
-
- print('\n2. The frequency of checking the level of available m'
- 'emory (and CPU usage)\n')
- print('rate_mem: {}'.format(rate_mem))
- print('rate_swap: {}'.format(rate_swap))
- print('rate_zram: {}'.format(rate_zram))
-
- print('\n3. The prevention of killing innocent victims\n')
- print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm))
- print('min_badness: {}'.format(min_badness))
-
- print('decrease_oom_score_adj: {}'.format(
- decrease_oom_score_adj
- ))
- if decrease_oom_score_adj:
- print('oom_score_adj_max: {}'.format(oom_score_adj_max))
-
- print('\n4. Impact on the badness of processes via matching their'
- ' names, cmdlines ir UIDs with regular expressions\n')
-
- print('(todo)')
-
- print('\n5. The execution of a specific command instead of sen'
- 'ding the\nSIGTERM signal\n')
-
- print('\n6. GUI notifications:\n- OOM prevention results and\n- low m'
- 'emory warnings\n')
- print('gui_notifications: {}'.format(gui_notifications))
-
- print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings))
- if gui_low_memory_warnings:
- print('min_time_between_warnings: {}'.format(
- min_time_between_warnings))
-
- print('mem_min_warnings: {} MiB, {} %'.format(
- round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1)))
-
- print('swap_min_warnings: {}'.format(swap_min_warnings))
-
- print('zram_max_warnings: {} MiB, {} %'.format(
- round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1)))
-
- print('\n7. Output verbosity\n')
- print('print_config: {}'.format(print_config))
- print('print_mem_check_results: {}'.format(print_mem_check_results))
- print('print_sleep_periods: {}\n'.format(print_sleep_periods))
-
-
-##########################################################################
-
-
-# for calculating the column width when printing mem and zram
-mem_len = len(str(round(mem_total / 1024.0)))
-
-if gui_notifications:
- notify_sig_dict = {SIGKILL: 'Killing',
- SIGTERM: 'Terminating'}
-
-
-# convert rates from MiB/s to KiB/s
-rate_mem = rate_mem * 1024
-rate_swap = rate_swap * 1024
-rate_zram = rate_zram * 1024
-
-
-warn_time_now = 0
-warn_time_delta = 1000
-warn_timer = 0
-
-
-##########################################################################
-
-
-if not root:
- log('WARNING: effective UID != 0; euid={}; processes with other e'
- 'uids will be invisible for nohang'.format(self_uid))
-
-
-# Try to lock all memory
-
-mlockall()
-
-##########################################################################
-
-
-# print_self_rss()
-
-
-log('Monitoring has started!')
-
-stdout.flush()
-
-##########################################################################
-
-psi_avg_string = '' # will be overwritten if PSI monitoring enabled
-
-
-if psi_support and not ignore_psi:
- psi_t0 = time()
-
-
-if print_mem_check_results:
-
- # to find delta mem
- wt2 = 0
- new_mem = 0
-
- # init mem report interval
- report0 = 0
-
-
-# handle signals
-for i in sig_list:
- signal(i, signal_handler)
-
-
-while True:
-
- if psi_support and not ignore_psi:
-
- psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
-
- if print_mem_check_results:
- psi_avg_string = 'PSI avg value: {} | '.format(
- str(psi_avg_value).rjust(6))
-
- if psi_avg_value >= sigkill_psi_threshold:
- sigkill_psi_exceeded = True
- else:
- sigkill_psi_exceeded = False
-
- if psi_avg_value >= sigterm_psi_threshold:
- sigterm_psi_exceeded = True
- else:
- sigterm_psi_exceeded = False
-
- if time() - psi_t0 >= psi_post_action_delay:
- psi_post_action_delay_exceeded = True
- else:
- psi_post_action_delay_exceeded = False
-
- if psi_debug:
- log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps'
- 'i_post_action_delay_exceeded: {}'.format(
- sigterm_psi_exceeded,
- sigkill_psi_exceeded,
- psi_post_action_delay_exceeded))
-
- if sigkill_psi_exceeded and psi_post_action_delay_exceeded:
- time0 = time()
- mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \
- 'old ({})'.format(
- psi_avg_value, sigkill_psi_threshold)
-
- implement_corrective_action(SIGKILL)
-
- psi_t0 = time()
- continue
-
- if sigterm_psi_exceeded and psi_post_action_delay_exceeded:
- time0 = time()
- mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \
- 'shold ({})'.format(psi_avg_value, sigterm_psi_threshold)
-
- implement_corrective_action(SIGTERM)
-
- psi_t0 = time()
- continue
-
- mem_available, swap_total, swap_free = check_mem_and_swap()
-
- # if swap_min_sigkill is set in percent
- if swap_kill_is_percent:
- swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0
-
- if swap_term_is_percent:
- swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0
-
- if swap_warn_is_percent:
- swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0
-
- mem_used_zram = check_zram()
-
- if print_mem_check_results:
-
- wt1 = time()
-
- delta = (mem_available + swap_free) - new_mem
-
- t_cycle = wt1 - wt2
-
- report_delta = wt1 - report0
-
- if report_delta >= min_mem_report_interval:
-
- mem_report = True
- new_mem = mem_available + swap_free
-
- report0 = wt1
-
- else:
- mem_report = False
-
- wt2 = time()
-
- if mem_report:
-
- speed = delta / 1024.0 / report_delta
- speed_info = ' | dMem: {} M/s'.format(
- str(round(speed)).rjust(5)
- )
-
- # Calculate 'swap-column' width
- swap_len = len(str(round(swap_total / 1024.0)))
-
- # Output available mem sizes
- if swap_total == 0 and mem_used_zram == 0:
- log('{}MemAvail: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- speed_info
- )
- )
-
- elif swap_total > 0 and mem_used_zram == 0:
- log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- human(swap_free, swap_len),
- just_percent_swap(swap_free / (swap_total + 0.1)),
- speed_info
- )
- )
-
- else:
- log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem'
- 'UsedZram: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- human(swap_free, swap_len),
- just_percent_swap(swap_free / (swap_total + 0.1)),
- human(mem_used_zram, mem_len),
- just_percent_mem(mem_used_zram / mem_total),
- speed_info
- )
- )
-
- if swap_total > swap_min_sigkill_kb:
- swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1))
- else:
- swap_sigkill_pc = '-'
-
- if swap_total > swap_min_sigterm_kb:
- swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1))
- else:
- swap_sigterm_pc = '-'
-
- # MEM SWAP KILL
- if (mem_available <= mem_min_sigkill_kb and
- swap_free <= swap_min_sigkill_kb):
- time0 = time()
-
- mem_info = 'Hard threshold exceeded\nMemory status that requ' \
- 'ires corrective actions:' \
- '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
- 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
- 'p_min_sigkill [{} MiB, {} %]'.format(
- kib_to_mib(mem_available),
- percent(mem_available / mem_total),
- kib_to_mib(mem_min_sigkill_kb),
- percent(mem_min_sigkill_kb / mem_total),
- kib_to_mib(swap_free),
- percent(swap_free / (swap_total + 0.1)),
- kib_to_mib(swap_min_sigkill_kb),
- swap_sigkill_pc)
-
- implement_corrective_action(SIGKILL)
-
- psi_t0 = time()
- continue
-
- # ZRAM KILL
- if mem_used_zram >= zram_max_sigkill_kb:
- time0 = time()
-
- mem_info = 'Hard threshold exceeded\nMemory status that requir' \
- 'es corrective actions:' \
- '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
- 'kill [{} MiB, {} %]'.format(
- kib_to_mib(mem_used_zram),
- percent(mem_used_zram / mem_total),
- kib_to_mib(zram_max_sigkill_kb),
- percent(zram_max_sigkill_kb / mem_total))
-
- implement_corrective_action(SIGKILL)
-
- psi_t0 = time()
- continue
-
- # MEM SWAP TERM
- if mem_available <= mem_min_sigterm_kb and \
- swap_free <= swap_min_sigterm_kb:
-
- time0 = time()
-
- mem_info = 'Soft threshold exceeded\nMemory status that requi' \
- 'res corrective actions:' \
- '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
- 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
- 'p_min_sigterm [{} MiB, {} %]'.format(
- kib_to_mib(mem_available),
- percent(mem_available / mem_total),
- kib_to_mib(mem_min_sigterm_kb),
- round(mem_min_sigterm_percent, 1),
- kib_to_mib(swap_free),
- percent(swap_free / (swap_total + 0.1)),
- kib_to_mib(swap_min_sigterm_kb),
- swap_sigterm_pc)
-
- implement_corrective_action(SIGTERM)
-
- psi_t0 = time()
- continue
-
- # ZRAM TERM
- if mem_used_zram >= zram_max_sigterm_kb:
- time0 = time()
-
- mem_info = 'Soft threshold exceeded\nMemory status that requ' \
- 'ires corrective actions:' \
- '\n MemUsedZram [{} MiB, {} %] >= ' \
- 'zram_max_sigterm [{} M, {} %]'.format(
- kib_to_mib(mem_used_zram),
- percent(mem_used_zram / mem_total),
- kib_to_mib(zram_max_sigterm_kb),
- percent(zram_max_sigterm_kb / mem_total))
-
- implement_corrective_action(SIGTERM)
-
- psi_t0 = time()
- continue
-
- # LOW MEMORY WARNINGS
- if gui_low_memory_warnings:
-
- if mem_available <= mem_min_warnings_kb and \
- swap_free <= swap_min_warnings_kb + 0.1 or \
- mem_used_zram >= zram_max_warnings_kb:
- warn_time_delta = time() - warn_time_now
- warn_time_now = time()
- warn_timer += warn_time_delta
- if warn_timer > min_time_between_warnings:
- send_notify_warn()
- warn_timer = 0
-
-
-
-
- # SLEEP BETWEEN MEM CHECKS
- sleep_after_check_mem()
-
-
-
-
-
-
-
-
-
-
diff --git a/trash/n11 b/trash/n11
deleted file mode 100755
index 6ffa8cc..0000000
--- a/trash/n11
+++ /dev/null
@@ -1,3073 +0,0 @@
-#!/usr/bin/env python3
-"""A daemon that prevents OOM in Linux systems."""
-
-import os
-from ctypes import CDLL
-from time import sleep, time
-from operator import itemgetter
-from sys import stdout, stderr, argv, exit, version
-from re import search
-from sre_constants import error as invalid_re
-from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP
-
-
-##########################################################################
-
-# define functions
-
-'''
-def self_rss():
- """
- """
- return pid_to_status(self_pid)[5]
-
-
-def print_self_rss():
- """
- """
- log('Self RSS: {} MiB'.format(self_rss()))
-'''
-
-
-
-
-
-
-
-
-
-
-
-
-
-def cgroup2_root():
- """
- """
- with open('/proc/mounts') as f:
- for line in f:
- if ' cgroup2 ' in line:
- return line[7:].rpartition(' cgroup2 ')[0]
-
-
-
-
-def cgroup2_to_psi_file(cg2):
- """
- """
- cg2root = cgroup2_root()
- if cg2root is not None:
- return cg2root + cg2 + '/memory.pressure'
-
-
-
-
-
-def get_psi_mem_files(cgroup2_path):
- """
- """
-
- path_list = []
-
- for root, dirs, files in os.walk(cgroup2_path):
- for file in files:
- path = os.path.join(root, file)
- if path.endswith('/memory.pressure'): #############
- path_list.append(path)
-
- return path_list
-
-
-def psi_path_to_cgroup2(path):
- """
- """
- return path.partition(i)[2][:-16]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-def get_swap_threshold_tuple(string):
- # re (Num %, True) or (Num KiB, False)
- """Returns KiB value if abs val was set in config, or tuple with %"""
- # return tuple with abs and bool: (abs %, True) or (abs MiB, False)
-
- if string.endswith('%'):
- valid = string_to_float_convert_test(string[:-1])
- if valid is None:
- errprint('somewhere swap unit is not float_%')
- exit(1)
-
- value = float(string[:-1].strip())
- if value < 0 or value > 100:
- errprint('invalid value, must be from the range[0; 100] %')
- exit(1)
-
- return value, True
-
- elif string.endswith('M'):
- valid = string_to_float_convert_test(string[:-1])
- if valid is None:
- errprint('somewhere swap unit is not float_M')
- exit(1)
-
- value = float(string[:-1].strip()) * 1024
- if value < 0:
- errprint('invalid unit in config (negative value)')
- exit(1)
-
- return value, False
-
- else:
- errprint(
- 'Invalid config file. There are invalid units somewhere\nExit')
- exit(1)
-
-
-def find_cgroup_indexes():
- """ Find cgroup-line positions in /proc/*/cgroup file.
- """
-
- cgroup_v1_index = cgroup_v2_index = None
-
- with open('/proc/self/cgroup') as f:
- for index, line in enumerate(f):
- if ':name=' in line:
- cgroup_v1_index = index
- if line.startswith('0::'):
- cgroup_v2_index = index
-
- return cgroup_v1_index, cgroup_v2_index
-
-
-def pid_to_rss(pid):
- """
- """
- try:
- rss = int(rline1(
- '/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE
- except IndexError:
- rss = None
- except FileNotFoundError:
- rss = None
- except ProcessLookupError:
- rss = None
- return rss
-
-
-def pid_to_vm_size(pid):
- """
- """
- try:
- vm_size = int(rline1(
- '/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE
- except IndexError:
- vm_size = None
- except FileNotFoundError:
- vm_size = None
- except ProcessLookupError:
- vm_size = None
- return vm_size
-
-
-def signal_handler(signum, frame):
- """
- """
- for i in sig_list:
- signal(i, signal_handler_inner)
- log('Signal handler called with the {} signal '.format(
- sig_dict[signum]))
- update_stat_dict_and_print(None)
- log('Exit')
- exit()
-
-
-def signal_handler_inner(signum, frame):
- """
- """
- log('Signal handler called with the {} signal (ignored) '.format(
- sig_dict[signum]))
-
-
-def exe(cmd):
- """
- """
- log('Execute the command: {}'.format(cmd))
- t0 = time()
- write_self_oom_score_adj(self_oom_score_adj_max)
- err = os.system(cmd)
- write_self_oom_score_adj(self_oom_score_adj_min)
- dt = time() - t0
- log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3)))
- return err
-
-
-def write(path, string):
- """
- """
- with open(path, 'w') as f:
- f.write(string)
-
-
-def write_self_oom_score_adj(new_value):
- """
- """
- if root:
- write('/proc/self/oom_score_adj', new_value)
-
-
-def valid_re(reg_exp):
- """Validate regular expression.
- """
- try:
- search(reg_exp, '')
- except invalid_re:
- log('Invalid config: invalid regexp: {}'.format(reg_exp))
- exit(1)
-
-
-def func_print_proc_table():
- """
- """
- print_proc_table = True
- find_victim(print_proc_table)
- exit()
-
-
-def log(*msg):
- """
- """
- try:
- print(*msg)
- except OSError:
- sleep(0.01)
- if separate_log:
- try:
- info(*msg)
- except OSError:
- sleep(0.01)
-
-
-def print_version():
- """
- """
- try:
- v = rline1('/etc/nohang/version')
- except FileNotFoundError:
- v = None
- if v is None:
- print('Nohang unknown version')
- else:
- print('Nohang ' + v)
- exit()
-
-
-def test():
- """
- """
- print('\n(This option is not ready to use!)\n')
-
- print(version)
- print(argv)
-
- hr = '=================================='
- print(hr)
- print("uptime()")
- print(uptime())
-
- print(hr)
- print("os.uname()")
- print(os.uname())
-
- print(hr)
- print("pid_to_starttime('self')")
- print(pid_to_starttime('self'))
-
- print(hr)
- print("get_victim_id('self')")
- print(get_victim_id('self'))
-
- print(hr)
- print("errprint('test')")
- print(errprint('test'))
-
- print(hr)
- print("mlockall()")
- print(mlockall())
-
- print(hr)
- print("pid_to_state('2')")
- print(pid_to_state('2'))
-
- exit()
-
-
-def pid_to_cgroup_v1(pid):
- """
- """
- cgroup_v1 = ''
- try:
- with open('/proc/' + pid + '/cgroup') as f:
- for index, line in enumerate(f):
- if index == cgroup_v1_index:
- cgroup_v1 = '/' + line.partition('/')[2][:-1]
- return cgroup_v1
- except FileNotFoundError:
- return ''
-
-
-def pid_to_cgroup_v2(pid):
- """
- """
- cgroup_v2 = ''
- try:
- with open('/proc/' + pid + '/cgroup') as f:
- for index, line in enumerate(f):
- if index == cgroup_v2_index:
- cgroup_v2 = line[3:-1]
- return cgroup_v2
- except FileNotFoundError:
- return ''
-
-
-def pid_to_starttime(pid):
- """ handle FNF error!
- """
- try:
- starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[
- 2].split(' ')[20]
-
- except UnicodeDecodeError:
- # print('LOL')
- with open('/proc/' + pid + '/stat', 'rb') as f:
- starttime = f.read().decode('utf-8', 'ignore').rpartition(
- ')')[2].split(' ')[20]
-
- return float(starttime) / SC_CLK_TCK
-
-
-def get_victim_id(pid):
- """victim_id is starttime + pid"""
- try:
- return rline1('/proc/' + pid + '/stat').rpartition(
- ')')[2].split(' ')[20] + '_pid' + pid
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
-
-
-def pid_to_state(pid):
- """ Handle FNF error! (BTW it already handled in find_victim_info())
- """
- return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1]
-
-
-def pid_to_name(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/comm', 'rb') as f:
- return f.read().decode('utf-8', 'ignore')[:-1]
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
-
-
-def pid_to_ppid(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/status') as f:
- for n, line in enumerate(f):
- if n is ppid_index:
- return line.split('\t')[1].strip()
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
- except UnicodeDecodeError:
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
- for i in range(len(f_list)):
- if i is ppid_index:
- return f_list[i].split('\t')[1]
-
-
-def pid_to_ancestry(pid, max_ancestry_depth=1):
- """
- """
- if max_ancestry_depth == 1:
- ppid = pid_to_ppid(pid)
- pname = pid_to_name(ppid)
- return '\n PPID: {} ({})'.format(ppid, pname)
- if max_ancestry_depth == 0:
- return ''
- anc_list = []
- for i in range(max_ancestry_depth):
- ppid = pid_to_ppid(pid)
- pname = pid_to_name(ppid)
- anc_list.append((ppid, pname))
- if ppid == '1':
- break
- pid = ppid
- a = ''
- for i in anc_list:
- a = a + ' <= PID {} ({})'.format(i[0], i[1])
- return '\n Ancestry: ' + a[4:]
-
-
-def pid_to_cmdline(pid):
- """
- Get process cmdline by pid.
-
- pid: str pid of required process
- returns string cmdline
- """
- try:
- with open('/proc/' + pid + '/cmdline') as f:
- return f.read().replace('\x00', ' ').rstrip()
- except FileNotFoundError:
- return ''
-
-
-def pid_to_environ(pid):
- """
- Get process environ by pid.
-
- pid: str pid of required process
- returns string environ
- """
- try:
- with open('/proc/' + pid + '/environ') as f:
- return f.read().replace('\x00', ' ').rstrip()
- except FileNotFoundError:
- return ''
-
-
-def pid_to_realpath(pid):
- """
- """
- try:
- return os.path.realpath('/proc/' + pid + '/exe')
- except FileNotFoundError:
- return ''
-
-
-def pid_to_uid(pid):
- """return euid"""
- try:
- with open('/proc/' + pid + '/status') as f:
- for n, line in enumerate(f):
- if n is uid_index:
- return line.split('\t')[2]
- except UnicodeDecodeError:
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
- return f_list[uid_index].split('\t')[2]
- except FileNotFoundError:
- return ''
-
-
-def pid_to_badness(pid):
- """Find and modify badness (if it needs)."""
-
- try:
-
- oom_score = int(rline1('/proc/' + pid + '/oom_score'))
- badness = oom_score
-
- if decrease_oom_score_adj:
- oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
- if badness > oom_score_adj_max and oom_score_adj > 0:
- badness = badness - oom_score_adj + oom_score_adj_max
-
- if regex_matching:
- name = pid_to_name(pid)
- for re_tup in processname_re_list:
- if search(re_tup[1], name) is not None:
- badness += int(re_tup[0])
-
- if re_match_cgroup_v1:
- cgroup_v1 = pid_to_cgroup_v1(pid)
- for re_tup in cgroup_v1_re_list:
- if search(re_tup[1], cgroup_v1) is not None:
- badness += int(re_tup[0])
-
- if re_match_cgroup_v2:
- cgroup_v2 = pid_to_cgroup_v2(pid)
- for re_tup in cgroup_v2_re_list:
- if search(re_tup[1], cgroup_v2) is not None:
- badness += int(re_tup[0])
-
- if re_match_realpath:
- realpath = pid_to_realpath(pid)
- for re_tup in realpath_re_list:
- if search(re_tup[1], realpath) is not None:
- badness += int(re_tup[0])
-
- if re_match_cmdline:
- cmdline = pid_to_cmdline(pid)
- for re_tup in cmdline_re_list:
- if search(re_tup[1], cmdline) is not None:
- badness += int(re_tup[0])
-
- if re_match_environ:
- environ = pid_to_environ(pid)
- for re_tup in environ_re_list:
- if search(re_tup[1], environ) is not None:
- badness += int(re_tup[0])
-
- if re_match_uid:
- uid = pid_to_uid(pid)
- for re_tup in uid_re_list:
- if search(re_tup[1], uid) is not None:
- badness += int(re_tup[0])
-
- if forbid_negative_badness:
- if badness < 0:
- badness = 0
-
- return badness, oom_score
-
- except FileNotFoundError:
- return None, None
- except ProcessLookupError:
- return None, None
-
-
-def pid_to_status(pid):
- """
- """
-
- try:
-
- with open('/proc/' + pid + '/status') as f:
-
- for n, line in enumerate(f):
-
- if n is 0:
- name = line.split('\t')[1][:-1]
-
- if n is state_index:
- state = line.split('\t')[1][0]
- continue
-
- if n is ppid_index:
- ppid = line.split('\t')[1][:-1]
- continue
-
- if n is uid_index:
- uid = line.split('\t')[2]
- continue
-
- if n is vm_size_index:
- vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_rss_index:
- vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_swap_index:
- vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
- break
-
- return name, state, ppid, uid, vm_size, vm_rss, vm_swap
-
- except UnicodeDecodeError:
- return pid_to_status_unicode(pid)
-
- except FileNotFoundError:
- return None
-
- except ProcessLookupError:
- return None
-
- except ValueError:
- return None
-
-
-def pid_to_status_unicode(pid):
- """
- """
- try:
-
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
-
- for i in range(len(f_list)):
-
- if i is 0:
- name = f_list[i].split('\t')[1]
-
- if i is state_index:
- state = f_list[i].split('\t')[1][0]
-
- if i is ppid_index:
- ppid = f_list[i].split('\t')[1]
-
- if i is uid_index:
- uid = f_list[i].split('\t')[2]
-
- if i is vm_size_index:
- vm_size = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_rss_index:
- vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_swap_index:
- vm_swap = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- return name, state, ppid, uid, vm_size, vm_rss, vm_swap
-
- except FileNotFoundError:
- return None
-
- except ProcessLookupError:
- return None
-
- except ValueError:
- return None
-
-
-def uptime():
- """
- """
- return float(rline1('/proc/uptime').split(' ')[0])
-
-
-def errprint(*text):
- """
- """
- print(*text, file=stderr, flush=True)
-
-
-def mlockall():
- """Lock all memory to prevent swapping nohang process."""
-
- MCL_CURRENT = 1
- MCL_FUTURE = 2
- MCL_ONFAULT = 4
-
- libc = CDLL('libc.so.6', use_errno=True)
-
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
- )
- if result != 0:
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE
- )
- if result != 0:
- log('WARNING: cannot lock all memory')
- else:
- pass
- # log('All memory locked with MCL_CURRENT | MCL_FUTURE')
- else:
- pass
- # log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
-
-
-def update_stat_dict_and_print(key):
- """
- """
-
- if key is not None:
-
- if key not in stat_dict:
-
- stat_dict.update({key: 1})
-
- else:
-
- new_value = stat_dict[key] + 1
- stat_dict.update({key: new_value})
-
- if print_total_stat:
-
- stats_msg = 'Total stat (what happened in the last {}):'.format(
- format_time(time() - start_time))
-
- for i in stat_dict:
- stats_msg += '\n {}: {}'.format(i, stat_dict[i])
-
- log(stats_msg)
-
-
-def find_psi_metrics_value(psi_path, psi_metrics):
- """
- """
-
- if psi_support:
-
- if psi_metrics == 'some_avg10':
- return float(rline1(psi_path).split(' ')[1].split('=')[1])
- if psi_metrics == 'some_avg60':
- return float(rline1(psi_path).split(' ')[2].split('=')[1])
- if psi_metrics == 'some_avg300':
- return float(rline1(psi_path).split(' ')[3].split('=')[1])
-
- if psi_metrics == 'full_avg10':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[1].split('=')[1])
- if psi_metrics == 'full_avg60':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[2].split('=')[1])
- if psi_metrics == 'full_avg300':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[3].split('=')[1])
-
-
-def check_mem_and_swap():
- """find mem_available, swap_total, swap_free"""
- with open('/proc/meminfo') as f:
- for n, line in enumerate(f):
- if n is 2:
- mem_available = int(line.split(':')[1][:-4])
- continue
- if n is swap_total_index:
- swap_total = int(line.split(':')[1][:-4])
- continue
- if n is swap_free_index:
- swap_free = int(line.split(':')[1][:-4])
- break
- return mem_available, swap_total, swap_free
-
-
-def check_zram():
- """find MemUsedZram"""
- disksize_sum = 0
- mem_used_total_sum = 0
-
- for dev in os.listdir('/sys/block'):
- if dev.startswith('zram'):
- stat = zram_stat(dev)
- disksize_sum += int(stat[0])
- mem_used_total_sum += int(stat[1])
-
- # Means that when setting zram disksize = 1 GiB available memory
- # decrease by 0.0042 GiB.
- # Found experimentally, requires clarification with different kernaels and
- # architectures.
- # On small disk drives (up to gigabyte) it can be more, up to 0.0045.
- # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should
- # be 0.001:
- # ("zram uses about 0.1% of the size of the disk"
- # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt),
- # but this statement contradicts the experimental data.
- # ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize
- # Found experimentally.
- ZRAM_DISKSIZE_FACTOR = 0.0042
-
- return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
-
-
-def format_time(t):
- """
- """
- t = int(t)
- if t < 60:
- return '{} sec'.format(t)
- elif t >= 60 and t < 3600:
- m = t // 60
- s = t % 60
- return '{} min {} sec'.format(m, s)
- else:
- h = t // 3600
- s0 = t - h * 3600
- m = s0 // 60
- s = s0 % 60
- return '{} h {} min {} sec'.format(h, m, s)
-
-
-def string_to_float_convert_test(string):
- """Try to interprete string values as floats."""
- try:
- return float(string)
- except ValueError:
- return None
-
-
-def string_to_int_convert_test(string):
- """Try to interpret string values as integers."""
- try:
- return int(string)
- except ValueError:
- return None
-
-
-def conf_parse_string(param):
- """
- Get string parameters from the config dict.
-
- param: config_dict key
- returns config_dict[param].strip()
- """
- if param in config_dict:
- return config_dict[param].strip()
- else:
- errprint('All the necessary parameters must be in the config')
- errprint('There is no "{}" parameter in the config'.format(param))
- exit(1)
-
-
-def conf_parse_bool(param):
- """
- Get bool parameters from the config_dict.
-
- param: config_dicst key
- returns bool
- """
- if param in config_dict:
- param_str = config_dict[param]
- if param_str == 'True':
- return True
- elif param_str == 'False':
- return False
- else:
- errprint('Invalid value of the "{}" parameter.'.format(param))
- errprint('Valid values are True and False.')
- errprint('Exit')
- exit(1)
- else:
- errprint('All the necessary parameters must be in the config')
- errprint('There is no "{}" parameter in the config'.format(param))
- exit(1)
-
-
-def rline1(path):
- """read 1st line from path."""
- try:
- with open(path) as f:
- for line in f:
- return line[:-1]
- except UnicodeDecodeError:
- with open(path, 'rb') as f:
- return f.read(999).decode(
- 'utf-8', 'ignore').split('\n')[0] # use partition()!
-
-
-def kib_to_mib(num):
- """Convert KiB values to MiB values."""
- return round(num / 1024.0)
-
-
-def percent(num):
- """Interprete num as percentage."""
- return round(num * 100, 1)
-
-
-def just_percent_mem(num):
- """convert num to percent and justify"""
- return str(round(num * 100, 1)).rjust(4, ' ')
-
-
-def just_percent_swap(num):
- """
- """
- return str(round(num * 100, 1)).rjust(5, ' ')
-
-
-def human(num, lenth):
- """Convert KiB values to MiB values with right alignment"""
- return str(round(num / 1024)).rjust(lenth, ' ')
-
-
-def zram_stat(zram_id):
- """
- Get zram state.
-
- zram_id: str zram block-device id
- returns bytes diskcize, str mem_used_total
- """
- try:
- disksize = rline1('/sys/block/' + zram_id + '/disksize')
- except FileNotFoundError:
- return '0', '0'
- if disksize == ['0\n']:
- return '0', '0'
- try:
- mm_stat = rline1('/sys/block/' + zram_id + '/mm_stat').split(' ')
- mm_stat_list = []
- for i in mm_stat:
- if i != '':
- mm_stat_list.append(i)
- mem_used_total = mm_stat_list[2]
- except FileNotFoundError:
- mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total')
- return disksize, mem_used_total # BYTES, str
-
-
-def send_notify_warn():
- """
- Look for process with maximum 'badness' and warn user with notification.
- (implement Low memory warnings)
- """
- log('Warning threshold exceeded')
-
- if check_warning_exe:
- exe(warning_exe)
-
- else:
-
- title = 'Low memory'
-
- body = 'MemAvail: {}%\nSwapFree: {}%'.format(
- round(mem_available / mem_total * 100),
- round(swap_free / (swap_total + 0.1) * 100)
- )
-
- send_notification(title, body)
-
-
-def send_notify(signal, name, pid):
- """
- Notificate about OOM Preventing.
-
- signal: key for notify_sig_dict
- name: str process name
- pid: str process pid
- """
-
- # wait for memory release after corrective action
- # may be useful if free memory was about 0 immediately after
- # corrective action
- sleep(0.05)
-
- title = 'Freeze prevention'
- body = '{} [{}] {}'.format(
- notify_sig_dict[signal],
- pid,
- name.replace(
- # symbol '&' can break notifications in some themes,
- # therefore it is replaced by '*'
- '&', '*'
- )
- )
-
- send_notification(title, body)
-
-
-def send_notify_etc(pid, name, command):
- """
- Notificate about OOM Preventing.
-
- command: str command that will be executed
- name: str process name
- pid: str process pid
- """
- title = 'Freeze prevention'
- body = 'Victim is [{}] {}\nExecute the co' \
- 'mmand:\n{}'.format(
- pid, name.replace('&', '*'), command.replace('&', '*'))
-
- send_notification(title, body)
-
-
-def send_notification(title, body):
- """
- """
- split_by = '#' * 16
-
- t000 = time()
-
- path_to_cache = '/dev/shm/nohang_notify_cache_uid{}_time{}'.format(
- str(self_uid), t000
- )
-
- text = '{}{}{}'.format(title, split_by, body)
-
- try:
- with open(path_to_cache, 'w') as f:
- f.write(text)
- os.chmod(path_to_cache, 0o600)
- except OSError:
- log('OSError while send notification '
- '(No space left on device: /dev/shm)')
- return None
-
- cmd = '{} --uid {} --time {} &'.format(notify_helper_path, self_uid, t000)
-
- exe(cmd)
-
-
-def get_pid_list():
- """
- Find pid list expect kthreads and zombies
- """
- pid_list = []
- for pid in os.listdir('/proc'):
- if os.path.exists('/proc/' + pid + '/exe') is True:
- pid_list.append(pid)
- return pid_list
-
-
-def get_non_decimal_pids():
- """
- """
- non_decimal_list = []
- for pid in pid_list:
- if pid[0].isdecimal() is False:
- non_decimal_list.append(pid)
- return non_decimal_list
-
-
-def find_victim(_print_proc_table):
- """
- Find the process with highest badness and its badness adjustment
- Return pid and badness
- """
-
- ft1 = time()
-
- pid_list = get_pid_list()
-
- pid_list.remove(self_pid)
-
- if '1' in pid_list:
- pid_list.remove('1')
-
- non_decimal_list = get_non_decimal_pids()
-
- for i in non_decimal_list:
- if i in pid_list:
- pid_list.remove(i)
-
- pid_badness_list = []
-
- if _print_proc_table:
-
- if extra_table_info == 'None':
- extra_table_title = ''
-
- elif extra_table_info == 'cgroup_v1':
- extra_table_title = 'CGroup_v1'
-
- elif extra_table_info == 'cgroup_v2':
- extra_table_title = 'CGroup_v2'
-
- elif extra_table_info == 'cmdline':
- extra_table_title = 'cmdline'
-
- elif extra_table_info == 'environ':
- extra_table_title = 'environ'
-
- elif extra_table_info == 'realpath':
- extra_table_title = 'realpath'
-
- elif extra_table_info == 'All':
- extra_table_title = '[CGroup] [CmdLine] [RealPath]'
- else:
- extra_table_title = ''
-
- hr = '#' * 115
-
- log(hr)
- log('# PID PPID badness oom_score oom_score_adj e'
- 'UID S VmSize VmRSS VmSwap Name {}'.format(
- extra_table_title))
- log('#------- ------- ------- --------- ------------- -------'
- '--- - ------ ----- ------ --------------- --------')
-
- for pid in pid_list:
-
- badness = pid_to_badness(pid)[0]
-
- if badness is None:
- continue
-
- if _print_proc_table:
-
- try:
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
- except FileNotFoundError:
- continue
-
- if pid_to_status(pid) is None:
- continue
- else:
- (name, state, ppid, uid, vm_size, vm_rss,
- vm_swap) = pid_to_status(pid)
-
- if extra_table_info == 'None':
- extra_table_line = ''
-
- elif extra_table_info == 'cgroup_v1':
- extra_table_line = pid_to_cgroup_v1(pid)
-
- elif extra_table_info == 'cgroup_v2':
- extra_table_line = pid_to_cgroup_v2(pid)
-
- elif extra_table_info == 'cmdline':
- extra_table_line = pid_to_cmdline(pid)
-
- elif extra_table_info == 'environ':
- extra_table_line = pid_to_environ(pid)
-
- elif extra_table_info == 'realpath':
- extra_table_line = pid_to_realpath(pid)
-
- elif extra_table_info == 'All':
- extra_table_line = '[CG: {}] [CL: {}] [RP: {}]'.format(
- pid_to_cgroup_v1(pid),
- pid_to_cmdline(pid),
- pid_to_realpath(pid)
- )
- else:
- extra_table_line = ''
-
- log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format(
- pid.rjust(7),
- ppid.rjust(7),
- str(badness).rjust(7),
- oom_score.rjust(9),
- oom_score_adj.rjust(13),
- uid.rjust(10),
- state,
- str(vm_size).rjust(6),
- str(vm_rss).rjust(5),
- str(vm_swap).rjust(6),
- name.ljust(15),
- extra_table_line
- )
- )
-
- pid_badness_list.append((pid, badness))
-
- real_proc_num = len(pid_badness_list)
-
- # Make list of (pid, badness) tuples, sorted by 'badness' values
- # print(pid_badness_list)
- pid_tuple_list = sorted(
- pid_badness_list,
- key=itemgetter(1),
- reverse=True
- )[0]
-
- pid = pid_tuple_list[0]
-
- # Get maximum 'badness' value
- victim_badness = pid_tuple_list[1]
- victim_name = pid_to_name(pid)
-
- if _print_proc_table:
- log(hr)
-
- log('Found {} processes with existing /proc/[pid]/exe'.format(
- real_proc_num))
-
- log(
- 'Process with highest badness (found in {} ms):\n PID: {}, Na'
- 'me: {}, badness: {}'.format(
- round((time() - ft1) * 1000),
- pid,
- victim_name,
- victim_badness
- )
- )
-
- return pid, victim_badness, victim_name
-
-
-def find_victim_info(pid, victim_badness, name):
- """
- """
- status0 = time()
-
- try:
-
- with open('/proc/' + pid + '/status') as f:
-
- for n, line in enumerate(f):
-
- if n is state_index:
- state = line.split('\t')[1].rstrip()
- continue
-
- if n is ppid_index:
- ppid = line.split('\t')[1]
- continue
-
- if n is uid_index:
- uid = line.split('\t')[2]
- continue
-
- if n is vm_size_index:
- vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_rss_index:
- vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if detailed_rss:
-
- if n is anon_index:
- anon_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is file_index:
- file_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is shmem_index:
- shmem_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_swap_index:
- vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
- break
-
- cmdline = pid_to_cmdline(pid)
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
-
- except FileNotFoundError:
- log('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
- except ProcessLookupError:
- log('The victim died in the search process: ProcessLookupError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ProcessLookupError')
- return None
- except UnicodeDecodeError:
-
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
-
- for i in range(len(f_list)):
-
- if i is state_index:
- state = f_list[i].split('\t')[1].rstrip()
-
- if i is ppid_index:
- ppid = f_list[i].split('\t')[1]
-
- if i is uid_index:
- uid = f_list[i].split('\t')[2]
-
- if i is vm_size_index:
- vm_size = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_rss_index:
- vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- if detailed_rss:
-
- if i is anon_index:
- anon_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is file_index:
- file_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is shmem_index:
- shmem_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_swap_index:
- vm_swap = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- cmdline = pid_to_cmdline(pid)
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
-
- except IndexError:
- log('The victim died in the search process: IndexError')
- update_stat_dict_and_print(
- 'The victim died in the search process: IndexError')
- return None
- except ValueError:
- log('The victim died in the search process: ValueError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ValueError')
- return None
- except FileNotFoundError:
- log('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
- except ProcessLookupError:
- log('The victim died in the search process: ProcessLookupError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ProcessLookupError')
- return None
-
- len_vm = len(str(vm_size))
-
- try:
- realpath = os.path.realpath('/proc/' + pid + '/exe')
- victim_lifetime = format_time(uptime() - pid_to_starttime(pid))
- victim_cgroup_v1 = pid_to_cgroup_v1(pid)
- victim_cgroup_v2 = pid_to_cgroup_v2(pid)
-
- except FileNotFoundError:
- print('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
-
- ancestry = pid_to_ancestry(pid, max_ancestry_depth)
-
- if detailed_rss:
- detailed_rss_info = ' (' \
- 'Anon: {} MiB, ' \
- 'File: {} MiB, ' \
- 'Shmem: {} MiB)'.format(
- anon_rss,
- file_rss,
- shmem_rss)
- else:
- detailed_rss_info = ''
-
- victim_info = 'Victim information (found in {} ms):' \
- '\n Name: {}' \
- '\n State: {}' \
- '\n PID: {}' \
- '{}' \
- '\n EUID: {}' \
- '\n badness: {}, ' \
- 'oom_score: {}, ' \
- 'oom_score_adj: {}' \
- '\n VmSize: {} MiB' \
- '\n VmRSS: {} MiB {}' \
- '\n VmSwap: {} MiB' \
- '\n CGroup_v1: {}' \
- '\n CGroup_v2: {}' \
- '\n Realpath: {}' \
- '\n Cmdline: {}' \
- '\n Lifetime: {}'.format(
- round((time() - status0) * 1000),
- name,
- state,
- pid,
- ancestry,
- uid,
- victim_badness,
- oom_score,
- oom_score_adj,
- vm_size,
- str(vm_rss).rjust(len_vm),
- detailed_rss_info,
- str(vm_swap).rjust(len_vm),
- victim_cgroup_v1,
- victim_cgroup_v2,
- realpath,
- cmdline,
- victim_lifetime)
-
- return victim_info
-
-
-def implement_corrective_action(signal):
- """
- Find victim with highest badness and send SIGTERM/SIGKILL
- """
- time0 = time()
-
- # выходим из фции, если для SIGTERM порога не превышено время
- # min_delay_after_sigterm и спим в течение over_sleep
- if signal is SIGTERM:
-
- dt = time() - actions_time_dict['action_handled'][0]
-
- if dt < min_delay_after_sigterm:
- print('min_delay_after_sigterm IS NOT EXCEEDED ({} < {})'.format(
- round(dt, 3), min_delay_after_sigterm))
-
- if print_sleep_periods:
- log('Sleep {} sec [in implement_corrective_action()]'.format(
- over_sleep))
-
- sleep(over_sleep)
-
- return None # время задержки между действиями не истекло
- else:
- print('min_delay_after_sigterm IS EXCEEDED, it is time to action')
-
- """
-
- При заходе в фцию проверяем права на сигтерм. Права на сигкилл у нас
- всегда есть
- (потому что идем дальше только после полн освободж памяти после
- смерти жертвы)
-
- actions_time_dict[action_handled] = time()
- actions_time_dict[veto] = True
-
- actions_time_dict['action_handled'] = [time(), victim_id]
-
-
-
- """
-
- log(mem_info)
-
- pid, victim_badness, name = find_victim(print_proc_table)
-
- if victim_badness >= min_badness:
-
- if print_victim_info:
- victim_info = find_victim_info(pid, victim_badness, name)
- log(victim_info)
-
- # пороги могли превысиься за время поиска жертвы (поиск может занимать
- # сотни миллисекунд)
- mem_available, swap_total, swap_free = check_mem_and_swap()
-
- ma_mib = int(mem_available) / 1024.0
- sf_mib = int(swap_free) / 1024.0
- log('Memory status before implementing a corrective act'
- 'ion:\n MemAvailable'
- ': {} MiB, SwapFree: {} MiB'.format(
- round(ma_mib, 1), round(sf_mib, 1)
- )
- )
-
- if (mem_available <= mem_min_sigkill_kb and
- swap_free <= swap_min_sigkill_kb):
- log('Hard threshold exceeded')
- signal = SIGKILL
-
- victim_id = get_victim_id(pid)
-
- # kill the victim if it doesn't respond to SIGTERM В ТЕЧЕНИЕ
- # ЗАДАННОГО ВРЕМЕНИ
-
- # переопределяем сигнал для старых жертв
- if signal is SIGTERM:
-
- if victim_id in victim_dict:
-
- dt = time() - victim_dict[victim_id]
-
- if dt > max_post_sigterm_victim_lifetime:
- print('max_post_sigterm_victim_lifetime exceeded: the '
- 'victim will get SIGKILL')
- signal = SIGKILL
-
- # matching with re to customize corrective actions
- soft_match = False
-
- if soft_actions and signal is SIGTERM:
- name = pid_to_name(pid)
- cgroup_v1 = pid_to_cgroup_v1(pid)
- service = ''
- cgroup_v1_tail = cgroup_v1.rpartition('/')[2]
- if cgroup_v1_tail.endswith('.service'):
- service = cgroup_v1_tail
- for i in soft_actions_list:
- unit = i[0]
- if unit == 'name':
- u = name
- else:
- u = cgroup_v1
- regexp = i[1]
- command = i[2]
- if search(regexp, u) is not None:
- log("Regexp '{}' matches with {} '{}'".format(
- regexp, unit, u))
- soft_match = True
- break
-
- if soft_match:
-
- # todo: make new func
- m = check_mem_and_swap()
- ma = int(m[0]) / 1024.0
- sf = int(m[2]) / 1024.0
- log('Memory status before implementing a corrective act'
- 'ion:\n MemAvailable'
- ': {} MiB, SwapFree: {} MiB'.format(
- round(ma, 1), round(sf, 1)
- )
- )
-
- cmd = command.replace(
- '$PID',
- pid).replace(
- '$NAME',
- pid_to_name(pid)).replace(
- '$SERVICE',
- service)
-
- exit_status = exe(cmd)
-
- exit_status = str(exit_status)
-
- response_time = time() - time0
-
- # тут надо, как и при дефолтном действии, проверять существование
- # жертвы, ее реакцию на действие,
- # и время ее смерти в случае успеха, о обновление таймстемпов
- # действия
-
- etc_info = 'Implement a corrective act' \
- 'ion:\n Run the command: {}' \
- '\n Exit status: {}; total response ' \
- 'time: {} ms'.format(
- cmd,
- exit_status,
- round(response_time * 1000))
-
- log(etc_info)
-
- key = "Run the command '{}'".format(cmd)
- update_stat_dict_and_print(key)
-
- if gui_notifications:
- send_notify_etc(
- pid,
- name,
- command.replace('$PID', pid).replace(
- '$NAME', pid_to_name(pid)))
-
- else:
-
- # обычное действие через сигнал
- try:
-
- os.kill(int(pid), signal)
- kill_timestamp = time()
- response_time = kill_timestamp - time0
-
- while True:
- exe_exists = os.path.exists('/proc/{}/exe'.format(pid))
- rss = pid_to_rss(pid)
- dt = time() - kill_timestamp
- log('Victim VmRSS: {} KiB'.format(rss))
- if not exe_exists or rss == 0 or dt > 0.01:
- # print(dt)
- break
- sleep(0.001)
-
- if dt > 0.01:
- log('Timer (value = 0.01 sec) expired; seems'
- ' like the victim handles signal')
-
- actions_time_dict['action_handled'] = [
- time(), get_victim_id(pid)]
-
- if victim_id not in victim_dict: # хз как надо.
- victim_dict.update({victim_id: time()})
-
- # log('actions_time_dict', actions_time_dict)
- # log('victim_dict', victim_dict)
-
- else:
- log('Process exited (VmRSS = 0) in {} sec'.format(
- round(dt, 5)))
-
- if signal is SIGKILL or not exe_exists or rss == 0:
-
- while True:
- sleep(0.001)
- # рсс не важен когда путь не существует. Проверяй
- # просто существование пид.
- rss = pid_to_rss(pid)
- if rss is None:
- break
- t1 = time()
- kill_duration = t1 - kill_timestamp
- log('The victim died in {} sec'.format(
- round(kill_duration, 3)))
-
- mem_available, swap_total, swap_free = check_mem_and_swap()
-
- ma_mib = int(mem_available) / 1024.0
- sf_mib = int(swap_free) / 1024.0
- log('Memory status after implementing a corrective act'
- 'ion:\n MemAvailable'
- ': {} MiB, SwapFree: {} MiB'.format(
- round(ma_mib, 1), round(sf_mib, 1)
- )
- )
-
- send_result = 'total response time: {} ms'.format(
- round(response_time * 1000))
-
- preventing_oom_message = 'Implement a corrective action:' \
- '\n Send {} to the victim; {}'.format(
- sig_dict[signal], send_result)
-
- key = 'Send {} to {}'.format(sig_dict[signal], name)
-
- if signal is SIGKILL and post_kill_exe != '':
-
- cmd = post_kill_exe.replace('$PID', pid).replace(
- '$NAME', pid_to_name(pid))
-
- log('Execute post_kill_exe')
-
- exe(cmd)
-
- if gui_notifications:
- send_notify(signal, name, pid)
-
- except FileNotFoundError:
- response_time = time() - time0
- send_result = 'no such process; response time: {} ms'.format(
- round(response_time * 1000))
- key = 'FileNotFoundError (the victim died in the se' \
- 'arch process): '
- except ProcessLookupError:
- response_time = time() - time0
- send_result = 'no such process; response time: {} ms'.format(
- round(response_time * 1000))
- key = 'ProcessLookupError (the victim died in the se' \
- 'arch process): '
-
- try:
- log(preventing_oom_message)
-
- except UnboundLocalError:
- preventing_oom_message = key
-
- update_stat_dict_and_print(key)
-
- else:
-
- response_time = time() - time0
- victim_badness_is_too_small = 'victim badness {} < min_b' \
- 'adness {}; nothing to do; response time: {} ms'.format(
- victim_badness,
- min_badness,
- round(response_time * 1000))
-
- log(victim_badness_is_too_small)
-
- # update stat_dict
- key = 'victim badness < min_badness'
- update_stat_dict_and_print(key)
-
- # тут надо поспать хорошенько. а может и счетчики поправить.
- # херню несу. во-первых, внезапно может кто-то появиться c блльшим
- # бэднес.. Далее надо минимизировать аутпут спам.
- sleep(over_sleep)
-
- # обновлять время не на каждый кил, а только на килл той жертвы,
- # которая не отвечала на софт экшн.
- # Вывод: ко времени действия прилагать также виктим айди.
-
- print('##################################################################')
-
-
-def sleep_after_check_mem():
- """Specify sleep times depends on rates and avialable memory."""
-
- if stable_sleep:
-
- if print_sleep_periods:
- log('Sleep {} sec'.format(min_sleep))
-
- sleep(min_sleep)
- return None
-
- if mem_min_sigkill_kb < mem_min_sigterm_kb:
- mem_point = mem_available - mem_min_sigterm_kb
- else:
- mem_point = mem_available - mem_min_sigkill_kb
-
- if swap_min_sigkill_kb < swap_min_sigterm_kb:
- swap_point = swap_free - swap_min_sigterm_kb
- else:
- swap_point = swap_free - swap_min_sigkill_kb
-
- if swap_point < 0:
- swap_point = 0
-
- if mem_point < 0:
- mem_point = 0
-
- t_mem = mem_point / rate_mem
- t_swap = swap_point / rate_swap
-
- if CHECK_ZRAM:
- t_zram = (mem_total * 0.8 - mem_used_zram) / rate_zram
- if t_zram < 0:
- t_zram = 0
- t_mem_zram = t_mem + t_zram
-
- t_mem_swap = t_mem + t_swap
-
- if CHECK_ZRAM:
-
- if t_mem_swap <= t_mem_zram:
- t = t_mem_swap
- else:
- t = t_mem_zram
- else:
- t = t_mem_swap
-
- if t > max_sleep:
- t = max_sleep
- elif t < min_sleep:
- t = min_sleep
- else:
- pass
-
- if print_sleep_periods:
-
- log(
- 'Sleep {} sec (t_mem={}, t_swap={}, t_zram={})'.format(
- round(t, 2),
- round(t_mem, 2),
- round(t_swap, 2),
- round(t_zram, 2)
- )
- )
-
- try:
- stdout.flush()
- except OSError:
- pass
-
- sleep(t)
-
-
-def calculate_percent(arg_key):
- """
- parse conf dict
- Calculate mem_min_KEY_percent.
-
- Try use this one)
- arg_key: str key for config_dict
- returns int mem_min_percent or NoneType if got some error
- """
-
- if arg_key in config_dict:
- mem_min = config_dict[arg_key]
-
- if mem_min.endswith('%'):
- # truncate percents, so we have a number
- mem_min_percent = mem_min[:-1].strip()
- # then 'float test'
- mem_min_percent = string_to_float_convert_test(mem_min_percent)
- if mem_min_percent is None:
- errprint('Invalid {} value, not float\nExit'.format(arg_key))
- exit(1)
- # Final validations...
- if mem_min_percent < 0 or mem_min_percent > 100:
- errprint(
- '{}, as percents value, out of ran'
- 'ge [0; 100]\nExit'.format(arg_key))
- exit(1)
-
- # mem_min_sigterm_percent is clean and valid float percentage. Can
- # translate into Kb
- mem_min_kb = mem_min_percent / 100 * mem_total
- mem_min_mb = round(mem_min_kb / 1024)
-
- elif mem_min.endswith('M'):
- mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip())
- if mem_min_mb is None:
- errprint('Invalid {} value, not float\nExit'.format(arg_key))
- exit(1)
- mem_min_kb = mem_min_mb * 1024
- if mem_min_kb > mem_total:
- errprint(
- '{} value can not be greater then MemT'
- 'otal ({} MiB)\nExit'.format(
- arg_key, round(
- mem_total / 1024)))
- exit(1)
- mem_min_percent = mem_min_kb / mem_total * 100
-
- else:
- log('Invalid {} units in config.\n Exit'.format(arg_key))
- exit(1)
- mem_min_percent = None
-
- else:
- log('{} not in config\nExit'.format(arg_key))
- exit(1)
- mem_min_percent = None
-
- return mem_min_kb, mem_min_mb, mem_min_percent
-
-
-##########################################################################
-
-
-start_time = time()
-
-
-help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG]
-
-optional arguments:
- -h, --help show this help message and exit
- -v, --version print version
- -t, --test print some tests
- -p, --print-proc-table
- print table of processes with their badness values
- -c CONFIG, --config CONFIG
- path to the config file, default values:
- ./nohang.conf, /etc/nohang/nohang.conf"""
-
-
-SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
-
-SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE'])
-
-conf_err_mess = 'Invalid config. Exit.'
-
-sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP]
-
-sig_dict = {
- SIGKILL: 'SIGKILL',
- SIGINT: 'SIGINT',
- SIGQUIT: 'SIGQUIT',
- SIGHUP: 'SIGHUP',
- SIGTERM: 'SIGTERM'
-}
-
-self_pid = str(os.getpid())
-
-self_uid = os.geteuid()
-
-if self_uid == 0:
- root = True
-else:
- root = False
-
-
-if os.path.exists('./nohang_notify_helper'):
- notify_helper_path = './nohang_notify_helper'
-else:
- notify_helper_path = '/usr/sbin/nohang_notify_helper'
-
-
-victim_dict = dict()
-
-
-victim_id = None
-actions_time_dict = dict()
-actions_time_dict['action_handled'] = [time(), victim_id]
-# print(actions_time_dict)
-
-
-# will store corrective actions stat
-stat_dict = dict()
-
-
-separate_log = False # will be overwritten after parse config
-
-
-cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes()
-
-
-self_oom_score_adj_min = '-600'
-self_oom_score_adj_max = '-6'
-
-
-write_self_oom_score_adj(self_oom_score_adj_min)
-
-
-pid_list = get_pid_list()
-
-
-print_proc_table_flag = False
-
-if len(argv) == 1:
- if os.path.exists('./nohang.conf'):
- config = os.getcwd() + '/nohang.conf'
- else:
- config = '/etc/nohang/nohang.conf'
-
-elif len(argv) == 2:
- if argv[1] == '--help' or argv[1] == '-h':
- print(help_mess)
- exit()
- elif argv[1] == '--version' or argv[1] == '-v':
- print_version()
- elif argv[1] == '--test' or argv[1] == '-t':
- test()
- elif argv[1] == '--print-proc-table' or argv[1] == '-p':
- print_proc_table_flag = True
- if os.path.exists('./nohang.conf'):
- config = os.getcwd() + '/nohang.conf'
- else:
- config = '/etc/nohang/nohang.conf'
- else:
- errprint('Unknown option: {}'.format(argv[1]))
- exit(1)
-
-elif len(argv) == 3:
- if argv[1] == '--config' or argv[1] == '-c':
- config = argv[2]
- else:
- errprint('Unknown option: {}'.format(argv[1]))
- exit(1)
-
-else:
- errprint('Invalid CLI input: too many options')
- exit(1)
-
-
-# find mem_total
-# find positions of SwapFree and SwapTotal in /proc/meminfo
-
-with open('/proc/meminfo') as f:
- mem_list = f.readlines()
-
-mem_list_names = []
-for s in mem_list:
- mem_list_names.append(s.split(':')[0])
-
-if mem_list_names[2] != 'MemAvailable':
- errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied')
- # exit(1)
-
-swap_total_index = mem_list_names.index('SwapTotal')
-swap_free_index = swap_total_index + 1
-
-mem_total = int(mem_list[0].split(':')[1][:-4])
-
-# Get names from /proc/*/status to be able to get VmRSS and VmSwap values
-
-with open('/proc/self/status') as file:
- status_list = file.readlines()
-
-status_names = []
-for s in status_list:
- status_names.append(s.split(':')[0])
-
-ppid_index = status_names.index('PPid')
-vm_size_index = status_names.index('VmSize')
-vm_rss_index = status_names.index('VmRSS')
-vm_swap_index = status_names.index('VmSwap')
-uid_index = status_names.index('Uid')
-state_index = status_names.index('State')
-
-
-try:
- anon_index = status_names.index('RssAnon')
- file_index = status_names.index('RssFile')
- shmem_index = status_names.index('RssShmem')
- detailed_rss = True
- # print(detailed_rss, 'detailed_rss')
-except ValueError:
- detailed_rss = False
- # print('It is not Linux 4.5+')
-
-
-log('Config: ' + config)
-
-
-##########################################################################
-
-# parsing the config with obtaining the parameters dictionary
-
-# conf_parameters_dict
-# conf_restart_dict
-
-# dictionary with config options
-config_dict = dict()
-
-processname_re_list = []
-cmdline_re_list = []
-environ_re_list = []
-uid_re_list = []
-cgroup_v1_re_list = []
-cgroup_v2_re_list = []
-realpath_re_list = []
-
-soft_actions_list = []
-
-
-# separator for optional parameters (that starts with @)
-opt_separator = '///'
-
-
-# stupid conf parsing, need refactoring
-try:
- with open(config) as f:
-
- for line in f:
-
- a = line.startswith('#')
- b = line.startswith('\n')
- c = line.startswith('\t')
- d = line.startswith(' ')
-
- etc = line.startswith('@SOFT_ACTION_RE_NAME')
- etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1')
-
- if not a and not b and not c and not d and not etc and not etc2:
- a = line.partition('=')
-
- key = a[0].strip()
- value = a[2].strip()
-
- if key not in config_dict:
- config_dict[key] = value
- else:
- log('ERROR: config key duplication: {}'.format(key))
- exit(1)
-
- if etc:
-
- a = line.partition('@SOFT_ACTION_RE_NAME')[
- 2].partition(opt_separator)
-
- a1 = 'name'
-
- a2 = a[0].strip()
- valid_re(a2)
-
- a3 = a[2].strip()
-
- zzz = (a1, a2, a3)
-
- soft_actions_list.append(zzz)
-
- if etc2:
-
- a = line.partition('@SOFT_ACTION_RE_CGROUP_V1')[
- 2].partition(opt_separator)
-
- a1 = 'cgroup_v1'
-
- a2 = a[0].strip()
- valid_re(a2)
-
- a3 = a[2].strip()
-
- zzz = (a1, a2, a3)
-
- soft_actions_list.append(zzz)
-
- if line.startswith('@PROCESSNAME_RE'):
- a = line.partition(
- '@PROCESSNAME_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- processname_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@CMDLINE_RE'):
- a = line.partition(
- '@CMDLINE_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- cmdline_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@UID_RE'):
- a = line.partition(
- '@UID_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- uid_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@CGROUP_V1_RE'):
- a = line.partition(
- '@CGROUP_V1_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- cgroup_v1_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@CGROUP_V2_RE'):
- a = line.partition(
- '@CGROUP_V2_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- cgroup_v2_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@REALPATH_RE'):
- a = line.partition(
- '@REALPATH_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- realpath_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@ENVIRON_RE'):
- a = line.partition(
- '@ENVIRON_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- environ_re_list.append((badness_adj, reg_exp))
-
-
-except PermissionError:
- errprint('PermissionError', conf_err_mess)
- exit(1)
-except UnicodeDecodeError:
- errprint('UnicodeDecodeError', conf_err_mess)
- exit(1)
-except IsADirectoryError:
- errprint('IsADirectoryError', conf_err_mess)
- exit(1)
-except IndexError:
- errprint('IndexError', conf_err_mess)
- exit(1)
-except FileNotFoundError:
- errprint('FileNotFoundError', conf_err_mess)
- exit(1)
-
-
-if processname_re_list == []:
- regex_matching = False
-else:
- regex_matching = True
-
-
-if cmdline_re_list == []:
- re_match_cmdline = False
-else:
- re_match_cmdline = True
-
-
-if uid_re_list == []:
- re_match_uid = False
-else:
- re_match_uid = True
-
-
-if environ_re_list == []:
- re_match_environ = False
-else:
- re_match_environ = True
-
-
-if realpath_re_list == []:
- re_match_realpath = False
-else:
- re_match_realpath = True
-
-
-if cgroup_v1_re_list == []:
- re_match_cgroup_v1 = False
-else:
- re_match_cgroup_v1 = True
-
-if cgroup_v2_re_list == []:
- re_match_cgroup_v2 = False
-else:
- re_match_cgroup_v2 = True
-
-
-# print(processname_re_list)
-# print(cmdline_re_list)
-# print(uid_re_list)
-# print(environ_re_list)
-# print(realpath_re_list)
-# print(cgroup_v1_re_list)
-# print(cgroup_v2_re_list)
-
-# print(soft_actions_list)
-
-if soft_actions_list == []:
- soft_actions = False
-else:
- soft_actions = True
-
-# print('soft_actions:', soft_actions)
-
-##########################################################################
-
-
-# extracting parameters from the dictionary
-# check for all necessary parameters
-# validation of all parameters
-psi_debug = conf_parse_bool('psi_debug')
-print_total_stat = conf_parse_bool('print_total_stat')
-print_proc_table = conf_parse_bool('print_proc_table')
-forbid_negative_badness = conf_parse_bool('forbid_negative_badness')
-print_victim_info = conf_parse_bool('print_victim_info')
-print_config = conf_parse_bool('print_config')
-print_mem_check_results = conf_parse_bool('print_mem_check_results')
-print_sleep_periods = conf_parse_bool('print_sleep_periods')
-gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
-gui_notifications = conf_parse_bool('gui_notifications')
-decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
-ignore_psi = conf_parse_bool('ignore_psi')
-ignore_zram = conf_parse_bool('ignore_zram')
-
-
-(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent
- ) = calculate_percent('mem_min_sigterm')
-
-(mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent
- ) = calculate_percent('mem_min_sigkill')
-
-(zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent
- ) = calculate_percent('zram_max_sigterm')
-
-(zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent
- ) = calculate_percent('zram_max_sigkill')
-
-(mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent
- ) = calculate_percent('mem_min_warnings')
-
-(zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent
- ) = calculate_percent('zram_max_warnings')
-
-
-if 'rate_mem' in config_dict:
- rate_mem = string_to_float_convert_test(config_dict['rate_mem'])
- if rate_mem is None:
- errprint('Invalid rate_mem value, not float\nExit')
- exit(1)
- if rate_mem <= 0:
- errprint('rate_mem MUST be > 0\nExit')
- exit(1)
-else:
- errprint('rate_mem not in config\nExit')
- exit(1)
-
-
-if 'rate_swap' in config_dict:
- rate_swap = string_to_float_convert_test(config_dict['rate_swap'])
- if rate_swap is None:
- errprint('Invalid rate_swap value, not float\nExit')
- exit(1)
- if rate_swap <= 0:
- errprint('rate_swap MUST be > 0\nExit')
- exit(1)
-else:
- errprint('rate_swap not in config\nExit')
- exit(1)
-
-
-if 'rate_zram' in config_dict:
- rate_zram = string_to_float_convert_test(config_dict['rate_zram'])
- if rate_zram is None:
- errprint('Invalid rate_zram value, not float\nExit')
- exit(1)
- if rate_zram <= 0:
- errprint('rate_zram MUST be > 0\nExit')
- exit(1)
-else:
- errprint('rate_zram not in config\nExit')
- exit(1)
-
-
-if 'swap_min_sigterm' in config_dict:
- swap_min_sigterm = config_dict['swap_min_sigterm']
-else:
- errprint('swap_min_sigterm not in config\nExit')
- exit(1)
-
-
-if 'swap_min_sigkill' in config_dict:
- swap_min_sigkill = config_dict['swap_min_sigkill']
-else:
- errprint('swap_min_sigkill not in config\nExit')
- exit(1)
-
-
-if 'min_delay_after_sigterm' in config_dict:
- min_delay_after_sigterm = string_to_float_convert_test(
- config_dict['min_delay_after_sigterm'])
- if min_delay_after_sigterm is None:
- errprint('Invalid min_delay_after_sigterm value, not float\nExit')
- exit(1)
- if min_delay_after_sigterm < 0:
- errprint('min_delay_after_sigterm must be positiv\nExit')
- exit(1)
-else:
- errprint('min_delay_after_sigterm not in config\nExit')
- exit(1)
-
-
-if 'psi_post_action_delay' in config_dict:
- psi_post_action_delay = string_to_float_convert_test(
- config_dict['psi_post_action_delay'])
- if psi_post_action_delay is None:
- errprint('Invalid psi_post_action_delay value, not float\nExit')
- exit(1)
- if psi_post_action_delay < 0:
- errprint('psi_post_action_delay must be positive\nExit')
- exit(1)
-else:
- errprint('psi_post_action_delay not in config\nExit')
- exit(1)
-
-
-if 'sigkill_psi_threshold' in config_dict:
- sigkill_psi_threshold = string_to_float_convert_test(
- config_dict['sigkill_psi_threshold'])
- if sigkill_psi_threshold is None:
- errprint('Invalid sigkill_psi_threshold value, not float\nExit')
- exit(1)
- if sigkill_psi_threshold < 0 or sigkill_psi_threshold > 100:
- errprint('sigkill_psi_threshold must be in the range [0; 100]\nExit')
- exit(1)
-else:
- errprint('sigkill_psi_threshold not in config\nExit')
- exit(1)
-
-
-if 'sigterm_psi_threshold' in config_dict:
- sigterm_psi_threshold = string_to_float_convert_test(
- config_dict['sigterm_psi_threshold'])
- if sigterm_psi_threshold is None:
- errprint('Invalid sigterm_psi_threshold value, not float\nExit')
- exit(1)
- if sigterm_psi_threshold < 0 or sigterm_psi_threshold > 100:
- errprint('sigterm_psi_threshold must be in the range [0; 100]\nExit')
- exit(1)
-else:
- errprint('sigterm_psi_threshold not in config\nExit')
- exit(1)
-
-
-if 'min_badness' in config_dict:
- min_badness = string_to_int_convert_test(
- config_dict['min_badness'])
- if min_badness is None:
- errprint('Invalid min_badness value, not integer\nExit')
- exit(1)
- if min_badness < 0 or min_badness > 1000:
- errprint('Invalud min_badness value\nExit')
- exit(1)
-else:
- errprint('min_badness not in config\nExit')
- exit(1)
-
-
-if 'oom_score_adj_max' in config_dict:
- oom_score_adj_max = string_to_int_convert_test(
- config_dict['oom_score_adj_max'])
- if oom_score_adj_max is None:
- errprint('Invalid oom_score_adj_max value, not integer\nExit')
- exit(1)
- if oom_score_adj_max < 0 or oom_score_adj_max > 1000:
- errprint('Invalid oom_score_adj_max value\nExit')
- exit(1)
-else:
- errprint('oom_score_adj_max not in config\nExit')
- exit(1)
-
-
-if 'min_time_between_warnings' in config_dict:
- min_time_between_warnings = string_to_float_convert_test(
- config_dict['min_time_between_warnings'])
- if min_time_between_warnings is None:
- errprint('Invalid min_time_between_warnings value, not float\nExit')
- exit(1)
- if min_time_between_warnings < 1 or min_time_between_warnings > 300:
- errprint('min_time_between_warnings value out of range [1; 300]\nExit')
- exit(1)
-else:
- errprint('min_time_between_warnings not in config\nExit')
- exit(1)
-
-
-if 'swap_min_warnings' in config_dict:
- swap_min_warnings = config_dict['swap_min_warnings']
-else:
- errprint('swap_min_warnings not in config\nExit')
- exit(1)
-
-
-if 'max_ancestry_depth' in config_dict:
- max_ancestry_depth = string_to_int_convert_test(
- config_dict['max_ancestry_depth'])
- if min_badness is None:
- errprint('Invalid max_ancestry_depth value, not integer\nExit')
- exit(1)
- if max_ancestry_depth < 1:
- errprint('Invalud max_ancestry_depth value\nExit')
- exit(1)
-else:
- errprint('max_ancestry_depth is not in config\nExit')
- exit(1)
-
-
-if 'max_post_sigterm_victim_lifetime' in config_dict:
- max_post_sigterm_victim_lifetime = string_to_float_convert_test(
- config_dict['max_post_sigterm_victim_lifetime'])
- if max_post_sigterm_victim_lifetime is None:
- errprint('Invalid max_post_sigterm_victim_lifetime val'
- 'ue, not float\nExit')
- exit(1)
- if max_post_sigterm_victim_lifetime < 0:
- errprint('max_post_sigterm_victim_lifetime must be non-n'
- 'egative number\nExit')
- exit(1)
-else:
- errprint('max_post_sigterm_victim_lifetime is not in config\nExit')
- exit(1)
-
-
-if 'post_kill_exe' in config_dict:
- post_kill_exe = config_dict['post_kill_exe']
-else:
- errprint('post_kill_exe is not in config\nExit')
- exit(1)
-
-
-if 'psi_path' in config_dict:
- psi_path = config_dict['psi_path']
-else:
- errprint('psi_path is not in config\nExit')
- exit(1)
-
-
-
-
-
-
-
-if 'psi_target' in config_dict:
- psi_target = config_dict['psi_target']
-else:
- errprint('psi_target is not in config\nExit')
- exit(1)
-
-
-
-
-
-
-
-
-
-
-
-
-
-if 'psi_metrics' in config_dict:
- psi_metrics = config_dict['psi_metrics']
-else:
- errprint('psi_metrics is not in config\nExit')
- exit(1)
-
-
-if 'warning_exe' in config_dict:
- warning_exe = config_dict['warning_exe']
- if warning_exe != '':
- check_warning_exe = True
- else:
- check_warning_exe = False
-else:
- errprint('warning_exe is not in config\nExit')
- exit(1)
-
-
-if 'extra_table_info' in config_dict:
- extra_table_info = config_dict['extra_table_info']
- if (extra_table_info != 'None' and
- extra_table_info != 'cgroup_v1' and
- extra_table_info != 'cgroup_v2' and
- extra_table_info != 'cmdline' and
- extra_table_info != 'environ' and
- extra_table_info != 'realpath' and
- extra_table_info != 'All'):
-
- errprint('Invalid config: invalid extra_table_info value\nExit')
- exit(1)
-else:
- errprint('Invalid config: extra_table_info is not in config\nExit')
- exit(1)
-
-
-separate_log = conf_parse_bool('separate_log')
-
-if separate_log:
-
- import logging
- from logging import basicConfig
- from logging import info
-
- log_dir = '/var/log/nohang'
-
- try:
- os.mkdir(log_dir)
- except PermissionError:
- print('ERROR: can not create log dir')
- except FileExistsError:
- pass
-
- logfile = log_dir + '/nohang.log'
-
- try:
- with open(logfile, 'a') as f:
- pass
- except FileNotFoundError:
- print('ERROR: log FileNotFoundError')
- except PermissionError:
- print('ERROR: log PermissionError')
-
- try:
- basicConfig(
- filename=logfile,
- level=logging.INFO,
- format="%(asctime)s: %(message)s")
- except PermissionError:
- errprint('ERROR: Permission denied: {}'.format(logfile))
- except FileNotFoundError:
- errprint('ERROR: FileNotFoundError: {}'.format(logfile))
-
-
-if 'min_mem_report_interval' in config_dict:
- min_mem_report_interval = string_to_float_convert_test(
- config_dict['min_mem_report_interval'])
- if min_mem_report_interval is None:
- errprint('Invalid min_mem_report_interval value, not float\nExit')
- exit(1)
- if min_mem_report_interval < 0:
- errprint('min_mem_report_interval must be non-negative number\nExit')
- exit(1)
-else:
- errprint('min_mem_report_interval is not in config\nExit')
- exit(1)
-
-
-if 'max_sleep' in config_dict:
- max_sleep = string_to_float_convert_test(
- config_dict['max_sleep'])
- if max_sleep is None:
- errprint('Invalid max_sleep value, not float\nExit')
- exit(1)
- if max_sleep <= 0:
- errprint('max_sleep must be positive number\nExit')
- exit(1)
-else:
- errprint('max_sleep is not in config\nExit')
- exit(1)
-
-
-if 'min_sleep' in config_dict:
- min_sleep = string_to_float_convert_test(
- config_dict['min_sleep'])
- if min_sleep is None:
- errprint('Invalid min_sleep value, not float\nExit')
- exit(1)
- if min_sleep <= 0:
- errprint('min_sleep must be positive number\nExit')
- exit(1)
-else:
- errprint('min_sleep is not in config\nExit')
- exit(1)
-
-
-if 'over_sleep' in config_dict:
- over_sleep = string_to_float_convert_test(
- config_dict['over_sleep'])
- if over_sleep is None:
- errprint('Invalid over_sleep value, not float\nExit')
- exit(1)
- if over_sleep <= 0:
- errprint('over_sleep must be positive number\nExit')
- exit(1)
-else:
- errprint('over_sleep is not in config\nExit')
- exit(1)
-
-
-if max_sleep < min_sleep:
- errprint(
- 'max_sleep value must not exceed min_sleep value.\nExit'
- )
- exit(1)
-
-
-if min_sleep < over_sleep:
- errprint(
- 'min_sleep value must not exceed over_sleep value.\nExit'
- )
- exit(1)
-
-
-if max_sleep == min_sleep:
- stable_sleep = True
-else:
- stable_sleep = False
-
-
-if print_proc_table_flag:
-
- if not root:
- log('WARNING: effective UID != 0; euid={}; processes with other e'
- 'uids will be invisible for nohang'.format(self_uid))
-
- func_print_proc_table()
-
-
-##########################################################################
-
-
-psi_support = os.path.exists(psi_path)
-
-
-##########################################################################
-
-
-# Get KiB levels if it's possible.
-
-
-swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm)
-swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill)
-swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings)
-
-
-swap_term_is_percent = swap_min_sigterm_tuple[1]
-if swap_term_is_percent:
- swap_min_sigterm_percent = swap_min_sigterm_tuple[0]
-else:
- swap_min_sigterm_kb = swap_min_sigterm_tuple[0]
-
-
-swap_kill_is_percent = swap_min_sigkill_tuple[1]
-if swap_kill_is_percent:
- swap_min_sigkill_percent = swap_min_sigkill_tuple[0]
-else:
- swap_min_sigkill_kb = swap_min_sigkill_tuple[0]
-
-
-swap_warn_is_percent = swap_min_warnings_tuple[1]
-if swap_warn_is_percent:
- swap_min_warnings_percent = swap_min_warnings_tuple[0]
-else:
- swap_min_warnings_kb = swap_min_warnings_tuple[0]
-
-
-##########################################################################
-
-# outdated section, need fixes
-
-if print_config:
-
- print(
- '\n1. Memory levels to respond to as an OOM threat\n[display'
- 'ing these options need fix]\n')
-
- print('mem_min_sigterm: {} MiB, {} %'.format(
- round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1)))
- print('mem_min_sigkill: {} MiB, {} %'.format(
- round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1)))
-
- print('swap_min_sigterm: {}'.format(swap_min_sigterm))
- print('swap_min_sigkill: {}'.format(swap_min_sigkill))
-
- print('zram_max_sigterm: {} MiB, {} %'.format(
- round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1)))
- print('zram_max_sigkill: {} MiB, {} %'.format(
- round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1)))
-
- print('\n2. The frequency of checking the level of available m'
- 'emory (and CPU usage)\n')
- print('rate_mem: {}'.format(rate_mem))
- print('rate_swap: {}'.format(rate_swap))
- print('rate_zram: {}'.format(rate_zram))
-
- print('\n3. The prevention of killing innocent victims\n')
- print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm))
- print('min_badness: {}'.format(min_badness))
-
- print('decrease_oom_score_adj: {}'.format(
- decrease_oom_score_adj
- ))
- if decrease_oom_score_adj:
- print('oom_score_adj_max: {}'.format(oom_score_adj_max))
-
- print('\n4. Impact on the badness of processes via matching their'
- ' names, cmdlines ir UIDs with regular expressions\n')
-
- print('(todo)')
-
- print('\n5. The execution of a specific command instead of sen'
- 'ding the\nSIGTERM signal\n')
-
- print('\n6. GUI notifications:\n- OOM prevention results and\n- low m'
- 'emory warnings\n')
- print('gui_notifications: {}'.format(gui_notifications))
-
- print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings))
- if gui_low_memory_warnings:
- print('min_time_between_warnings: {}'.format(
- min_time_between_warnings))
-
- print('mem_min_warnings: {} MiB, {} %'.format(
- round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1)))
-
- print('swap_min_warnings: {}'.format(swap_min_warnings))
-
- print('zram_max_warnings: {} MiB, {} %'.format(
- round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1)))
-
- print('\n7. Output verbosity\n')
- print('print_config: {}'.format(print_config))
- print('print_mem_check_results: {}'.format(print_mem_check_results))
- print('print_sleep_periods: {}\n'.format(print_sleep_periods))
-
-
-##########################################################################
-
-
-# for calculating the column width when printing mem and zram
-mem_len = len(str(round(mem_total / 1024.0)))
-
-if gui_notifications:
- notify_sig_dict = {SIGKILL: 'Killing',
- SIGTERM: 'Terminating'}
-
-
-# convert rates from MiB/s to KiB/s
-rate_mem = rate_mem * 1024
-rate_swap = rate_swap * 1024
-rate_zram = rate_zram * 1024
-
-
-warn_time_now = 0
-warn_time_delta = 1000
-warn_timer = 0
-
-
-##########################################################################
-
-
-if not root:
- log('WARNING: effective UID != 0; euid={}; processes with other e'
- 'uids will be invisible for nohang'.format(self_uid))
-
-
-# Try to lock all memory
-
-mlockall()
-
-##########################################################################
-
-
-# print_self_rss()
-
-
-psi_avg_string = '' # will be overwritten if PSI monitoring enabled
-
-mem_used_zram = 0
-
-if psi_support and not ignore_psi:
- psi_t0 = time()
-
-
-if print_mem_check_results:
-
- # to find delta mem
- wt2 = 0
- new_mem = 0
-
- # init mem report interval
- report0 = 0
-
-
-# handle signals
-for i in sig_list:
- signal(i, signal_handler)
-
-
-CHECK_PSI = False
-if psi_support and not ignore_psi:
- CHECK_PSI = True
-
-
-CHECK_ZRAM = not ignore_zram
-
-log('Monitoring has started!')
-
-stdout.flush()
-
-
-
-
-
-
-i = cgroup2_root()
-
-print(i)
-print(psi_target)
-
-i = /foo
-
-
-##########################################################################
-
-
-while True:
-
- # Q = time()
-
- # FIND VALUES: mem, swap, zram, psi
-
- mem_available, swap_total, swap_free = check_mem_and_swap()
-
- # if swap_min_sigkill is set in percent
- if swap_kill_is_percent:
- swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0
-
- if swap_term_is_percent:
- swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0
-
- if swap_warn_is_percent:
- swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0
-
- if swap_total > swap_min_sigkill_kb:
- swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1))
- else:
- swap_sigkill_pc = '-'
-
- if swap_total > swap_min_sigterm_kb:
- swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1))
- else:
- swap_sigterm_pc = '-'
-
- if CHECK_ZRAM:
- mem_used_zram = check_zram()
-
- if CHECK_PSI:
- psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
- if time() - psi_t0 >= psi_post_action_delay:
- psi_post_action_delay_exceeded = True
- else:
- psi_post_action_delay_exceeded = False
-
- if print_mem_check_results:
- psi_avg_string = 'PSI avg value: {} | '.format(
- str(psi_avg_value).rjust(6))
-
- if print_mem_check_results:
-
- wt1 = time()
-
- delta = (mem_available + swap_free) - new_mem
-
- t_cycle = wt1 - wt2
-
- report_delta = wt1 - report0
-
- if report_delta >= min_mem_report_interval:
-
- mem_report = True
- new_mem = mem_available + swap_free
-
- report0 = wt1
-
- else:
- mem_report = False
-
- wt2 = time()
-
- if mem_report:
-
- speed = delta / 1024.0 / report_delta
- speed_info = ' | dMem: {} M/s'.format(
- str(round(speed)).rjust(5)
- )
-
- # Calculate 'swap-column' width
- swap_len = len(str(round(swap_total / 1024.0)))
-
- # Output available mem sizes
- if swap_total == 0 and mem_used_zram == 0:
- log('{}MemAvail: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- speed_info
- )
- )
-
- elif swap_total > 0 and mem_used_zram == 0:
- log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- human(swap_free, swap_len),
- just_percent_swap(swap_free / (swap_total + 0.1)),
- speed_info
- )
- )
-
- else:
- log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem'
- 'UsedZram: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- human(swap_free, swap_len),
- just_percent_swap(swap_free / (swap_total + 0.1)),
- human(mem_used_zram, mem_len),
- just_percent_mem(mem_used_zram / mem_total),
- speed_info
- )
- )
-
- ###########################################################################
-
- # CHECK HARD THRESHOLDS (SIGKILL LEVEL)
-
- if (mem_available <= mem_min_sigkill_kb and
- swap_free <= swap_min_sigkill_kb):
-
- mem_info = 'Hard threshold exceeded\nMemory status that requ' \
- 'ires corrective actions:' \
- '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
- 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
- 'p_min_sigkill [{} MiB, {} %]'.format(
- kib_to_mib(mem_available),
- percent(mem_available / mem_total),
- kib_to_mib(mem_min_sigkill_kb),
- percent(mem_min_sigkill_kb / mem_total),
- kib_to_mib(swap_free),
- percent(swap_free / (swap_total + 0.1)),
- kib_to_mib(swap_min_sigkill_kb),
- swap_sigkill_pc)
-
- implement_corrective_action(SIGKILL)
- psi_t0 = time()
- continue
-
- if CHECK_ZRAM:
- if mem_used_zram >= zram_max_sigkill_kb:
-
- mem_info = 'Hard threshold exceeded\nMemory status that requir' \
- 'es corrective actions:' \
- '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
- 'kill [{} MiB, {} %]'.format(
- kib_to_mib(mem_used_zram),
- percent(mem_used_zram / mem_total),
- kib_to_mib(zram_max_sigkill_kb),
- percent(zram_max_sigkill_kb / mem_total))
-
- implement_corrective_action(SIGKILL)
- psi_t0 = time()
- continue
-
- if CHECK_PSI:
- if psi_avg_value >= sigkill_psi_threshold:
- sigkill_psi_exceeded = True
- else:
- sigkill_psi_exceeded = False
-
- if sigkill_psi_exceeded and psi_post_action_delay_exceeded:
-
- mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \
- 'old ({})'.format(
- psi_avg_value, sigkill_psi_threshold)
-
- implement_corrective_action(SIGKILL)
- psi_t0 = time()
- continue
-
- ###########################################################################
-
- # CHECK SOFT THRESHOLDS (SIGTERM LEVEL)
-
- if (mem_available <= mem_min_sigterm_kb and
- swap_free <= swap_min_sigterm_kb):
-
- mem_info = 'Soft threshold exceeded\nMemory status that requi' \
- 'res corrective actions:' \
- '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
- 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
- 'p_min_sigterm [{} MiB, {} %]'.format(
- kib_to_mib(mem_available),
- percent(mem_available / mem_total),
- kib_to_mib(mem_min_sigterm_kb),
- round(mem_min_sigterm_percent, 1),
- kib_to_mib(swap_free),
- percent(swap_free / (swap_total + 0.1)),
- kib_to_mib(swap_min_sigterm_kb),
- swap_sigterm_pc)
-
- implement_corrective_action(SIGTERM)
- psi_t0 = time()
- continue
-
- if CHECK_ZRAM:
- if mem_used_zram >= zram_max_sigterm_kb:
-
- mem_info = 'Soft threshold exceeded\nMemory status that require' \
- 's corrective actions:\n MemUsedZram [{} MiB, {} %] >= zra' \
- 'm_max_sigterm [{} M, {} %]'.format(
- kib_to_mib(mem_used_zram),
- percent(mem_used_zram / mem_total),
- kib_to_mib(zram_max_sigterm_kb),
- percent(zram_max_sigterm_kb / mem_total))
-
- implement_corrective_action(SIGTERM)
- psi_t0 = time()
- continue
-
- if CHECK_PSI:
- if psi_avg_value >= sigterm_psi_threshold:
- sigterm_psi_exceeded = True
- else:
- sigterm_psi_exceeded = False
-
- if psi_debug:
- log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps'
- 'i_post_action_delay_exceeded: {}'.format(
- sigterm_psi_exceeded,
- sigkill_psi_exceeded,
- psi_post_action_delay_exceeded))
-
- if sigterm_psi_exceeded and psi_post_action_delay_exceeded:
-
- mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \
- 'shold ({})'.format(psi_avg_value, sigterm_psi_threshold)
-
- implement_corrective_action(SIGTERM)
- psi_t0 = time()
- continue
-
- ###########################################################################
-
- if gui_low_memory_warnings:
-
- if (mem_available <= mem_min_warnings_kb and
- swap_free <= swap_min_warnings_kb + 0.1 or
- mem_used_zram >= zram_max_warnings_kb):
-
- warn_time_delta = time() - warn_time_now
- warn_time_now = time()
- warn_timer += warn_time_delta
- if warn_timer > min_time_between_warnings:
- send_notify_warn()
- warn_timer = 0
-
-
- # x = time() - Q
- # print(x * 1000)
-
- sleep_after_check_mem()
diff --git a/trash/new5.conf b/trash/new5.conf
deleted file mode 100644
index 46681bd..0000000
--- a/trash/new5.conf
+++ /dev/null
@@ -1,140 +0,0 @@
-This is nohang config file.
-Lines starting with $ contain obligatory parameters.
-Lines starting with @ contain optional parameters.
-Other lines are comments.
-
- 0. Common zram settings
-
- See https://www.kernel.org/doc/Documentation/blockdev/zram.txt
- You maybe need $IGNORE_ZRAM=FALSE if you has a big zram disksize
-
-$IGNORE_ZRAM = TRUE
-
- 1. Common PSI settings
-
- See
- https://lwn.net/Articles/759658/
- https://facebookmicrosites.github.io/psi/
-
-$IGNORE_PSI = TRUE
-
-$PSI_PATH = /proc/pressure/memory
-
- some_avg10 is most sensitive.
-
-$PSI_METRICS = some_avg10
-
-$PSI_EXCESS_DURATION = 0
-
-$PSI_POST_ACTION_DELAY = 40
-
-
-
- 2. Poll rate
-
-$FILL_RATE_MEM = 4000
-$FILL_RATE_SWAP = 1500
-$FILL_RATE_ZRAM = 500
-$MIN_SLEEP = 0.1
-$MAX_SLEEP = 3
-
-
- 3. Warnings / GUI notifications
-
-$GUI_CORRECTIVE_ACTIONS = FALSE
-
-$GUI_LOW_MEMORY_WARNINGS = FALSE
-$GUI_WARNINGS_MIN_MEM = 20 %
-$GUI_WARNINGS_MIN_SWAP = 20 %
-$GUI_WARNINGS_MAX_ZRAM = 45 %
-$GUI_MIN_DELAY_AFTER_WARNING = 15
-
-@EXE_INSTEAD_OF_GUI_WARNING wall -n "LOW MEMORY!"
-@EXE_INSTEAD_OF_GUI_WARNING echo 'test'
-
-
- 4. Soft threshold / SIGTERM-related parameters
-
-$SOFT_MIN_MEM_THRESHOLD = 10 %
-$SOFT_MIN_SWAP_THRESHOLD = 10 %
-$SOFT_MAX_ZRAM_THRESHOLD = 50 %
-
-$SOFT_POST_ACTION_DELAY = 0.2
-
-$MAX_POST_SOFT_ACTION_VICTIM_LIFETIME = 9
-
-$SOFT_MAX_PSI_THRESHOLD = 60
-$SOFT_MAX_PSI_DURATION = 5
-
- The execution of specified command instead of sending the SIGTERM signal.
- Syntax example:
- ///
-
-@EXE_INSTEAD_OF_SIGTERM_RE_NAME ^foo$ /// kill -9 $PID && echo "Praise KEK, kill $NAME" &
-
-@EXE_INSTEAD_OF_SIGTERM_RE_CMDLINE ^/sbin/foo /// systemctl restart foo
-
-@EXE_INSTEAD_OF_SIGTERM_RE_REALPATH ^/sbin/bar$ /// systemctl restart foo
-
-@EXE_INSTEAD_OF_SIGTERM_RE_UID ^1000$ /// pkill -SEGV $NAME
-@EXE_INSTEAD_OF_SIGTERM_RE_UID ^1001$ /// pkill -HUP $NAME
-
-@SOFT_THRESHOLD_EXE_RE_NAME
-
- 5. Hard threshold / SIGKILL-related parameters
-
-$HARD_MIN_MEM_THRESHOLD = 5 %
-$HARD_MIN_SWAP_THRESHOLD = 5 %
-$HARD_MAX_ZRAM_THRESHOLD = 55 %
-
-$POST_KILL_EXE =
-
-$HARD_POST_ACTION_DELAY = 1
-
-$HARD_MAX_PSI_THRESHOLD = 90
-$HARD_MAX_PSI_DURATION = 5
-
-
-$POST_KILL_EXE =
-
-
-
- 6. Adjusting badness of processes
-
-$OOM_SCORE_ADJ_LIMIT = -1
-
- Badness adjusting by matching process name, cmdline and eUID with specified regular expression.
-
- Example badness adj rules
- ///
-
-@BADNESS_ADJ_RE_CMDLINE -childID|--type=renderer /// 200
-
-@BADNESS_ADJ_RE_NAME ^Xorg$ /// -100
-
-@BADNESS_ADJ_RE_UID ^0$ /// -50
-
-@BADNESS_ADJ_RE_REALPATH ^/usr/bin/tail$ /// 100
-
-
-
- 7. Avoid killing small processes (innocent victims)
-
-$MIN_VICTIM_BADNESS = 20
-
-
- 8. Verbosity
-
- $PRINT_CONFIG_AT_STARTUP = FALSE // --print-config
-
-$MIN_MEM_REPORT_INTERVAL = -1
-
-$PRINT_VICTIM_INFO = TRUE
-
-$PRINT_TOTAL_STAT = TRUE
-
-$PRINT_PROC_TABLE = FALSE
-
-$PRINT_SLEEP_PERIODS = FALSE
-
-
diff --git a/trash/nohang 0.2 rc1 b/trash/nohang 0.2 rc1
deleted file mode 100755
index 66de8d0..0000000
--- a/trash/nohang 0.2 rc1
+++ /dev/null
@@ -1,2946 +0,0 @@
-#!/usr/bin/env python3
-"""A daemon that prevents OOM in Linux systems."""
-
-import os
-from ctypes import CDLL
-from time import sleep, time
-from operator import itemgetter
-from sys import stdout, stderr, argv, exit, version
-from signal import (signal,
- SIGKILL, SIGTERM, SIGINT, SIGQUIT,
- SIGHUP, SIGABRT, SIGSEGV, SIGBUS)
-from re import search
-from sre_constants import error as invalid_re
-
-start_time = time()
-
-
-help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG]
-
-optional arguments:
- -h, --help show this help message and exit
- -v, --version print version
- -t, --test print some tests
- -p, --print-proc-table
- print table of processes with their badness values
- -c CONFIG, --config CONFIG
- path to the config file, default values:
- ./nohang.conf, /etc/nohang/nohang.conf"""
-
-
-SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
-
-conf_err_mess = 'Invalid config. Exit.'
-
-sig_dict = {SIGKILL: 'SIGKILL',
- SIGTERM: 'SIGTERM'}
-
-self_pid = str(os.getpid())
-
-self_uid = os.geteuid()
-
-if self_uid == 0:
- root = True
-else:
- root = False
-
-
-if os.path.exists('./nohang_notify_helper'):
- notify_helper_path = './nohang_notify_helper'
-else:
- notify_helper_path = '/usr/sbin/nohang_notify_helper'
-
-
-victim_dict = dict()
-
-
-# will store corrective actions stat
-stat_dict = dict()
-
-
-separate_log = False # will be overwritten after parse config
-
-
-def find_cgroup_indexes():
- """ Find cgroup-line positions in /proc/*/cgroup file.
- """
-
- cgroup_v1_index = None
- cgroup_v2_index = None
-
- with open('/proc/self/cgroup') as f:
- for index, line in enumerate(f):
- if ':name=' in line:
- cgroup_v1_index = index
- if line.startswith('0::'):
- cgroup_v2_index = index
-
- return cgroup_v1_index, cgroup_v2_index
-
-
-cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes()
-
-
-##########################################################################
-
-# define functions
-
-
-def self_rss():
- """
- """
- return pid_to_status(self_pid)[5]
-
-
-def print_self_rss():
- """
- """
- log('Self RSS: {} MiB'.format(self_rss()))
-
-
-def signal_handler(signum, frame):
- log('Got signal {}'.format(signum))
- update_stat_dict_and_print(None)
- log('Exit')
- exit()
-
-
-def write(path, string):
- """
- """
- with open(path, 'w') as f:
- f.write(string)
-
-
-def write_self_oom_score_adj(new_value):
- """
- """
- if root:
- write('/proc/self/oom_score_adj', new_value)
-
-
-self_oom_score_adj_min = '-600'
-self_oom_score_adj_max = '-6'
-
-
-write_self_oom_score_adj(self_oom_score_adj_min)
-
-
-def exe(cmd):
- """
- """
- log('Execute the command: {}'.format(cmd))
- t0 = time()
- write_self_oom_score_adj(self_oom_score_adj_max)
- err = os.system(cmd)
- write_self_oom_score_adj(self_oom_score_adj_min)
- dt = time() - t0
- log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3)))
- return err
-
-
-def valid_re(reg_exp):
- """Validate regular expression.
- """
- try:
- search(reg_exp, '')
- except invalid_re:
- log('Invalid config: invalid regexp: {}'.format(reg_exp))
- exit(1)
-
-
-def func_print_proc_table():
- """
- """
- print_proc_table = True
- find_victim(print_proc_table)
- exit()
-
-
-def log(*msg):
- """
- """
- try:
- print(*msg)
- except OSError:
- sleep(0.01)
- # print('OSError in print(*msg)')
-
- if separate_log:
- # need fix: TypeError: not all arguments converted during string
- # formatting
-
- try:
- info(*msg)
- except OSError:
- sleep(0.01)
- # print('OSError in info(*msg)')
-
-
-def print_version():
- """
- сначала пытаться получ версию прямо из гита - вариант для неустановленых,
- для тех, кто еще не запускал make install
- """
- try:
- v = rline1('/etc/nohang/version')
- except FileNotFoundError:
- v = None
- if v is None:
- print('Nohang unknown version')
- else:
- print('Nohang ' + v)
- exit()
-
-
-def test():
- """
- """
-
- print(version)
- print(argv)
-
- hr = '=================================='
- print(hr)
- print("uptime()")
- print(uptime())
-
- print(hr)
- print("os.uname()")
- print(os.uname())
-
- print(hr)
- print("pid_to_starttime('self')")
- print(pid_to_starttime('self'))
-
- print(hr)
- print("get_victim_id('self')")
- print(get_victim_id('self'))
-
- print(hr)
- print("errprint('test')")
- print(errprint('test'))
-
- print(hr)
- print("mlockall()")
- print(mlockall())
-
- print(hr)
- print("pid_to_state('2')")
- print(pid_to_state('2'))
-
- '''
- print(hr)
- print("update_stat_dict_and_print('key')")
- print(update_stat_dict_and_print('key'))
-
- print(hr)
- print("psi_mem_some_avg_total()")
- print(psi_mem_some_avg_total())
-
- print(hr)
- print("psi_mem_some_avg10()")
- print(psi_mem_some_avg10())
-
-
-
- '''
-
- print(hr)
- exit()
-
-
-##########################################################################
-
-
-def pid_to_cgroup_v1(pid):
- """
- """
- cgroup_v1 = ''
- try:
- with open('/proc/' + pid + '/cgroup') as f:
- for index, line in enumerate(f):
- if index == cgroup_v1_index:
- cgroup_v1 = '/' + line.partition('/')[2][:-1]
- return cgroup_v1
- except FileNotFoundError:
- return ''
-
-
-def pid_to_cgroup_v2(pid):
- """
- """
- cgroup_v2 = ''
- try:
- with open('/proc/' + pid + '/cgroup') as f:
- for index, line in enumerate(f):
- if index == cgroup_v2_index:
- cgroup_v2 = line[3:-1]
- return cgroup_v2
- except FileNotFoundError:
- return ''
-
-
-def pid_to_starttime(pid):
- """ handle FNF error!
- """
- try:
- starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[
- 2].split(' ')[20]
-
- except UnicodeDecodeError:
- # print('LOL')
- with open('/proc/' + pid + '/stat', 'rb') as f:
- starttime = f.read().decode('utf-8', 'ignore').rpartition(
- ')')[2].split(' ')[20]
-
- return float(starttime) / SC_CLK_TCK
-
-
-def get_victim_id(pid):
- """victim_id is starttime + pid"""
- try:
- return rline1('/proc/' + pid + '/stat').rpartition(
- ')')[2].split(' ')[20] + pid
- except FileNotFoundError:
- return ''
-
-
-def pid_to_state(pid):
- """ Handle FNF error! (BTW it already handled in find_victim_info())
- """
- return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1]
-
-
-def pid_to_name(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/comm', 'rb') as f:
- return f.read().decode('utf-8', 'ignore')[:-1]
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
-
-
-def pid_to_ppid(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/status') as f:
- for n, line in enumerate(f):
- if n is ppid_index:
- return line.split('\t')[1].strip()
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
- except UnicodeDecodeError:
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
- for i in range(len(f_list)):
- if i is ppid_index:
- return f_list[i].split('\t')[1]
-
-
-def pid_to_ancestry(pid, max_ancestry_depth=1):
- """
- """
- if max_ancestry_depth == 1:
- ppid = pid_to_ppid(pid)
- pname = pid_to_name(ppid)
- return '\n PPID: {} ({})'.format(ppid, pname)
- if max_ancestry_depth == 0:
- return ''
- anc_list = []
- for i in range(max_ancestry_depth):
- ppid = pid_to_ppid(pid)
- pname = pid_to_name(ppid)
- anc_list.append((ppid, pname))
- if ppid == '1':
- break
- pid = ppid
- a = ''
- for i in anc_list:
- a = a + ' <= PID {} ({})'.format(i[0], i[1])
- return '\n Ancestry: ' + a[4:]
-
-
-def pid_to_cmdline(pid):
- """
- Get process cmdline by pid.
-
- pid: str pid of required process
- returns string cmdline
- """
- try:
- with open('/proc/' + pid + '/cmdline') as f:
- return f.read().replace('\x00', ' ').rstrip()
- except FileNotFoundError:
- return ''
-
-
-def pid_to_environ(pid):
- """
- Get process environ by pid.
-
- pid: str pid of required process
- returns string environ
- """
- try:
- with open('/proc/' + pid + '/environ') as f:
- return f.read().replace('\x00', ' ').rstrip()
- except FileNotFoundError:
- return ''
-
-
-def pid_to_realpath(pid):
- try:
- return os.path.realpath('/proc/' + pid + '/exe')
- except FileNotFoundError:
- return ''
-
-
-def pid_to_uid(pid):
- """return euid"""
- try:
- with open('/proc/' + pid + '/status') as f:
- for n, line in enumerate(f):
- if n is uid_index:
- return line.split('\t')[2]
- except UnicodeDecodeError:
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
- return f_list[uid_index].split('\t')[2]
- except FileNotFoundError:
- return ''
-
-
-def pid_to_badness(pid):
- """Find and modify badness (if it needs)."""
-
- try:
-
- oom_score = int(rline1('/proc/' + pid + '/oom_score'))
- badness = oom_score
-
- if decrease_oom_score_adj:
- oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
- if badness > oom_score_adj_max and oom_score_adj > 0:
- badness = badness - oom_score_adj + oom_score_adj_max
-
- if regex_matching:
- name = pid_to_name(pid)
- for re_tup in processname_re_list:
- if search(re_tup[1], name) is not None:
- badness += int(re_tup[0])
-
- if re_match_cgroup_v1:
- cgroup_v1 = pid_to_cgroup_v1(pid)
- for re_tup in cgroup_v1_re_list:
- if search(re_tup[1], cgroup_v1) is not None:
- badness += int(re_tup[0])
-
- if re_match_cgroup_v2:
- cgroup_v2 = pid_to_cgroup_v2(pid)
- for re_tup in cgroup_v2_re_list:
- if search(re_tup[1], cgroup_v2) is not None:
- badness += int(re_tup[0])
-
- if re_match_realpath:
- realpath = pid_to_realpath(pid)
- for re_tup in realpath_re_list:
- if search(re_tup[1], realpath) is not None:
- badness += int(re_tup[0])
-
- if re_match_cmdline:
- cmdline = pid_to_cmdline(pid)
- for re_tup in cmdline_re_list:
- if search(re_tup[1], cmdline) is not None:
- badness += int(re_tup[0])
-
- if re_match_environ:
- environ = pid_to_environ(pid)
- for re_tup in environ_re_list:
- if search(re_tup[1], environ) is not None:
- badness += int(re_tup[0])
-
- if re_match_uid:
- uid = pid_to_uid(pid)
- for re_tup in uid_re_list:
- if search(re_tup[1], uid) is not None:
- badness += int(re_tup[0])
-
- if forbid_negative_badness:
- if badness < 0:
- badness = 0
-
- return badness, oom_score
-
- except FileNotFoundError:
- return None, None
- except ProcessLookupError:
- return None, None
-
-
-def pid_to_status(pid):
- """
- """
-
- try:
-
- with open('/proc/' + pid + '/status') as f:
-
- for n, line in enumerate(f):
-
- if n is 0:
- name = line.split('\t')[1][:-1]
-
- if n is state_index:
- state = line.split('\t')[1][0]
- continue
-
- if n is ppid_index:
- ppid = line.split('\t')[1][:-1]
- continue
-
- if n is uid_index:
- uid = line.split('\t')[2]
- continue
-
- if n is vm_size_index:
- vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_rss_index:
- vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_swap_index:
- vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
- break
-
- return name, state, ppid, uid, vm_size, vm_rss, vm_swap
-
- except UnicodeDecodeError:
- return pid_to_status_unicode(pid)
-
- except FileNotFoundError:
- return None
-
- except ProcessLookupError:
- return None
-
- except ValueError:
- return None
-
-
-def pid_to_status_unicode(pid):
- """
- """
- try:
-
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
-
- for i in range(len(f_list)):
-
- if i is 0:
- name = f_list[i].split('\t')[1]
-
- if i is state_index:
- state = f_list[i].split('\t')[1][0]
-
- if i is ppid_index:
- ppid = f_list[i].split('\t')[1]
-
- if i is uid_index:
- uid = f_list[i].split('\t')[2]
-
- if i is vm_size_index:
- vm_size = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_rss_index:
- vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_swap_index:
- vm_swap = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- return name, state, ppid, uid, vm_size, vm_rss, vm_swap
-
- except FileNotFoundError:
- return None
-
- except ProcessLookupError:
- return None
-
- except ValueError:
- return None
-
-
-##########################################################################
-
-
-def uptime():
- """
- """
- return float(rline1('/proc/uptime').split(' ')[0])
-
-
-def errprint(*text):
- """
- """
- print(*text, file=stderr, flush=True)
-
-
-def mlockall():
- """Lock all memory to prevent swapping nohang process."""
-
- MCL_CURRENT = 1
- MCL_FUTURE = 2
- MCL_ONFAULT = 4
-
- libc = CDLL('libc.so.6', use_errno=True)
-
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
- )
- if result != 0:
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE
- )
- if result != 0:
- log('WARNING: cannot lock all memory')
- else:
- log('All memory locked with MCL_CURRENT | MCL_FUTURE')
- else:
- log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
-
-
-def update_stat_dict_and_print(key):
- """
- """
-
- if key is not None:
-
- if key not in stat_dict:
-
- stat_dict.update({key: 1})
-
- else:
-
- new_value = stat_dict[key] + 1
- stat_dict.update({key: new_value})
-
- if print_total_stat:
-
- stats_msg = 'Total stat (what happened in the last {}):'.format(
- format_time(time() - start_time))
-
- for i in stat_dict:
- stats_msg += '\n {}: {}'.format(i, stat_dict[i])
-
- log(stats_msg)
-
-
-def find_psi_metrics_value(psi_path, psi_metrics):
- """
- """
-
- if psi_support:
-
- if psi_metrics == 'some_avg10':
- return float(rline1(psi_path).split(' ')[1].split('=')[1])
- if psi_metrics == 'some_avg60':
- return float(rline1(psi_path).split(' ')[2].split('=')[1])
- if psi_metrics == 'some_avg300':
- return float(rline1(psi_path).split(' ')[3].split('=')[1])
-
- if psi_metrics == 'full_avg10':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[1].split('=')[1])
- if psi_metrics == 'full_avg60':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[2].split('=')[1])
- if psi_metrics == 'full_avg300':
- with open(psi_path) as f:
- psi_list = f.readlines()
- return float(psi_list[1].split(' ')[3].split('=')[1])
-
-
-def check_mem():
- """find mem_available"""
- # исправить название фции
- return int(rline1('/proc/meminfo').split(':')[1][:-4])
-
-
-def check_mem_and_swap():
- """find mem_available, swap_total, swap_free"""
- with open('/proc/meminfo') as f:
- for n, line in enumerate(f):
- if n is 2:
- mem_available = int(line.split(':')[1][:-4])
- continue
- if n is swap_total_index:
- swap_total = int(line.split(':')[1][:-4])
- continue
- if n is swap_free_index:
- swap_free = int(line.split(':')[1][:-4])
- break
- return mem_available, swap_total, swap_free
-
-
-def check_zram():
- """find MemUsedZram"""
- disksize_sum = 0
- mem_used_total_sum = 0
-
- for dev in os.listdir('/sys/block'):
- if dev.startswith('zram'):
- stat = zram_stat(dev)
- disksize_sum += int(stat[0])
- mem_used_total_sum += int(stat[1])
-
- # Means that when setting zram disksize = 1 GiB available memory
- # decrease by 0.0042 GiB.
- # Found experimentally, requires clarification with different kernaels and
- # architectures.
- # On small disk drives (up to gigabyte) it can be more, up to 0.0045.
- # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should
- # be 0.001:
- # ("zram uses about 0.1% of the size of the disk"
- # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt),
- # but this statement contradicts the experimental data.
- # ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize
- # Found experimentally.
- ZRAM_DISKSIZE_FACTOR = 0.0042
-
- return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0
-
-
-def format_time(t):
- """
- """
- t = int(t)
- if t < 60:
- return '{} sec'.format(t)
- elif t >= 60 and t < 3600:
- m = t // 60
- s = t % 60
- return '{} min {} sec'.format(m, s)
- else:
- h = t // 3600
- s0 = t - h * 3600
- m = s0 // 60
- s = s0 % 60
- return '{} h {} min {} sec'.format(h, m, s)
-
-
-def string_to_float_convert_test(string):
- """Try to interprete string values as floats."""
- try:
- return float(string)
- except ValueError:
- return None
-
-
-def string_to_int_convert_test(string):
- """Try to interpret string values as integers."""
- try:
- return int(string)
- except ValueError:
- return None
-
-
-def conf_parse_string(param):
- """
- Get string parameters from the config dict.
-
- param: config_dict key
- returns config_dict[param].strip()
- """
- if param in config_dict:
- return config_dict[param].strip()
- else:
- errprint('All the necessary parameters must be in the config')
- errprint('There is no "{}" parameter in the config'.format(param))
- exit(1)
-
-
-def conf_parse_bool(param):
- """
- Get bool parameters from the config_dict.
-
- param: config_dicst key
- returns bool
- """
- if param in config_dict:
- param_str = config_dict[param]
- if param_str == 'True':
- return True
- elif param_str == 'False':
- return False
- else:
- errprint('Invalid value of the "{}" parameter.'.format(param))
- errprint('Valid values are True and False.')
- errprint('Exit')
- exit(1)
- else:
- errprint('All the necessary parameters must be in the config')
- errprint('There is no "{}" parameter in the config'.format(param))
- exit(1)
-
-
-def rline1(path):
- """read 1st line from path."""
- try:
- with open(path) as f:
- for line in f:
- return line[:-1]
- except UnicodeDecodeError:
- # print('UDE rline1', path)
- with open(path, 'rb') as f:
- return f.read(999).decode(
- 'utf-8', 'ignore').split('\n')[0] # use partition()!
-
-
-def kib_to_mib(num):
- """Convert KiB values to MiB values."""
- return round(num / 1024.0)
-
-
-def percent(num):
- """Interprete num as percentage."""
- return round(num * 100, 1)
-
-
-def just_percent_mem(num):
- """convert num to percent and justify"""
- return str(round(num * 100, 1)).rjust(4, ' ')
-
-
-def just_percent_swap(num):
- """
- """
- return str(round(num * 100, 1)).rjust(5, ' ')
-
-
-def human(num, lenth):
- """Convert KiB values to MiB values with right alignment"""
- return str(round(num / 1024)).rjust(lenth, ' ')
-
-
-def zram_stat(zram_id):
- """
- Get zram state.
-
- zram_id: str zram block-device id
- returns bytes diskcize, str mem_used_total
- """
- try:
- disksize = rline1('/sys/block/' + zram_id + '/disksize')
- except FileNotFoundError:
- return '0', '0'
- if disksize == ['0\n']:
- return '0', '0'
- try:
- mm_stat = rline1('/sys/block/' + zram_id + '/mm_stat').split(' ')
- mm_stat_list = []
- for i in mm_stat:
- if i != '':
- mm_stat_list.append(i)
- mem_used_total = mm_stat_list[2]
- except FileNotFoundError:
- mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total')
- return disksize, mem_used_total # BYTES, str
-
-
-def send_notify_warn():
- """
- Look for process with maximum 'badness' and warn user with notification.
- (implement Low memory warnings)
- """
-
- '''
- # find process with max badness
- fat_tuple = find_victim()
- pid = fat_tuple[0]
- name = pid_to_name(pid)
-
- if mem_used_zram > 0:
- low_mem_percent = '{}% {}% {}%'.format(
- round(mem_available / mem_total * 100),
- round(swap_free / (swap_total + 0.1) * 100),
- round(mem_used_zram / mem_total * 100))
- elif swap_free > 0:
- low_mem_percent = '{}% {}%'.format(
- round(mem_available / mem_total * 100),
- round(swap_free / (swap_total + 0.1) * 100))
- else:
- low_mem_percent = '{}%'.format(
- round(mem_available / mem_total * 100))
-
- # title = 'Low memory: {}'.format(low_mem_percent)
- title = 'Low memory'
- '''
-
- '''
- body2 = 'Next victim: {}[{}]'.format(
- name.replace(
- # symbol '&' can break notifications in some themes,
- # therefore it is replaced by '*'
- '&', '*'),
- pid
- )
- '''
-
- '''
- body = 'MemAvail: {}%\nSwapFree: {}%'.format(
- round(mem_available / mem_total * 100),
- round(swap_free / (swap_total + 0.1) * 100))
-
- if root: # If nohang was started by root
- # send notification to all active users with special script
- notify_helper(title, body)
- else: # Or by regular user
- # send notification to user that runs this nohang
- notify_send_wait(title, body)
- '''
-
- log('Warning threshold exceeded')
-
- if check_warning_exe:
- exe(warning_exe)
-
- else:
-
- title = 'Low memory'
-
- body = 'MemAvail: {}%\nSwapFree: {}%'.format(
- round(mem_available / mem_total * 100),
- round(swap_free / (swap_total + 0.1) * 100)
- )
-
- send_notification(title, body)
-
-
-def send_notify(signal, name, pid):
- """
- Notificate about OOM Preventing.
-
- signal: key for notify_sig_dict
- name: str process name
- pid: str process pid
- """
-
- # wait for memory release after corrective action
- # may be useful if free memory was about 0 immediately after
- # corrective action
- sleep(0.05)
-
- title = 'Freeze prevention'
- body = '{} [{}] {}'.format(
- notify_sig_dict[signal],
- pid,
- name.replace(
- # symbol '&' can break notifications in some themes,
- # therefore it is replaced by '*'
- '&', '*'
- )
- )
-
- send_notification(title, body)
-
-
-def send_notify_etc(pid, name, command):
- """
- Notificate about OOM Preventing.
-
- command: str command that will be executed
- name: str process name
- pid: str process pid
- """
- title = 'Freeze prevention'
- body = 'Victim is [{}] {}\nExecute the co' \
- 'mmand:\n{}'.format(
- pid, name.replace('&', '*'), command.replace('&', '*'))
-
- send_notification(title, body)
-
-
-def send_notification(title, body):
- """
- """
- split_by = '#' * 16
-
- t000 = time()
-
- path_to_cache = '/dev/shm/nohang_notify_cache_uid{}_time{}'.format(
- str(self_uid), t000
- )
-
- text = '{}{}{}'.format(title, split_by, body)
-
- try:
- with open(path_to_cache, 'w') as f:
- f.write(text)
- os.chmod(path_to_cache, 0o600)
- except OSError:
- log('OSError while send notification '
- '(No space left on device: /dev/shm)')
- return None
-
- cmd = '{} --uid {} --time {} &'.format(notify_helper_path, self_uid, t000)
-
- exe(cmd)
-
-
-def sleep_after_send_signal(signal):
- """
- Sleeping after signal was sent.
-
- signal: sent signal
- """
- if signal is SIGKILL:
- if print_sleep_periods:
- log(' sleep {}'.format(min_delay_after_sigkill))
- sleep(min_delay_after_sigkill)
- else:
- if print_sleep_periods:
- log('Sleep {} sec after implementing a corrective action'.format(
- min_delay_after_sigterm))
- sleep(min_delay_after_sigterm)
-
-
-def get_pid_list():
- """
- Find pid list expect kthreads and zombies
- """
- pid_list = []
- for pid in os.listdir('/proc'):
- if os.path.exists('/proc/' + pid + '/exe') is True:
- pid_list.append(pid)
- return pid_list
-
-
-pid_list = get_pid_list()
-
-
-def get_non_decimal_pids():
- """
- """
- non_decimal_list = []
- for pid in pid_list:
- if pid[0].isdecimal() is False:
- non_decimal_list.append(pid)
- return non_decimal_list
-
-
-def find_victim(_print_proc_table):
- """
- Find the process with highest badness and its badness adjustment
- Return pid and badness
- """
-
- ft1 = time()
-
- pid_list = get_pid_list()
-
- pid_list.remove(self_pid)
-
- if '1' in pid_list:
- pid_list.remove('1')
-
- non_decimal_list = get_non_decimal_pids()
-
- for i in non_decimal_list:
- if i in pid_list: # ????????????????????????????????????????????
- pid_list.remove(i)
-
- pid_badness_list = []
-
- if _print_proc_table:
-
- if extra_table_info == 'None':
- extra_table_title = ''
-
- elif extra_table_info == 'cgroup_v1':
- extra_table_title = 'CGroup_v1'
-
- elif extra_table_info == 'cgroup_v2':
- extra_table_title = 'CGroup_v2'
-
- elif extra_table_info == 'cmdline':
- extra_table_title = 'cmdline'
-
- elif extra_table_info == 'environ':
- extra_table_title = 'environ'
-
- elif extra_table_info == 'realpath':
- extra_table_title = 'realpath'
-
- elif extra_table_info == 'All':
- extra_table_title = '[CGroup] [CmdLine] [RealPath]'
- else:
- extra_table_title = ''
-
- hr = '#' * 115
-
- log(hr)
- log('# PID PPID badness oom_score oom_score_adj eUID S VmSize VmRSS VmSwap Name {}'.format(
- extra_table_title))
- log('#------- ------- ------- --------- ------------- ---------- - ------ ----- ------ --------------- --------')
-
- for pid in pid_list:
-
- badness = pid_to_badness(pid)[0]
-
- if badness is None:
- continue
-
- if _print_proc_table:
-
- try:
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
- except FileNotFoundError:
- continue
-
- if pid_to_status(pid) is None:
- continue
- else:
- (name, state, ppid, uid, vm_size, vm_rss,
- vm_swap) = pid_to_status(pid)
-
- if extra_table_info == 'None':
- extra_table_line = ''
-
- elif extra_table_info == 'cgroup_v1':
- extra_table_line = pid_to_cgroup_v1(pid)
-
- elif extra_table_info == 'cgroup_v2':
- extra_table_line = pid_to_cgroup_v2(pid)
-
- elif extra_table_info == 'cmdline':
- extra_table_line = pid_to_cmdline(pid)
-
- elif extra_table_info == 'environ':
- extra_table_line = pid_to_environ(pid)
-
- elif extra_table_info == 'realpath':
- extra_table_line = pid_to_realpath(pid)
-
- elif extra_table_info == 'All':
- extra_table_line = '[CG: {}] [CL: {}] [RP: {}]'.format(
- pid_to_cgroup_v1(pid),
- pid_to_cmdline(pid),
- pid_to_realpath(pid)
- )
- else:
- extra_table_line = ''
-
- log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format(
- pid.rjust(7),
- ppid.rjust(7),
- str(badness).rjust(7),
- oom_score.rjust(9),
- oom_score_adj.rjust(13),
- uid.rjust(10),
- state,
- str(vm_size).rjust(6),
- str(vm_rss).rjust(5),
- str(vm_swap).rjust(6),
- name.ljust(15),
- extra_table_line
- )
- )
-
- pid_badness_list.append((pid, badness))
-
- real_proc_num = len(pid_badness_list)
-
- # Make list of (pid, badness) tuples, sorted by 'badness' values
- # print(pid_badness_list)
- pid_tuple_list = sorted(
- pid_badness_list,
- key=itemgetter(1),
- reverse=True
- )[0]
-
- pid = pid_tuple_list[0]
-
- # Get maximum 'badness' value
- victim_badness = pid_tuple_list[1]
- victim_name = pid_to_name(pid)
-
- if _print_proc_table:
- log(hr)
-
- log('Found {} processes with existing realpaths'.format(real_proc_num))
-
- log(
- 'Process with highest badness (found in {} ms):\n PID: {}, Na'
- 'me: {}, badness: {}'.format(
- round((time() - ft1) * 1000),
- pid,
- victim_name,
- victim_badness
- )
- )
-
- return pid, victim_badness, victim_name
-
-
-def find_victim_info(pid, victim_badness, name):
- """
- """
- status0 = time()
-
- try:
-
- with open('/proc/' + pid + '/status') as f:
-
- for n, line in enumerate(f):
-
- if n is state_index:
- state = line.split('\t')[1].rstrip()
- continue
-
- if n is ppid_index:
- ppid = line.split('\t')[1]
- continue
-
- if n is uid_index:
- uid = line.split('\t')[2]
- continue
-
- if n is vm_size_index:
- vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_rss_index:
- vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
- continue
-
- if detailed_rss:
-
- if n is anon_index:
- anon_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is file_index:
- file_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is shmem_index:
- shmem_rss = kib_to_mib(
- int(line.split('\t')[1][:-4]))
- continue
-
- if n is vm_swap_index:
- vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
- break
-
- cmdline = pid_to_cmdline(pid)
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
-
- except FileNotFoundError:
- log('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
- except ProcessLookupError:
- log('The victim died in the search process: ProcessLookupError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ProcessLookupError')
- return None
- except UnicodeDecodeError:
-
- with open('/proc/' + pid + '/status', 'rb') as f:
- f_list = f.read().decode('utf-8', 'ignore').split('\n')
-
- for i in range(len(f_list)):
-
- if i is state_index:
- state = f_list[i].split('\t')[1].rstrip()
-
- if i is ppid_index:
- ppid = f_list[i].split('\t')[1]
-
- if i is uid_index:
- uid = f_list[i].split('\t')[2]
-
- if i is vm_size_index:
- vm_size = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_rss_index:
- vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3]))
-
- if detailed_rss:
-
- if i is anon_index:
- anon_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is file_index:
- file_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is shmem_index:
- shmem_rss = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- if i is vm_swap_index:
- vm_swap = kib_to_mib(
- int(f_list[i].split('\t')[1][:-3]))
-
- cmdline = pid_to_cmdline(pid)
- oom_score = rline1('/proc/' + pid + '/oom_score')
- oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
-
- except IndexError:
- log('The victim died in the search process: IndexError')
- update_stat_dict_and_print(
- 'The victim died in the search process: IndexError')
- return None
- except ValueError:
- log('The victim died in the search process: ValueError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ValueError')
- return None
- except FileNotFoundError:
- log('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
- except ProcessLookupError:
- log('The victim died in the search process: ProcessLookupError')
- update_stat_dict_and_print(
- 'The victim died in the search process: ProcessLookupError')
- return None
-
- # print((time() - status0) * 1000, 'status time')
-
- len_vm = len(str(vm_size))
-
- try:
- realpath = os.path.realpath('/proc/' + pid + '/exe')
- victim_lifetime = format_time(uptime() - pid_to_starttime(pid))
- victim_cgroup_v1 = pid_to_cgroup_v1(pid)
- victim_cgroup_v2 = pid_to_cgroup_v2(pid)
-
- except FileNotFoundError:
- print('The victim died in the search process: FileNotFoundError')
- update_stat_dict_and_print(
- 'The victim died in the search process: FileNotFoundError')
- return None
-
- # te1 = time()
- ancestry = pid_to_ancestry(pid, max_ancestry_depth)
- # print((time() - te1) * 1000, 'ms, ancestry')
-
- if detailed_rss:
- detailed_rss_info = ' (' \
- 'Anon: {} MiB, ' \
- 'File: {} MiB, ' \
- 'Shmem: {} MiB)'.format(
- anon_rss,
- file_rss,
- shmem_rss)
- else:
- detailed_rss_info = ''
-
- victim_info = 'Victim information (found in {} ms):' \
- '\n Name: {}' \
- '\n State: {}' \
- '\n PID: {}' \
- '{}' \
- '\n EUID: {}' \
- '\n badness: {}, ' \
- 'oom_score: {}, ' \
- 'oom_score_adj: {}' \
- '\n VmSize: {} MiB' \
- '\n VmRSS: {} MiB {}' \
- '\n VmSwap: {} MiB' \
- '\n CGroup_v1: {}' \
- '\n CGroup_v2: {}' \
- '\n Realpath: {}' \
- '\n Cmdline: {}' \
- '\n Lifetime: {}'.format(
- round((time() - status0) * 1000),
- name,
- state,
- pid,
- ancestry,
- uid,
- victim_badness,
- oom_score,
- oom_score_adj,
- vm_size,
- str(vm_rss).rjust(len_vm),
- detailed_rss_info,
- str(vm_swap).rjust(len_vm),
- victim_cgroup_v1,
- victim_cgroup_v2,
- realpath,
- cmdline,
- victim_lifetime)
-
- return victim_info
-
-
-# для дедупликации уведомлений
-dick = dict()
-dick['v'] = [1, 2, 3, time()]
-
-
-def implement_corrective_action(signal):
- """
- Find victim with highest badness and send SIGTERM/SIGKILL
- """
-
- notif = True
-
- log(mem_info)
-
- pid, victim_badness, name = find_victim(print_proc_table)
-
- if victim_badness >= min_badness:
-
- if print_victim_info:
- victim_info = find_victim_info(pid, victim_badness, name)
- log(victim_info)
-
- # kill the victim if it doesn't respond to SIGTERM
- if signal is SIGTERM:
- victim_id = get_victim_id(pid)
- if victim_id not in victim_dict:
- victim_dict.update({victim_id: time()})
- else:
- if time() - victim_dict[
- victim_id] > max_post_sigterm_victim_lifetime:
- print(
- '\nmax_post_sigterm_victim_lifetime excee'
- 'ded: the victim will get SIGKILL'
- )
- signal = SIGKILL
-
- soft_match = False
-
- if soft_actions and signal is SIGTERM:
- # если мягкий порог И список мягких не пуст:
- # итерируемся по списку, ища мэтчинги. Есть совпадения - выполн
- # команду и выход из цикла.
- name = pid_to_name(pid)
- cgroup_v1 = pid_to_cgroup_v1(pid)
- service = ''
- cgroup_v1_tail = cgroup_v1.rpartition('/')[2]
- # log(cgroup_v1_tail)
- if cgroup_v1_tail.endswith('.service'):
- service = cgroup_v1_tail
- # print('$SERVICE:', [service])
- # print('ИЩЕМ СОВПАДЕНИЯ ДЛЯ МЯГКИХ ДЕЙСТВИЙ')
- # итерируемся по списку кортежей
- for i in soft_actions_list:
- unit = i[0]
- if unit == 'name':
- u = name
- else:
- u = cgroup_v1
- regexp = i[1]
- command = i[2]
- # print([u, regexp, command])
- if search(regexp, u) is not None:
- log("Regexp '{}' matches with {} '{}'".format(regexp, unit, u))
- # print('СОВПАДЕНИЕ НАЙДЕНО')
- soft_match = True
- break
-
- if soft_match:
-
- # todo: make new func
- m = check_mem_and_swap()
- ma = int(m[0]) / 1024.0
- sf = int(m[2]) / 1024.0
- log('Memory status before implementing a corrective act'
- 'ion:\n MemAvailable'
- ': {} MiB, SwapFree: {} MiB'.format(
- round(ma, 1), round(sf, 1)
- )
- )
-
- cmd = command.replace(
- '$PID',
- pid).replace(
- '$NAME',
- pid_to_name(pid)).replace(
- '$SERVICE',
- service)
-
- exit_status = exe(cmd)
-
- exit_status = str(exit_status)
-
- response_time = time() - time0
-
- etc_info = 'Implement a corrective act' \
- 'ion:\n Run the command: {}' \
- '\n Exit status: {}; total response ' \
- 'time: {} ms'.format(
- cmd,
- exit_status,
- round(response_time * 1000))
-
- log(etc_info)
-
- key = "Run the command '{}'".format(cmd)
- update_stat_dict_and_print(key)
-
- if gui_notifications:
- send_notify_etc(
- pid,
- name,
- command.replace('$PID', pid).replace(
- '$NAME', pid_to_name(pid)))
-
- else:
-
- try:
-
- mem_available, swap_total, swap_free = check_mem_and_swap()
-
- ma_mib = int(mem_available) / 1024.0
- sf_mib = int(swap_free) / 1024.0
- log('Memory status before implementing a corrective act'
- 'ion:\n MemAvailable'
- ': {} MiB, SwapFree: {} MiB'.format(
- round(ma_mib, 1), round(sf_mib, 1)
- )
- )
-
- if (mem_available <= mem_min_sigkill_kb and
- swap_free <= swap_min_sigkill_kb):
- log('Hard threshold exceeded')
- signal = SIGKILL
-
- os.kill(int(pid), signal)
- response_time = time() - time0
- send_result = 'total response time: {} ms'.format(
- round(response_time * 1000))
-
- preventing_oom_message = 'Implement a corrective action:' \
- '\n Send {} to the victim; {}'.format(
- sig_dict[signal], send_result)
-
- key = 'Send {} to {}'.format(sig_dict[signal], name)
-
- if signal is SIGKILL and post_kill_exe != '':
-
- cmd = post_kill_exe.replace('$PID', pid).replace(
- '$NAME', pid_to_name(pid))
-
- log('Execute post_kill_exe')
-
- exe(cmd)
-
- if gui_notifications:
-
- # min delay after same notification
- # все не так. От этого вообще пол дедупликация . терминация
- # один раз покажется при любом раскладе.
- delay_after_same_notify = 1
-
- x = dick['v']
-
- dick['v'] = [signal, name, pid, time()]
-
- y = dick['v']
-
- # print(y[3] - x[3])
-
- if x[0] == y[0] and x[1] == y[1] and x[2] == y[2]:
- # print('совпадение имени, пид, сигнала')
-
- # сохр в словаре первре совпавшее время
- dt = y[3] - x[3]
- # print(dt, 'dt')
- if dt < delay_after_same_notify:
- notif = False
-
- if notif:
- send_notify(signal, name, pid)
-
- except FileNotFoundError:
- response_time = time() - time0
- send_result = 'no such process; response time: {} ms'.format(
- round(response_time * 1000))
- key = 'FileNotFoundError (the victim died in the se' \
- 'arch process): '
- except ProcessLookupError:
- response_time = time() - time0
- send_result = 'no such process; response time: {} ms'.format(
- round(response_time * 1000))
- key = 'ProcessLookupError (the victim died in the se' \
- 'arch process): '
-
- try:
- log(preventing_oom_message)
- except UnboundLocalError:
- preventing_oom_message = key
-
- update_stat_dict_and_print(key)
-
- else:
-
- response_time = time() - time0
- victim_badness_is_too_small = 'victim badness {} < min_b' \
- 'adness {}; nothing to do; response time: {} ms'.format(
- victim_badness,
- min_badness,
- round(response_time * 1000))
-
- log(victim_badness_is_too_small)
-
- # update stat_dict
- key = 'victim badness < min_badness'
- update_stat_dict_and_print(key)
-
- sleep_after_send_signal(signal)
-
-
-def sleep_after_check_mem():
- """Specify sleep times depends on rates and avialable memory."""
-
- if mem_min_sigkill_kb < mem_min_sigterm_kb:
- mem_point = mem_available - mem_min_sigterm_kb
- else:
- mem_point = mem_available - mem_min_sigkill_kb
-
- if swap_min_sigkill_kb < swap_min_sigterm_kb:
- swap_point = swap_free - swap_min_sigterm_kb
- else:
- swap_point = swap_free - swap_min_sigkill_kb
-
- if swap_point < 0:
- swap_point = 0
-
- if mem_point < 0:
- mem_point = 0
-
- t_mem = mem_point / rate_mem
- t_swap = swap_point / rate_swap
- t_zram = (mem_total * 0.8 - mem_used_zram) / rate_zram
- if t_zram < 0:
- t_zram = 0
-
- t_mem_swap = t_mem + t_swap
- t_mem_zram = t_mem + t_zram
-
- if t_mem_swap <= t_mem_zram:
- t = t_mem_swap
- else:
- t = t_mem_zram
-
- if t > max_sleep_time:
- t = max_sleep_time
- elif t < min_sleep_time:
- t = min_sleep_time
- else:
- pass
-
- if print_sleep_periods:
-
- log(
- 'Sleep {} sec (t_mem={}, t_swap={}, t_zram={})'.format(
- round(t, 2),
- round(t_mem, 2),
- round(t_swap, 2),
- round(t_zram, 2)
- )
- )
-
- try:
- stdout.flush()
- except OSError: # OSError: [Errno 105] No buffer space available
- pass
-
- sleep(t)
-
-
-def calculate_percent(arg_key):
- """
- parse conf dict
- Calculate mem_min_KEY_percent.
-
- Try use this one)
- arg_key: str key for config_dict
- returns int mem_min_percent or NoneType if got some error
- """
-
- if arg_key in config_dict:
- mem_min = config_dict[arg_key]
-
- if mem_min.endswith('%'):
- # truncate percents, so we have a number
- mem_min_percent = mem_min[:-1].strip()
- # then 'float test'
- mem_min_percent = string_to_float_convert_test(mem_min_percent)
- if mem_min_percent is None:
- errprint('Invalid {} value, not float\nExit'.format(arg_key))
- exit(1)
- # Final validations...
- if mem_min_percent < 0 or mem_min_percent > 100:
- errprint(
- '{}, as percents value, out of ran'
- 'ge [0; 100]\nExit'.format(arg_key))
- exit(1)
-
- # mem_min_sigterm_percent is clean and valid float percentage. Can
- # translate into Kb
- mem_min_kb = mem_min_percent / 100 * mem_total
- mem_min_mb = round(mem_min_kb / 1024)
-
- elif mem_min.endswith('M'):
- mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip())
- if mem_min_mb is None:
- errprint('Invalid {} value, not float\nExit'.format(arg_key))
- exit(1)
- mem_min_kb = mem_min_mb * 1024
- if mem_min_kb > mem_total:
- errprint(
- '{} value can not be greater then MemT'
- 'otal ({} MiB)\nExit'.format(
- arg_key, round(
- mem_total / 1024)))
- exit(1)
- mem_min_percent = mem_min_kb / mem_total * 100
-
- else:
- log('Invalid {} units in config.\n Exit'.format(arg_key))
- exit(1)
- mem_min_percent = None
-
- else:
- log('{} not in config\nExit'.format(arg_key))
- exit(1)
- mem_min_percent = None
-
- return mem_min_kb, mem_min_mb, mem_min_percent
-
-
-##########################################################################
-
-
-print_proc_table_flag = False
-
-# print(len(argv), argv)
-
-if len(argv) == 1:
- if os.path.exists('./nohang.conf'):
- config = os.getcwd() + '/nohang.conf'
- else:
- config = '/etc/nohang/nohang.conf'
-
-elif len(argv) == 2:
- if argv[1] == '--help' or argv[1] == '-h':
- print(help_mess)
- exit()
- elif argv[1] == '--version' or argv[1] == '-v':
- print_version()
- elif argv[1] == '--test' or argv[1] == '-t':
- test()
- elif argv[1] == '--print-proc-table' or argv[1] == '-p':
- print_proc_table_flag = True
- if os.path.exists('./nohang.conf'):
- config = os.getcwd() + '/nohang.conf'
- else:
- config = '/etc/nohang/nohang.conf'
- else:
- errprint('Unknown option: {}'.format(argv[1]))
- exit(1)
-
-elif len(argv) == 3:
- if argv[1] == '--config' or argv[1] == '-c':
- config = argv[2]
- else:
- errprint('Unknown option: {}'.format(argv[1]))
- exit(1)
-
-else:
- errprint('Invalid CLI input: too many options')
- exit(1)
-
-
-##########################################################################
-
-
-# find mem_total
-# find positions of SwapFree and SwapTotal in /proc/meminfo
-
-with open('/proc/meminfo') as f:
- mem_list = f.readlines()
-
-mem_list_names = []
-for s in mem_list:
- mem_list_names.append(s.split(':')[0])
-
-if mem_list_names[2] != 'MemAvailable':
- errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied')
- # exit(1)
-
-swap_total_index = mem_list_names.index('SwapTotal')
-swap_free_index = swap_total_index + 1
-
-mem_total = int(mem_list[0].split(':')[1][:-4])
-
-# Get names from /proc/*/status to be able to get VmRSS and VmSwap values
-
-with open('/proc/self/status') as file:
- status_list = file.readlines()
-
-status_names = []
-for s in status_list:
- status_names.append(s.split(':')[0])
-
-ppid_index = status_names.index('PPid')
-vm_size_index = status_names.index('VmSize')
-vm_rss_index = status_names.index('VmRSS')
-vm_swap_index = status_names.index('VmSwap')
-uid_index = status_names.index('Uid')
-state_index = status_names.index('State')
-
-
-try:
- anon_index = status_names.index('RssAnon')
- file_index = status_names.index('RssFile')
- shmem_index = status_names.index('RssShmem')
- detailed_rss = True
- # print(detailed_rss, 'detailed_rss')
-except ValueError:
- detailed_rss = False
- # print('It is not Linux 4.5+')
-
-##########################################################################
-
-
-log('Config: ' + config)
-
-
-##########################################################################
-
-# parsing the config with obtaining the parameters dictionary
-
-# conf_parameters_dict
-# conf_restart_dict
-
-# dictionary with config options
-config_dict = dict()
-
-processname_re_list = []
-cmdline_re_list = []
-environ_re_list = []
-uid_re_list = []
-cgroup_v1_re_list = []
-cgroup_v2_re_list = []
-realpath_re_list = []
-
-soft_actions_list = []
-
-
-# separator for optional parameters (that starts with @)
-opt_separator = '///'
-
-
-# stupid conf parsing, need refactoring
-try:
- with open(config) as f:
-
- for line in f:
-
- a = line.startswith('#')
- b = line.startswith('\n')
- c = line.startswith('\t')
- d = line.startswith(' ')
-
- etc = line.startswith('@SOFT_ACTION_RE_NAME')
- etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1')
-
- if not a and not b and not c and not d and not etc and not etc2:
- a = line.partition('=')
-
- key = a[0].strip()
- value = a[2].strip()
-
- if key not in config_dict:
- config_dict[key] = value
- else:
- log('ERROR: config key duplication: {}'.format(key))
- exit(1)
-
- if etc:
-
- # это остаток строки без первого ключа. Содержит: регулярка ///
- # команда
- a = line.partition('@SOFT_ACTION_RE_NAME')[
- 2].partition(opt_separator)
-
- a1 = 'name'
-
- a2 = a[0].strip()
- valid_re(a2)
-
- a3 = a[2].strip()
-
- zzz = (a1, a2, a3)
-
- # print(zzz)
-
- soft_actions_list.append(zzz)
-
- if etc2:
-
- # это остаток строки без первого ключа. Содержит: регулярка ///
- # команда
- a = line.partition('@SOFT_ACTION_RE_CGROUP_V1')[
- 2].partition(opt_separator)
-
- a1 = 'cgroup_v1'
-
- a2 = a[0].strip()
- valid_re(a2)
-
- a3 = a[2].strip()
-
- zzz = (a1, a2, a3)
-
- # print(zzz)
-
- soft_actions_list.append(zzz)
-
- if line.startswith('@PROCESSNAME_RE'):
- a = line.partition(
- '@PROCESSNAME_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- processname_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@CMDLINE_RE'):
- a = line.partition(
- '@CMDLINE_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- cmdline_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@UID_RE'):
- a = line.partition(
- '@UID_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- uid_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@CGROUP_V1_RE'):
- a = line.partition(
- '@CGROUP_V1_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- cgroup_v1_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@CGROUP_V2_RE'):
- a = line.partition(
- '@CGROUP_V2_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- cgroup_v2_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@REALPATH_RE'):
- a = line.partition(
- '@REALPATH_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- realpath_re_list.append((badness_adj, reg_exp))
-
- if line.startswith('@ENVIRON_RE'):
- a = line.partition(
- '@ENVIRON_RE')[2].strip(' \n').partition(opt_separator)
- badness_adj = a[0].strip(' ')
- reg_exp = a[2].strip(' ')
- valid_re(reg_exp)
- environ_re_list.append((badness_adj, reg_exp))
-
-
-except PermissionError:
- errprint('PermissionError', conf_err_mess)
- exit(1)
-except UnicodeDecodeError:
- errprint('UnicodeDecodeError', conf_err_mess)
- exit(1)
-except IsADirectoryError:
- errprint('IsADirectoryError', conf_err_mess)
- exit(1)
-except IndexError:
- errprint('IndexError', conf_err_mess)
- exit(1)
-except FileNotFoundError:
- errprint('FileNotFoundError', conf_err_mess)
- exit(1)
-
-
-if processname_re_list == []:
- regex_matching = False
-else:
- regex_matching = True
-
-
-if cmdline_re_list == []:
- re_match_cmdline = False
-else:
- re_match_cmdline = True
-
-
-if uid_re_list == []:
- re_match_uid = False
-else:
- re_match_uid = True
-
-
-if environ_re_list == []:
- re_match_environ = False
-else:
- re_match_environ = True
-
-
-if realpath_re_list == []:
- re_match_realpath = False
-else:
- re_match_realpath = True
-
-
-if cgroup_v1_re_list == []:
- re_match_cgroup_v1 = False
-else:
- re_match_cgroup_v1 = True
-
-if cgroup_v2_re_list == []:
- re_match_cgroup_v2 = False
-else:
- re_match_cgroup_v2 = True
-
-
-# print(processname_re_list)
-# print(cmdline_re_list)
-# print(uid_re_list)
-# print(environ_re_list)
-# print(realpath_re_list)
-# print(cgroup_v1_re_list)
-# print(cgroup_v2_re_list)
-
-
-# print(soft_actions_list)
-
-if soft_actions_list == []:
- soft_actions = False
-else:
- soft_actions = True
-
-# print('soft_actions:', soft_actions)
-
-##########################################################################
-
-
-# extracting parameters from the dictionary
-# check for all necessary parameters
-# validation of all parameters
-psi_debug = conf_parse_bool('psi_debug')
-print_total_stat = conf_parse_bool('print_total_stat')
-print_proc_table = conf_parse_bool('print_proc_table')
-forbid_negative_badness = conf_parse_bool('forbid_negative_badness')
-print_victim_info = conf_parse_bool('print_victim_info')
-print_config = conf_parse_bool('print_config')
-print_mem_check_results = conf_parse_bool('print_mem_check_results')
-print_sleep_periods = conf_parse_bool('print_sleep_periods')
-gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings')
-gui_notifications = conf_parse_bool('gui_notifications')
-decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj')
-ignore_psi = conf_parse_bool('ignore_psi')
-
-
-# regex_matching = conf_parse_bool('regex_matching')
-# re_match_cmdline = conf_parse_bool('re_match_cmdline')
-# re_match_uid = conf_parse_bool('re_match_uid')
-# re_match_cgroup_v1 = conf_parse_bool('re_match_cgroup_v1')
-# re_match_cgroup_v2 = conf_parse_bool('re_match_cgroup_v2')
-# re_match_realpath = conf_parse_bool('re_match_realpath')
-# re_match_environ = conf_parse_bool('re_match_environ')
-
-
-# if regex_matching or re_match_cmdline or re_match_uid or re_match_cgroup
-# or re_match_realpath:
-# from re import search
-# from sre_constants import error as invalid_re
-
-(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent
- ) = calculate_percent('mem_min_sigterm')
-
-(mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent
- ) = calculate_percent('mem_min_sigkill')
-
-(zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent
- ) = calculate_percent('zram_max_sigterm')
-
-(zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent
- ) = calculate_percent('zram_max_sigkill')
-
-(mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent
- ) = calculate_percent('mem_min_warnings')
-
-(zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent
- ) = calculate_percent('zram_max_warnings')
-
-
-if 'rate_mem' in config_dict:
- rate_mem = string_to_float_convert_test(config_dict['rate_mem'])
- if rate_mem is None:
- errprint('Invalid rate_mem value, not float\nExit')
- exit(1)
- if rate_mem <= 0:
- errprint('rate_mem MUST be > 0\nExit')
- exit(1)
-else:
- errprint('rate_mem not in config\nExit')
- exit(1)
-
-
-if 'rate_swap' in config_dict:
- rate_swap = string_to_float_convert_test(config_dict['rate_swap'])
- if rate_swap is None:
- errprint('Invalid rate_swap value, not float\nExit')
- exit(1)
- if rate_swap <= 0:
- errprint('rate_swap MUST be > 0\nExit')
- exit(1)
-else:
- errprint('rate_swap not in config\nExit')
- exit(1)
-
-
-if 'rate_zram' in config_dict:
- rate_zram = string_to_float_convert_test(config_dict['rate_zram'])
- if rate_zram is None:
- errprint('Invalid rate_zram value, not float\nExit')
- exit(1)
- if rate_zram <= 0:
- errprint('rate_zram MUST be > 0\nExit')
- exit(1)
-else:
- errprint('rate_zram not in config\nExit')
- exit(1)
-
-
-if 'swap_min_sigterm' in config_dict:
- swap_min_sigterm = config_dict['swap_min_sigterm']
-else:
- errprint('swap_min_sigterm not in config\nExit')
- exit(1)
-
-
-if 'swap_min_sigkill' in config_dict:
- swap_min_sigkill = config_dict['swap_min_sigkill']
-else:
- errprint('swap_min_sigkill not in config\nExit')
- exit(1)
-
-
-if 'min_delay_after_sigterm' in config_dict:
- min_delay_after_sigterm = string_to_float_convert_test(
- config_dict['min_delay_after_sigterm'])
- if min_delay_after_sigterm is None:
- errprint('Invalid min_delay_after_sigterm value, not float\nExit')
- exit(1)
- if min_delay_after_sigterm < 0:
- errprint('min_delay_after_sigterm must be positiv\nExit')
- exit(1)
-else:
- errprint('min_delay_after_sigterm not in config\nExit')
- exit(1)
-
-
-if 'min_delay_after_sigkill' in config_dict:
- min_delay_after_sigkill = string_to_float_convert_test(
- config_dict['min_delay_after_sigkill'])
- if min_delay_after_sigkill is None:
- errprint('Invalid min_delay_after_sigkill value, not float\nExit')
- exit(1)
- if min_delay_after_sigkill < 0:
- errprint('min_delay_after_sigkill must be positive\nExit')
- exit(1)
-else:
- errprint('min_delay_after_sigkill not in config\nExit')
- exit(1)
-
-
-if 'psi_post_action_delay' in config_dict:
- psi_post_action_delay = string_to_float_convert_test(
- config_dict['psi_post_action_delay'])
- if psi_post_action_delay is None:
- errprint('Invalid psi_post_action_delay value, not float\nExit')
- exit(1)
- if psi_post_action_delay < 0:
- errprint('psi_post_action_delay must be positive\nExit')
- exit(1)
-else:
- errprint('psi_post_action_delay not in config\nExit')
- exit(1)
-
-
-if 'sigkill_psi_threshold' in config_dict:
- sigkill_psi_threshold = string_to_float_convert_test(
- config_dict['sigkill_psi_threshold'])
- if sigkill_psi_threshold is None:
- errprint('Invalid sigkill_psi_threshold value, not float\nExit')
- exit(1)
- if sigkill_psi_threshold < 0 or sigkill_psi_threshold > 100:
- errprint('sigkill_psi_threshold must be in the range [0; 100]\nExit')
- exit(1)
-else:
- errprint('sigkill_psi_threshold not in config\nExit')
- exit(1)
-
-
-if 'sigterm_psi_threshold' in config_dict:
- sigterm_psi_threshold = string_to_float_convert_test(
- config_dict['sigterm_psi_threshold'])
- if sigterm_psi_threshold is None:
- errprint('Invalid sigterm_psi_threshold value, not float\nExit')
- exit(1)
- if sigterm_psi_threshold < 0 or sigterm_psi_threshold > 100:
- errprint('sigterm_psi_threshold must be in the range [0; 100]\nExit')
- exit(1)
-else:
- errprint('sigterm_psi_threshold not in config\nExit')
- exit(1)
-
-
-if 'min_badness' in config_dict:
- min_badness = string_to_int_convert_test(
- config_dict['min_badness'])
- if min_badness is None:
- errprint('Invalid min_badness value, not integer\nExit')
- exit(1)
- if min_badness < 0 or min_badness > 1000:
- errprint('Invalud min_badness value\nExit')
- exit(1)
-else:
- errprint('min_badness not in config\nExit')
- exit(1)
-
-
-if 'oom_score_adj_max' in config_dict:
- oom_score_adj_max = string_to_int_convert_test(
- config_dict['oom_score_adj_max'])
- if oom_score_adj_max is None:
- errprint('Invalid oom_score_adj_max value, not integer\nExit')
- exit(1)
- if oom_score_adj_max < 0 or oom_score_adj_max > 1000:
- errprint('Invalid oom_score_adj_max value\nExit')
- exit(1)
-else:
- errprint('oom_score_adj_max not in config\nExit')
- exit(1)
-
-
-if 'min_time_between_warnings' in config_dict:
- min_time_between_warnings = string_to_float_convert_test(
- config_dict['min_time_between_warnings'])
- if min_time_between_warnings is None:
- errprint('Invalid min_time_between_warnings value, not float\nExit')
- exit(1)
- if min_time_between_warnings < 1 or min_time_between_warnings > 300:
- errprint('min_time_between_warnings value out of range [1; 300]\nExit')
- exit(1)
-else:
- errprint('min_time_between_warnings not in config\nExit')
- exit(1)
-
-
-if 'swap_min_warnings' in config_dict:
- swap_min_warnings = config_dict['swap_min_warnings']
-else:
- errprint('swap_min_warnings not in config\nExit')
- exit(1)
-
-
-if 'max_ancestry_depth' in config_dict:
- max_ancestry_depth = string_to_int_convert_test(
- config_dict['max_ancestry_depth'])
- if min_badness is None:
- errprint('Invalid max_ancestry_depth value, not integer\nExit')
- exit(1)
- if max_ancestry_depth < 1:
- errprint('Invalud max_ancestry_depth value\nExit')
- exit(1)
-else:
- errprint('max_ancestry_depth is not in config\nExit')
- exit(1)
-
-
-if 'max_post_sigterm_victim_lifetime' in config_dict:
- max_post_sigterm_victim_lifetime = string_to_float_convert_test(
- config_dict['max_post_sigterm_victim_lifetime'])
- if max_post_sigterm_victim_lifetime is None:
- errprint('Invalid max_post_sigterm_victim_lifetime val'
- 'ue, not float\nExit')
- exit(1)
- if max_post_sigterm_victim_lifetime < 0:
- errprint('max_post_sigterm_victim_lifetime must be non-n'
- 'egative number\nExit')
- exit(1)
-else:
- errprint('max_post_sigterm_victim_lifetime is not in config\nExit')
- exit(1)
-
-
-if 'post_kill_exe' in config_dict:
- post_kill_exe = config_dict['post_kill_exe']
-else:
- errprint('post_kill_exe is not in config\nExit')
- exit(1)
-
-
-if 'psi_path' in config_dict:
- psi_path = config_dict['psi_path']
-else:
- errprint('psi_path is not in config\nExit')
- exit(1)
-
-
-if 'psi_metrics' in config_dict:
- psi_metrics = config_dict['psi_metrics']
-else:
- errprint('psi_metrics is not in config\nExit')
- exit(1)
-
-
-if 'warning_exe' in config_dict:
- warning_exe = config_dict['warning_exe']
- if warning_exe != '':
- check_warning_exe = True
- else:
- check_warning_exe = False
-else:
- errprint('warning_exe is not in config\nExit')
- exit(1)
-
-
-if 'extra_table_info' in config_dict:
- extra_table_info = config_dict['extra_table_info']
- if (extra_table_info != 'None' and
- extra_table_info != 'cgroup_v1' and
- extra_table_info != 'cgroup_v2' and
- extra_table_info != 'cmdline' and
- extra_table_info != 'environ' and
- extra_table_info != 'realpath' and
- extra_table_info != 'All'):
-
- errprint('Invalid config: invalid extra_table_info value\nExit')
- exit(1)
-else:
- errprint('Invalid config: extra_table_info is not in config\nExit')
- exit(1)
-
-
-separate_log = conf_parse_bool('separate_log')
-
-if separate_log:
-
- import logging
- from logging import basicConfig
- from logging import info
-
- log_dir = '/var/log/nohang'
-
- try:
- os.mkdir(log_dir)
- except PermissionError:
- print('ERROR: can not create log dir')
- except FileExistsError:
- pass
-
- logfile = log_dir + '/nohang.log'
-
- try:
- with open(logfile, 'a') as f:
- pass
- except FileNotFoundError:
- print('ERROR: log FileNotFoundError')
- except PermissionError:
- print('ERROR: log PermissionError')
-
- try:
- basicConfig(
- filename=logfile,
- level=logging.INFO,
- format="%(asctime)s: %(message)s")
- except PermissionError:
- errprint('ERROR: Permission denied: {}'.format(logfile))
- except FileNotFoundError:
- errprint('ERROR: FileNotFoundError: {}'.format(logfile))
-
-
-if 'min_mem_report_interval' in config_dict:
- min_mem_report_interval = string_to_float_convert_test(
- config_dict['min_mem_report_interval'])
- if min_mem_report_interval is None:
- errprint('Invalid min_mem_report_interval value, not float\nExit')
- exit(1)
- if min_mem_report_interval < 0:
- errprint('min_mem_report_interval must be non-negative number\nExit')
- exit(1)
-else:
- errprint('min_mem_report_interval is not in config\nExit')
- exit(1)
-
-
-if 'max_sleep_time' in config_dict:
- max_sleep_time = string_to_float_convert_test(
- config_dict['max_sleep_time'])
- if max_sleep_time is None:
- errprint('Invalid max_sleep_time value, not float\nExit')
- exit(1)
- if max_sleep_time <= 0:
- errprint('max_sleep_time must be positive number\nExit')
- exit(1)
-else:
- errprint('max_sleep_time is not in config\nExit')
- exit(1)
-
-
-if 'min_sleep_time' in config_dict:
- min_sleep_time = string_to_float_convert_test(
- config_dict['min_sleep_time'])
- if min_sleep_time is None:
- errprint('Invalid min_sleep_time value, not float\nExit')
- exit(1)
- if min_sleep_time <= 0:
- errprint('min_sleep_time must be positive number\nExit')
- exit(1)
-else:
- errprint('min_sleep_time is not in config\nExit')
- exit(1)
-
-
-if max_sleep_time < min_sleep_time:
- errprint(
- 'max_sleep_time value must not exceed min_sleep_time value.\nExit'
- )
- exit(1)
-
-
-if print_proc_table_flag:
-
- if not root:
- log('WARNING: effective UID != 0; euid={}; processes with other e'
- 'uids will be invisible for nohang'.format(self_uid))
-
- func_print_proc_table()
-
-
-##########################################################################
-
-
-psi_support = os.path.exists(psi_path)
-
-
-##########################################################################
-
-
-# Get KiB levels if it's possible.
-
-# получ кб. если не кб - то процент. Если процент - находим кб ниже на
-# основе полученного своптотал и процентов.
-
-
-def get_swap_threshold_tuple(string):
- # re (Num %, True) or (Num KiB, False)
- """Returns KiB value if abs val was set in config, or tuple with %"""
- # return tuple with abs and bool: (abs %, True) or (abs MiB, False)
-
- if string.endswith('%'):
- valid = string_to_float_convert_test(string[:-1])
- if valid is None:
- errprint('somewhere swap unit is not float_%')
- exit(1)
-
- value = float(string[:-1].strip())
- if value < 0 or value > 100:
- errprint('invalid value, must be from the range[0; 100] %')
- exit(1)
-
- return value, True
-
- elif string.endswith('M'):
- valid = string_to_float_convert_test(string[:-1])
- if valid is None:
- errprint('somewhere swap unit is not float_M')
- exit(1)
-
- value = float(string[:-1].strip()) * 1024
- if value < 0:
- errprint('invalid unit in config (negative value)')
- exit(1)
-
- return value, False
-
- else:
- errprint(
- 'Invalid config file. There are invalid units somewhere\nExit')
- exit(1)
-
-
-swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm)
-swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill)
-swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings)
-
-
-swap_term_is_percent = swap_min_sigterm_tuple[1]
-if swap_term_is_percent:
- swap_min_sigterm_percent = swap_min_sigterm_tuple[0]
-else:
- swap_min_sigterm_kb = swap_min_sigterm_tuple[0]
-
-
-swap_kill_is_percent = swap_min_sigkill_tuple[1]
-if swap_kill_is_percent:
- swap_min_sigkill_percent = swap_min_sigkill_tuple[0]
-else:
- swap_min_sigkill_kb = swap_min_sigkill_tuple[0]
-
-
-swap_warn_is_percent = swap_min_warnings_tuple[1]
-if swap_warn_is_percent:
- swap_min_warnings_percent = swap_min_warnings_tuple[0]
-else:
- swap_min_warnings_kb = swap_min_warnings_tuple[0]
-
-
-##########################################################################
-
-# outdated section, need fixes
-
-if print_config:
-
- print(
- '\n1. Memory levels to respond to as an OOM threat\n[display'
- 'ing these options need fix]\n')
-
- print('mem_min_sigterm: {} MiB, {} %'.format(
- round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1)))
- print('mem_min_sigkill: {} MiB, {} %'.format(
- round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1)))
-
- print('swap_min_sigterm: {}'.format(swap_min_sigterm))
- print('swap_min_sigkill: {}'.format(swap_min_sigkill))
-
- print('zram_max_sigterm: {} MiB, {} %'.format(
- round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1)))
- print('zram_max_sigkill: {} MiB, {} %'.format(
- round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1)))
-
- print('\n2. The frequency of checking the level of available m'
- 'emory (and CPU usage)\n')
- print('rate_mem: {}'.format(rate_mem))
- print('rate_swap: {}'.format(rate_swap))
- print('rate_zram: {}'.format(rate_zram))
-
- print('\n3. The prevention of killing innocent victims\n')
- print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm))
- print('min_delay_after_sigkill: {}'.format(min_delay_after_sigkill))
- print('min_badness: {}'.format(min_badness))
-
- # False (OK) - OK не нужен когда фолс
- print('decrease_oom_score_adj: {}'.format(
- decrease_oom_score_adj
- ))
- if decrease_oom_score_adj:
- print('oom_score_adj_max: {}'.format(oom_score_adj_max))
-
- print('\n4. Impact on the badness of processes via matching their'
- ' names, cmdlines ir UIDs with regular expressions\n')
-
- print('(todo)')
-
- print('\n5. The execution of a specific command instead of sen'
- 'ding the\nSIGTERM signal\n')
-
- print('\n6. GUI notifications:\n- OOM prevention results and\n- low m'
- 'emory warnings\n')
- print('gui_notifications: {}'.format(gui_notifications))
-
- print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings))
- if gui_low_memory_warnings:
- print('min_time_between_warnings: {}'.format(
- min_time_between_warnings))
-
- print('mem_min_warnings: {} MiB, {} %'.format(
- round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1)))
-
- print('swap_min_warnings: {}'.format(swap_min_warnings))
-
- print('zram_max_warnings: {} MiB, {} %'.format(
- round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1)))
-
- print('\n7. Output verbosity\n')
- print('print_config: {}'.format(print_config))
- print('print_mem_check_results: {}'.format(print_mem_check_results))
- print('print_sleep_periods: {}\n'.format(print_sleep_periods))
-
-
-##########################################################################
-
-
-# for calculating the column width when printing mem and zram
-mem_len = len(str(round(mem_total / 1024.0)))
-
-if gui_notifications:
- notify_sig_dict = {SIGKILL: 'Killing',
- SIGTERM: 'Terminating'}
-
-
-# convert rates from MiB/s to KiB/s
-rate_mem = rate_mem * 1024
-rate_swap = rate_swap * 1024
-rate_zram = rate_zram * 1024
-
-
-warn_time_now = 0
-warn_time_delta = 1000
-warn_timer = 0
-
-
-##########################################################################
-
-
-
-
-if not root:
- log('WARNING: effective UID != 0; euid={}; processes with other e'
- 'uids will be invisible for nohang'.format(self_uid))
-
-
-
-# Try to lock all memory
-
-mlockall()
-
-##########################################################################
-
-
-
-print_self_rss()
-
-
-# if print_proc_table:
-# find_victim(print_proc_table)
-
-log('Monitoring has started!')
-
-stdout.flush()
-
-##########################################################################
-
-psi_avg_string = '' # will be overwritten if PSI monitoring enabled
-
-
-if psi_support and not ignore_psi:
- psi_t0 = time()
-
-
-if print_mem_check_results:
-
- # to find delta mem
- wt2 = 0
- new_mem = 0
-
- # init mem report interval
- report0 = 0
-
-
-# handle signals
-sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP, SIGABRT, SIGSEGV, SIGBUS]
-for signum in sig_list:
- signal(signum, signal_handler)
-
-
-while True:
-
- if psi_support and not ignore_psi:
-
- psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics)
-
- if print_mem_check_results:
- psi_avg_string = 'PSI avg value: {} | '.format(
- str(psi_avg_value).rjust(6))
-
- if psi_avg_value >= sigkill_psi_threshold:
- sigkill_psi_exceeded = True
- else:
- sigkill_psi_exceeded = False
-
- if psi_avg_value >= sigterm_psi_threshold:
- sigterm_psi_exceeded = True
- else:
- sigterm_psi_exceeded = False
-
- if time() - psi_t0 >= psi_post_action_delay:
- psi_post_action_delay_exceeded = True
- else:
- psi_post_action_delay_exceeded = False
-
- if psi_debug:
- log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps'
- 'i_post_action_delay_exceeded: {}'.format(
- sigterm_psi_exceeded,
- sigkill_psi_exceeded,
- psi_post_action_delay_exceeded))
-
- if sigkill_psi_exceeded and psi_post_action_delay_exceeded:
- time0 = time()
- mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \
- 'old ({})'.format(
- psi_avg_value, sigkill_psi_threshold)
-
- implement_corrective_action(SIGKILL)
-
- psi_t0 = time()
- continue
-
- if sigterm_psi_exceeded and psi_post_action_delay_exceeded:
- time0 = time()
- mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \
- 'shold ({})'.format(psi_avg_value, sigterm_psi_threshold)
-
- implement_corrective_action(SIGTERM)
-
- psi_t0 = time()
- continue
-
- mem_available, swap_total, swap_free = check_mem_and_swap()
-
- # print(mem_available, swap_total, swap_free)
-
- # если метры - получаем киб выше и сразу. см.
-
- # if swap_min_sigkill is set in percent
- if swap_kill_is_percent:
- swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0
-
- if swap_term_is_percent:
- swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0
-
- if swap_warn_is_percent:
- swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0
-
- # в общем случае для работы нужны килобайты. Если в процентах задано -
- # находим КБ тут, после получения своптотал.
-
- mem_used_zram = check_zram()
-
- if print_mem_check_results:
-
- wt1 = time()
-
- delta = (mem_available + swap_free) - new_mem
-
- t_cycle = wt1 - wt2
-
- report_delta = wt1 - report0
-
- if report_delta >= min_mem_report_interval:
-
- mem_report = True
- new_mem = mem_available + swap_free
-
- report0 = wt1
-
- else:
- mem_report = False
-
- wt2 = time()
-
- if mem_report:
-
- speed = delta / 1024.0 / report_delta
- speed_info = ' | dMem: {} M/s'.format(
- str(round(speed)).rjust(5)
- )
-
- # Calculate 'swap-column' width
- swap_len = len(str(round(swap_total / 1024.0)))
-
- # Output available mem sizes
- if swap_total == 0 and mem_used_zram == 0:
- log('{}MemAvail: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- speed_info
- )
- )
-
- elif swap_total > 0 and mem_used_zram == 0:
- log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- human(swap_free, swap_len),
- just_percent_swap(swap_free / (swap_total + 0.1)),
- speed_info
- )
- )
-
- else:
- log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem'
- 'UsedZram: {} M, {} %{}'.format(
- psi_avg_string,
- human(mem_available, mem_len),
- just_percent_mem(mem_available / mem_total),
- human(swap_free, swap_len),
- just_percent_swap(swap_free / (swap_total + 0.1)),
- human(mem_used_zram, mem_len),
- just_percent_mem(mem_used_zram / mem_total),
- speed_info
- )
- )
-
- # если swap_min_sigkill задан в абсолютной величине и Swap_total = 0
- if swap_total > swap_min_sigkill_kb: # If swap_min_sigkill is absolute
- swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1))
- else:
- swap_sigkill_pc = '-'
-
- if swap_total > swap_min_sigterm_kb:
- swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1))
- else:
-
- # печатать так: SwapTotal = 0, ignore swapspace
- swap_sigterm_pc = '-'
-
- # это для печати меминфо. Все переработать нахрен.
-
- # далее пошла проверка превышения порогов
-
- # MEM SWAP KILL
- if (mem_available <= mem_min_sigkill_kb and
- swap_free <= swap_min_sigkill_kb):
- time0 = time()
-
- mem_info = 'Hard threshold exceeded\nMemory status that requ' \
- 'ires corrective actions:' \
- '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
- 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
- 'p_min_sigkill [{} MiB, {} %]'.format(
- kib_to_mib(mem_available),
- percent(mem_available / mem_total),
- kib_to_mib(mem_min_sigkill_kb),
- percent(mem_min_sigkill_kb / mem_total),
- kib_to_mib(swap_free),
- percent(swap_free / (swap_total + 0.1)),
- kib_to_mib(swap_min_sigkill_kb),
- swap_sigkill_pc)
-
- implement_corrective_action(SIGKILL)
-
- psi_t0 = time()
- continue
-
- # ZRAM KILL
- if mem_used_zram >= zram_max_sigkill_kb:
- time0 = time()
-
- mem_info = 'Hard threshold exceeded\nMemory status that requir' \
- 'es corrective actions:' \
- '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \
- 'kill [{} MiB, {} %]'.format(
- kib_to_mib(mem_used_zram),
- percent(mem_used_zram / mem_total),
- kib_to_mib(zram_max_sigkill_kb),
- percent(zram_max_sigkill_kb / mem_total))
-
- implement_corrective_action(SIGKILL)
-
- psi_t0 = time()
- continue
-
- # MEM SWAP TERM
- if mem_available <= mem_min_sigterm_kb and \
- swap_free <= swap_min_sigterm_kb:
-
- time0 = time()
-
- mem_info = 'Soft threshold exceeded\nMemory status that requi' \
- 'res corrective actions:' \
- '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \
- 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \
- 'p_min_sigterm [{} MiB, {} %]'.format(
- kib_to_mib(mem_available),
- percent(mem_available / mem_total),
- kib_to_mib(mem_min_sigterm_kb),
- # percent(mem_min_sigterm_kb / mem_total),
- # ОКРУГЛЯТЬ НА МЕСТЕ ВЫШЕ (или не выше, хз)
- round(mem_min_sigterm_percent, 1),
- kib_to_mib(swap_free),
- percent(swap_free / (swap_total + 0.1)),
- kib_to_mib(swap_min_sigterm_kb),
- swap_sigterm_pc)
-
- implement_corrective_action(SIGTERM)
-
- psi_t0 = time()
- continue
-
- # ZRAM TERM
- if mem_used_zram >= zram_max_sigterm_kb:
- time0 = time()
-
- mem_info = 'Soft threshold exceeded\nMemory status that requ' \
- 'ires corrective actions:' \
- '\n MemUsedZram [{} MiB, {} %] >= ' \
- 'zram_max_sigterm [{} M, {} %]'.format(
- kib_to_mib(mem_used_zram),
- percent(mem_used_zram / mem_total),
- kib_to_mib(zram_max_sigterm_kb),
- percent(zram_max_sigterm_kb / mem_total))
-
- implement_corrective_action(SIGTERM)
-
- psi_t0 = time()
- continue
-
- # LOW MEMORY WARNINGS
- if gui_low_memory_warnings:
-
- if mem_available <= mem_min_warnings_kb and \
- swap_free <= swap_min_warnings_kb + 0.1 or \
- mem_used_zram >= zram_max_warnings_kb:
- warn_time_delta = time() - warn_time_now
- warn_time_now = time()
- warn_timer += warn_time_delta
- if warn_timer > min_time_between_warnings:
- send_notify_warn()
- warn_timer = 0
-
- # SLEEP BETWEEN MEM CHECKS
- sleep_after_check_mem()
diff --git a/trash/nonascii-nohang.conf b/trash/nonascii-nohang.conf
deleted file mode 100644
index e969359..0000000
--- a/trash/nonascii-nohang.conf
+++ /dev/null
@@ -1,427 +0,0 @@
-
- This is nohang config file.
-
- Redesign of this config in progress.
-
- Lines starting with #, tabs and spaces are comments.
-
- Lines starting with $ contain obligatory parameters.
-
- Lines starting with @ contain optional parameters.
-
- The configuration includes the following sections:
-
- 1. Memory levels to respond to as an OOM threat
- 2. Response on PSI memory metrics
- 3. The frequency of checking the level of available memory
- (and CPU usage)
- 4. The prevention of killing innocent victims
- 5. Impact on the badness of processes via matching their
- - names,
- - cmdlines and
- - UIDs
- with regular expressions
- 6. The execution of a specific command instead of sending the
- SIGTERM signal
- 7. GUI notifications:
- - OOM prevention results and
- - low memory warnings
- 8. Output verbosity
- 9. Misc
-
- Just read the description of the parameters and edit the values.
- Please restart the program after editing the config.
-
-#####################################################################
-
- 1. Thresholds below which a signal should be sent to the victim
-
- Sets the available memory levels at or below which SIGTERM or SIGKILL
- signals are sent. The signal will be sent if MemAvailable and
- SwapFree (in /proc/meminfo) at the same time will drop below the
- corresponding values. Can be specified in % (percent) and M (MiB).
- Valid values are floating-point numbers from the range [0; 100] %.
-
- MemAvailable levels.
-
-mem_min_sigterm = 10 %
-mem_min_sigkill = 5 %
-
- SwapFree levels.
-
-swap_min_sigterm = 10 %
-swap_min_sigkill = 5 %
-
- Specifying the total share of zram in memory, if exceeded the
- corresponding signals are sent. As the share of zram in memory
- increases, it may fall responsiveness of the system. 90 % is a
- usual hang level, not recommended to set very high.
-
- Can be specified in % and M. Valid values are floating-point
- numbers from the range [0; 90] %.
-
-zram_max_sigterm = 50 %
-zram_max_sigkill = 55 %
-
-#####################################################################
-
- 2. Response on PSI memory metrics (it needs Linux 4.20 and up)
-
- About PSI:
- https://facebookmicrosites.github.io/psi/
-
- Disabled by default (ignore_psi = True).
-
-ignore_psi = True
-
- Choose a path to PSI file.
- By default it monitors system-wide file: /proc/pressure/memory
- You also can set file to monitor one cgroup slice.
- For example:
- psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure
- psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure
- psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure
-
- Execute the command
- find /sys/fs/cgroup | grep -P "memory\.pressure$"
- to find available memory.pressue files (except /proc/pressure/memory).
-
-psi_path = /proc/pressure/memory
-
- Valid psi_metrics are:
- some_avg10
- some_avg60
- some_avg300
- full_avg10
- full_avg60
- full_avg300
-
- some_avg10 is most sensitive.
-
-psi_metrics = some_avg10
-
-sigterm_psi_threshold = 80
-sigkill_psi_threshold = 90
-
-psi_post_action_delay = 60
-
-#####################################################################
-
- 3. The frequency of checking the amount of available memory
- (and CPU usage)
-
- Coefficients that affect the intensity of monitoring. Reducing
- the coefficients can reduce CPU usage and increase the periods
- between memory checks.
-
- Why three coefficients instead of one? Because the swap fill rate
- is usually lower than the RAM fill rate.
-
- It is possible to set a lower intensity of monitoring for swap
- without compromising to prevent OOM and thus reduce the CPU load.
-
- Default values are well for desktop. On servers without rapid
- fluctuations in memory levels the values can be reduced.
-
- Valid values are positive floating-point numbers.
-
-rate_mem = 4000
-rate_swap = 1500
-rate_zram = 500
-
- See also https://github.com/rfjakob/earlyoom/issues/61
-
-
- Максимальное время сна между проверками памяти.
- Положительное число.
-
-max_sleep_time = 3
-
- Минимальное время сна между проверками памяти.
- Положительное число, не превышающее max_sleep_time.
-
-min_sleep_time = 0.1
-
-#####################################################################
-
- 4. The prevention of killing innocent victims
-
- Минимальное значение bandess (по умолчанию равно oom_score),
- которым должен обладать
- процесс для того, чтобы ему был отправлен сигнал.
- Позволяет предотвратить убийство невиновных если что-то
- пойдет не так.
-
- Valid values are integers from the range [0; 1000].
-
-min_badness = 20
-
- Минимальная задержка после отправки соответствующих сигналов
- для предотвращения риска убийства сразу множества процессов.
-
- Valid values are non-negative floating-point numbers.
-
-min_delay_after_sigterm = 0.2
-min_delay_after_sigkill = 1
-
- Процессы браузера chromium обычно имеют oom_score_adj
- 200 или 300. Это приводит к тому, что процессы хрома умирают
- первыми вместо действительно тяжелых процессов.
- Если параметр decrease_oom_score_adj установлен
- в значение True, то у процессов, имеющих oom_score_adj выше
- oom_score_adj_max значение oom_score_adj будет опущено
- до oom_score_adj_max перед поиском жертвы.
-
- Enabling the option requires root privileges.
- Valid values are True and False.
- Values are case sensitive.
-
-decrease_oom_score_adj = False
-
- Valid values are integers from the range [0; 1000].
-
-oom_score_adj_max = 20
-
-#####################################################################
-
- 5. Impact on the badness of processes via matching their names,
- cmdlines or UIDs with regular expressions using re.search().
-
- See https://en.wikipedia.org/wiki/Regular_expression and
- https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions
-
- Enabling this options slows down the search for the victim
- because the names, cmdlines or UIDs of all processes
- (except init and kthreads) are compared with the
- specified regex patterns (in fact slowing down is caused by
- reading all /proc/*/cmdline and /proc/*/status files).
-
- Use script `oom-sort` from nohang package to view
- names, cmdlines and UIDs of processes.
-
-
- 5.1 Matching process names with RE patterns
-
- Valid values are True and False.
-
-regex_matching = False
-
- Syntax:
-
- @PROCESSNAME_RE badness_adj /// RE_pattern
-
- New badness value will be += badness_adj
-
- It is possible to compare multiple patterns
- with different badness_adj values.
-
- Example:
-
-@PROCESSNAME_RE -100 /// ^Xorg$
-
-@PROCESSNAME_RE -500 /// ^sshd$
-
- 5.2 Matching cmdlines with RE patterns
-
- A good option that allows fine adjustment.
-
-re_match_cmdline = False
-
-@CMDLINE_RE 300 /// -childID|--type=renderer
-
-@CMDLINE_RE -200 /// ^/usr/lib/virtualbox
-
-
- 5.3 Matching UIDs with RE patterns
-
- The most slow option
-
-re_match_uid = False
-
-@UID_RE -100 /// ^0$
-
- 5.4 Matching CGroup-line with RE patterns
-
-re_match_cgroup = True
-
- @CGROUP_RE -50 /// system.slice
-
- @CGROUP_RE 50 /// foo.service
-@CGROUP_RE 2000 /// user.slice
-
- 5.5 Matching realpath with RE patterns
-
-re_match_realpath = False
-
-@REALPATH_RE 20 /// ^/usr/bin/foo
-
- Note that you can control badness also via systemd units via OOMScoreAdjust, see
- https://www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
-
-#####################################################################
-
- 6. The execution of a specific command instead of sending the
- SIGTERM signal.
-
- For processes with a specific name you can specify a command to
- run instead of sending the SIGTERM signal.
-
- For example, if the process is running as a daemon, you can run
- the restart command instead of sending SIGTERM.
-
- Valid values are True and False.
-
-execute_the_command = False
-
- The length of the process name can't exceed 15 characters.
- The syntax is as follows: lines starting with keyword $ETC are
- considered as the lines containing names of processes and
- corresponding commands. After a name of process the triple slash
- (///) follows. And then follows the command that will be
- executed if the specified process is selected as a victim. The
- ampersand (&) at the end of the command will allow nohang to
- continue runing without waiting for the end of the command
- execution.
-
- For example:
- $ETC mysqld /// systemctl restart mariadb.service &
- $ETC php-fpm7.0 /// systemctl restart php7.0-fpm.service
-
- If command will contain $PID pattern, this template ($PID) will
- be replaced by PID of process which name match with RE pattern.
-
- Exmple:
-
- $ETC bash /// kill -KILL $PID
-
- It is way to send any signal instead of SIGTERM.
- (run `kill -L` to see list of all signals)
-
- Also $NAME will be replaced by process name.
-
- $ETC bash /// kill -9 $PID
-
-$ETC firefox-esr /// kill -SEGV $PID
-
-$ETC tail /// kill -9 $PID
-
-$ETC apache2 /// systemctl restart apache2
-
-
-#####################################################################
-
- 7. GUI notifications:
- - OOM prevention results and
- - low memory warnings
-
- Включение этой опции требует наличия notify-send в системе.
- В Debian/Ubuntu это обеспечивается установкой пакета
- libnotify-bin. В Fedora и Arch Linux - пакет libnotify.
- Также требуется наличие сервера уведомлений.
- При запуске nohang от рута уведомления рассылаются всем
- залогиненным пользователям.
- See also wiki.archlinux.org/index.php/Desktop_notifications
- Valid values are True and False.
-
-gui_notifications = False
-
- Enable GUI notifications about the low level of available memory.
- Valid values are True and False.
-
-gui_low_memory_warnings = False
-
- Execute the command instead of sending GUI notifications if the value is
- not empty line. For example:
- warning_exe = cat /proc/meminfo &
-
-warning_exe =
-
- Если значения MemAvailable и SwapFree одновременно будут ниже
- соотвестствующих значений, то будут отправлены уведомления.
-
- Can be specified in % (percent) and M (MiB).
- Valid values are floating-point numbers from the range [0; 100] %.
-
-mem_min_warnings = 25 %
-
-swap_min_warnings = 25 %
-
- Если доля zram в памяти превысит значение zram_max_warnings,
- то будут отправляться уведомления с минимальным периодом равным
- min_time_between_warnings.
-
-zram_max_warnings = 40 %
-
- Минимальное время между отправками уведомлений в секундах.
- Valid values are floating-point numbers from the range [1; 300].
-
-min_time_between_warnings = 15
-
- Ampersands (&) will be replaced with asterisks (*) in process
- names and in commands.
-
-#####################################################################
-
- 8. Verbosity
-
- Display the configuration when the program starts.
- Valid values are True and False.
-
-print_config = False
-
- Print memory check results.
- Valid values are True and False.
-
-print_mem_check_results = False
-
- Минимальная периодичность печати состояния памяти.
- 0 - печатать все проверки памяти.
- Неотрицательное число.
-
-min_mem_report_interval = 60
-
- Print sleep periods between memory checks.
- Valid values are True and False.
-
-print_sleep_periods = False
-
- Печатать общую статистику по корректирующим действиям с момента
- запуска nohang после каждого корректирующего действия.
-
-print_total_stat = True
-
- Печатать таблицу процессов перед каждым корректирующим действием.
-
-print_proc_table = False
-
-print_victim_info = True
-
- Максимальная глубина показа родословной жертвы.
- По умолчанию (1) показывается только родитель - PPID.
- Целое положительное число.
-
-max_ancestry_depth = 1
-
-separate_log = False
-
-psi_debug = False
-
-#####################################################################
-
- 9. Misc
-
- Жертва может не реагировать на SIGTERM.
- max_post_sigterm_victim_lifetime - это время, при превышении
- которого жертва получит SIGKILL.
- Неотрицательные числа.
-
-max_post_sigterm_victim_lifetime = 10
-
- Execute the command after sending SIGKILL to the victim if the value is
- not empty line. For example:
- post_kill_exe = cat /proc/meminfo &
-
-post_kill_exe =
-
-forbid_negative_badness = True
-
diff --git a/trash/oom-trigger b/trash/oom-trigger
deleted file mode 100755
index f0dcdf5..0000000
--- a/trash/oom-trigger
+++ /dev/null
@@ -1,194 +0,0 @@
-#!/usr/bin/env python3
-
-# интерактивный oom-trigger
-
-from memco import *
-
-from signal import signal, SIGTERM
-from time import sleep
-from sys import exit
-
-
-def signal_handler(signum, frame):
- print('Got signal {}'.format(signum))
- # sleep(1)
- # exit()
-
-
-signal(SIGTERM, signal_handler)
-
-
-
-# печать показателей на этапах работы
-def print_mem():
-
- mem_tup = mem_check_main()
-
- mem_available = mem_tup[0]
- swap_total = mem_tup[1]
- swap_free = mem_tup[2]
-
- print(
- 'MemAvailable: ', round(mem_available / 1024 / 1024, 3), 'GiB,', round(mem_available / 1024), 'MiB,', round(mem_available / mem_total * 100, 1), '%'
- )
-
- if swap_total != 0:
- print(
- 'SwapFree: ', round(swap_free / 1024 / 1024, 3), 'GiB,', round(swap_free / 1024), 'MiB,', round(swap_free / swap_total * 100, 1), '%'
- )
- print(
- 'Total Free: ', round((mem_available + swap_free) / 1024 / 1024, 3), 'GiB,', round((mem_available + swap_free) / 1024), 'MiB,', round((mem_available + swap_free) / (mem_total + swap_total) * 100, 1), '%'
- )
- else:
- print(
- 'Swap disabled'
- )
-
-
-
-# бесконечный жор
-def inf():
-
- print(
- 'Вводите целые неотрицательные числа. Чем больше, тем быстрее потребление памяти.\n1000 same обеспечивает потребление на уровне полтора гиг в секунду,\nurandom работает на скорости максимум 170 M/s'
- )
- same = input("same: ")
- urandom = input("urandom: ")
-
- expanding_list = []
-
- print(
- 'Процесс неограниченного потребления пошёл... Press Ctrl + C for exit'
- )
-
- while True:
- try:
- expanding_list.append(os.urandom(int(urandom)))
- expanding_list.append('#' * int(same))
- except MemoryError:
- print('MemoryError, start побайтовая добивалка!')
- terminal()
-
-
-
-
-# жор числп гиг
-def lim():
-
- expanding_list = []
-
- n = input('На сколько гигабайт уменьшить доступную память?\n: ')
-
- print('Погнали тратить ' + n + ' гиг...')
-
- i = 0
-
- while True:
-
- i += 1
-
- try:
- expanding_list.append(os.urandom(int(100)))
- expanding_list.append('#' * int(300))
- except MemoryError:
- print('MemoryError!')
- break
- if i > 2020202 * int(n):
- print('DONE')
- break
-
- return expanding_list
-
-
-
-
-# жор до остатка мегабайт
-def lim2avail():
-
- expanding_list = []
-
- n = input(
- 'Сколько мегабайт общей доступной памяти (MemAvailable + SwapFree) оставить?\nВведите целое положительное число: '
- )
-
- # проверка на целое положительное
- if n.isdigit() == True:
- n = int(n)
- else:
- print(
- 'Вы ввели не целое положительное число'
- )
- return 0
-
- if n == 0:
- print(
- 'Вы ввели не целое положительное число'
- )
- return 0
-
- print(
- 'Погнали уменьшать доступную память до уровня ниже ' + str(n) + ' MiB...'
- )
-
- while True:
- try:
- expanding_list.append(os.urandom(5000))
- expanding_list.append('#' * 5000)
- except MemoryError:
- print('MemoryError!')
- break
- if total_mem_available() <= n:
- print('DONE')
- break
-
- return expanding_list
-
-
-
-
-
-
-print('WARNING: эта прога способна потратить память и повесить систему, будьте осторожны.')
-print('При ее работе следите за показателями памяти.')
-
-ex_list = []
-
-try:
- while True:
-
- print()
- print_mem()
- print()
- print('Выберите вариант из списка ниже')
- print('8 или i или I - запустить бесконечное потребление, предложив выбрать скорость потребления и энтропию')
- print('7 или l или L - запустить ограниченное потребление заданного числа гигов')
- print('6 или a или A - жрать память пока количество доступной памяти не опустится ниже заданного')
- print('0 или с или С - очистить накопления при их наличии')
- print('q или любой другой символ - выход (можно просто нажать Enter)')
-
- li = input(': ')
-
- if li is 'l' or li is 'L' or li is '7':
- x = lim()
- ex_list.append(x)
- elif li is 'i' or li is 'I' or li is '8':
- inf()
- elif li is 'c' or li is 'C' or li is '0':
- ex_list = []
- x = 0
- y = 0
- elif li is '6' or li is 'a' or li is 'A':
- y = lim2avail()
- ex_list.append(y)
- else:
- exit()
-
-except KeyboardInterrupt:
- print()
- print_mem()
- selfterm()
-
-
-
-
-
diff --git a/trash/proc2log b/trash/proc2log
deleted file mode 100755
index 14127f5..0000000
--- a/trash/proc2log
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/usr/bin/env python3
-
-# proc2log: new processes monitor
-
-import os
-import argparse
-from time import sleep
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
- '-p',
- '--period',
- help='peroid in seconds between proc checks, default value: 0.1',
- default=0.1,
- type=float
- )
-args = parser.parse_args()
-period = args.period
-
-unknown_name = ''
-
-
-# получение чистого пид сета
-def foo():
- proc_list = os.listdir('/proc')
- proc_set = set()
- for i in proc_list:
- if i.isdigit() != True:
- continue
- proc_set.add(i)
- return proc_set
-
-
-# имя через пид
-def pid_to_name(pid):
- try:
- with open('/proc/' + pid + '/status') as f:
- for line in f:
- return line[:-1].split('\t')[1]
- except FileNotFoundError:
- return unknown_name
-
-
-# печать одного пида и имени
-def print_pid(pid):
-
- try:
- with open('/proc/' + pid + '/status') as f:
- for lineno, line in enumerate(f):
- name = line[:-1].split('\t')[1]
- print('+ {}, {}'.format(pid, name))
- pid_dict[pid] = name
- if lineno >= 0:
- break
-
- except FileNotFoundError:
- name = pid_dict.pop(pid, unknown_name)
- print(' - {}, {}'.format(pid, name))
-
- except ProcessLookupError:
- name = pid_dict.pop(pid, unknown_name)
- print(' - {}, {}'.format(pid, name))
-
-
-# нахождение и печать дельт сетов
-def delta(old_set):
-
- new_set = set(os.listdir('/proc'))
- plus = new_set - old_set
- minus = old_set - new_set
-
- if len(plus) > 0:
- for pid in plus:
- print_pid(pid)
-
- if len(minus) > 0:
- for pid in minus:
- print_pid(pid)
-
- return new_set
-
-
-print('proc2log started with period {} seconds'.format(period))
-print('+ PID, NAME - PID, NAME')
-
-# первичное наполнение словаря значениями pid:name для печати имён исчезнувших процессов
-pid_dict = dict()
-for pid in foo():
- pid_dict[pid] = pid_to_name(pid)
-
-pid_set = set(os.listdir('/proc'))
-try:
- while True:
- pid_set = delta(pid_set)
- sleep(period)
-except KeyboardInterrupt:
- exit()
-
diff --git a/trash/psi-monitor b/trash/psi-monitor
deleted file mode 100755
index 70b4c42..0000000
--- a/trash/psi-monitor
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/usr/bin/env python3
-
-from ctypes import CDLL
-from time import sleep
-from sys import argv
-
-"""
- Execute the command
- find /sys/fs/cgroup -name memory.pressure
- to find available memory.pressue files (except /proc/pressure/memory).
- (actual for cgroup2)
-"""
-
-if len(argv) > 1:
- psi_path = argv[1]
-else:
- psi_path = '/proc/pressure/memory'
-
-
-def mlockall():
-
- MCL_CURRENT = 1
- MCL_FUTURE = 2
- MCL_ONFAULT = 4
-
- libc = CDLL('libc.so.6', use_errno=True)
-
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
- )
- if result != 0:
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE
- )
- if result != 0:
- print('WARNING: cannot lock all memory')
- else:
- pass
- else:
- pass
-
-
-mlockall()
-
-
-def psi_path_to_metrics(psi_path):
-
- with open(psi_path) as f:
- psi_list = f.readlines()
- # print(psi_list)
- some_list, full_list = psi_list[0].split(' '), psi_list[1].split(' ')
- #print(some_list, full_list)
- some_avg10 = some_list[1].split('=')[1]
- some_avg60 = some_list[2].split('=')[1]
- some_avg300 = some_list[3].split('=')[1]
-
- full_avg10 = full_list[1].split('=')[1]
- full_avg60 = full_list[2].split('=')[1]
- full_avg300 = full_list[3].split('=')[1]
-
- return (some_avg10, some_avg60, some_avg300,
- full_avg10, full_avg60, full_avg300)
-
-
-print('Path to PSI file: {}\n'.format(psi_path))
-
-
-print(' avg10 avg60 avg300 avg10 avg60 avg300')
-
-while True:
- (some_avg10, some_avg60, some_avg300,
- full_avg10, full_avg60, full_avg300) = psi_path_to_metrics(psi_path)
-
- print('some {} {} {} | full {} {} {}'.format(
- some_avg10.rjust(6),
- some_avg60.rjust(6),
- some_avg300.rjust(6),
- full_avg10.rjust(6),
- full_avg60.rjust(6),
- full_avg300.rjust(6)))
-
- sleep(2)
diff --git a/trash/psi-monitor-old b/trash/psi-monitor-old
deleted file mode 100755
index 80694f9..0000000
--- a/trash/psi-monitor-old
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python3
-
-from time import sleep, time
-import os
-from sys import stdout
-
-mlockall = True
-
-if mlockall:
- from ctypes import CDLL
- CDLL('libc.so.6').mlockall(3)
-
-psi_path = '/proc/pressure/memory'
-
-psi_support = os.path.exists(psi_path)
-
-def rline1(path):
- """read 1st line from path."""
- with open(path) as f:
- for line in f:
- return line[:-1]
-
-def psi_mem_some_avg_total():
- return float(rline1(psi_path).rpartition('=')[2])
-
-avg_min_time = 1
-
-if psi_support:
- ta0 = time()
- a0 = psi_mem_some_avg_total()
-
-while True:
-
- if psi_support:
-
- ta1= time()
- dt = ta1 - ta0
-
- if dt >= avg_min_time:
-
- a1 = psi_mem_some_avg_total()
- avg = (a1 - a0) / (ta1 - ta0) / 10000
-
- print('avg time:', round(dt, 1))
- print('PSI mem avg:', round(avg, 2))
- print(rline1(psi_path), '\n')
- ta0 = ta1
- a0 = a1
-
- stdout.flush()
- sleep(0.1)
diff --git a/trash/psi-top b/trash/psi-top
deleted file mode 100755
index 73e695b..0000000
--- a/trash/psi-top
+++ /dev/null
@@ -1,133 +0,0 @@
-#!/usr/bin/env python3
-
-from ctypes import CDLL
-from time import sleep, time
-import os
-
-"""
- Execute the command
- find /sys/fs/cgroup -name memory.pressure
- to find available memory.pressue files (except /proc/pressure/memory).
- (actual for cgroup2)
-"""
-
-psi_path = '/proc/pressure/memory'
-
-def mlockall():
-
- MCL_CURRENT = 1
- MCL_FUTURE = 2
- MCL_ONFAULT = 4
-
- libc = CDLL('libc.so.6', use_errno=True)
-
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
- )
- if result != 0:
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE
- )
- if result != 0:
- print('WARNING: cannot lock all memory')
- else:
- pass
- else:
- pass
-
-
-mlockall()
-
-t0 = time()
-
-def psi_path_to_metrics(psi_path):
-
- with open(psi_path) as f:
- psi_list = f.readlines()
- # print(psi_list)
- some_list, full_list = psi_list[0].split(' '), psi_list[1].split(' ')
- #print(some_list, full_list)
- some_avg10 = some_list[1].split('=')[1]
- some_avg60 = some_list[2].split('=')[1]
- some_avg300 = some_list[3].split('=')[1]
-
- full_avg10 = full_list[1].split('=')[1]
- full_avg60 = full_list[2].split('=')[1]
- full_avg300 = full_list[3].split('=')[1]
-
- return (some_avg10, some_avg60, some_avg300,
- full_avg10, full_avg60, full_avg300)
-
-
-
-def cgroup2_root():
- """
- """
- with open('/proc/mounts') as f:
- for line in f:
- if ' cgroup2 ' in line:
- # if line.startswith('cgroup2 '):
- return line[7:].rpartition(' cgroup2 ')[0]
-
-
-def get_psi_mem_files(cgroup2_path):
- """
- """
-
- path_list = []
-
- for root, dirs, files in os.walk(cgroup2_path):
- for file in files:
- path = os.path.join(root, file)
- if path.endswith('/memory.pressure'): #############
- path_list.append(path)
-
- return path_list
-
-
-def psi_path_to_cgroup2(path):
- """
- """
- return path.partition(i)[2][:-16]
-
-
-i = cgroup2_root()
-
-print('cgroup2 root dir:', i)
-if i is not None:
- y = get_psi_mem_files(i)
- for path in y:
- pass # print(psi_path_to_cgroup2(path))
-
-path_list = get_psi_mem_files(i)
-
-print(' avg10 avg60 avg300 avg10 avg60 avg300 cgroup2')
-
-print(' ----- ----- ------ ----- ----- ------ ---------')
-
-(some_avg10, some_avg60, some_avg300, full_avg10, full_avg60, full_avg300) = psi_path_to_metrics('/proc/pressure/memory')
-print('some {} {} {} | full {} {} {} {}'.format(
- some_avg10.rjust(6),
- some_avg60.rjust(6),
- some_avg300.rjust(6),
- full_avg10.rjust(6),
- full_avg60.rjust(6),
- full_avg300.rjust(6), '[SYSTEM]'))
-
-
-for psi_path in path_list:
- (some_avg10, some_avg60, some_avg300,
- full_avg10, full_avg60, full_avg300) = psi_path_to_metrics(psi_path)
-
- print('some {} {} {} | full {} {} {} {}'.format(
- some_avg10.rjust(6),
- some_avg60.rjust(6),
- some_avg300.rjust(6),
- full_avg10.rjust(6),
- full_avg60.rjust(6),
- full_avg300.rjust(6), psi_path_to_cgroup2(psi_path)))
-
-
-print(time() - t0)
-
-
diff --git a/trash/psi-trigger b/trash/psi-trigger
deleted file mode 100755
index 54c5be0..0000000
--- a/trash/psi-trigger
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env python3
-
-from time import sleep, time
-
-t0 = time()
-
-hog_list = []
-
-duration = 60
-
-sff_max = 0.55
-sff_min = 0.45
-
-mb = 1024 * 1024
-path = '/dev/zero'
-
-def sff():
- """
- SwapFree fraction
- """
- with open('/proc/meminfo') as f:
- for i in f:
- if i.startswith('SwapTotal'):
- st = i.split(':')[1].strip(' kB\n')
- if i.startswith('SwapFree'):
- sf = i.split(':')[1].strip(' kB\n')
- st = float(st) + 1
- sf = float(sf)
- return sf / st
-
-
-def hog(hog_list):
- """
- """
- with open(path, 'rb') as f:
- raw = f.read(mb)
- hog_list.append(raw)
-
- return hog_list
-
-
-while True:
-
- while sff() > sff_min:
- hog_list.reverse()
- if time() - t0 > duration:
- exit()
- hog_list = hog(hog_list)
- print('MiB:', len(hog_list), 'SwapFree:', sff(), 'time:', time() - t0)
-
- while sff() < sff_max:
- hog_list.reverse()
- if time() - t0 > duration:
- exit()
- try:
- hog_list.pop()
- except IndexError:
- break
- print('MiB:', len(hog_list), 'SwapFree:', sff(), 'time:', time() - t0)
-
diff --git a/trash/psi_dummy b/trash/psi_dummy
deleted file mode 100644
index f490e2e..0000000
--- a/trash/psi_dummy
+++ /dev/null
@@ -1,2 +0,0 @@
-some avg10=56.70 avg60=51.59 avg300=22.92 total=195239452
-full avg10=28.82 avg60=49.77 avg300=21.83 total=182504463
diff --git a/trash/random-trigger b/trash/random-trigger
deleted file mode 100755
index 2c49a46..0000000
--- a/trash/random-trigger
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env python3
-
-import random
-
-# число элементов в списке, влияет на потребляемую память
-n = 400 * 1000 * 1000
-
-# число итераций замены элементов списка
-c = 10
-
-print('Наполняем список (n = {}) случайными числами...'.format(n))
-
-try:
- # добавляем в пустой список n случайных чисел
- x = []
- for _ in range(n):
- x.append(random.random())
-
- for i in range(c):
- print('Читение и запись новых значений, итерация {} из {}'.format(i + 1, c))
-
- # заменяем элементы списка на новые
- for i in range(n):
- x[i] = x[i] * 0.999
-
- del x
-
-except KeyboardInterrupt:
- del x
-
diff --git a/trash/rawcat b/trash/rawcat
deleted file mode 100755
index 1cff205..0000000
--- a/trash/rawcat
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-
-# rawcat 1 999 /path/to/file
-
-from sys import argv
-
-print('argv:')
-print(argv)
-
-mode = int(argv[1])
-num = int(argv[2])
-path = argv[3]
-
-if mode == 0:
- with open(path, 'rb') as f:
- raw = f.read(num)
-if mode == 1:
- with open(path, 'rb') as f:
- raw = f.read(num).decode('utf-8', 'ignore')
-if mode == 2:
- with open(path) as f:
- raw = f.read(num)
-
-print('raw:')
-print([raw])
diff --git a/trash/t01 b/trash/t01
deleted file mode 100755
index bf69058..0000000
--- a/trash/t01
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env python3
-
-from signal import signal, SIGTERM
-
-def signal_handler(signum, frame):
- print('Got signal {}'.format(signum))
-
-signal(SIGTERM, signal_handler)
-
-rate = 99999
-
-x = []
-
-while True:
- x.append('#' * rate)
-
-# http://okturing.com/src/6140/body
diff --git a/trash/thanatolog b/trash/thanatolog
deleted file mode 100755
index fcf6dba..0000000
--- a/trash/thanatolog
+++ /dev/null
@@ -1,169 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-from time import sleep, time
-from signal import (signal,
- SIGKILL, SIGTERM, SIGINT, SIGQUIT,
- SIGCONT, SIGUSR1, SIGUSR2,
- SIGHUP, SIGABRT, SIGSEGV, SIGBUS)
-from sys import argv, exit
-
-
-def mlockall():
- """Lock all memory to prevent swapping the process."""
-
- from ctypes import CDLL
-
- MCL_CURRENT = 1
- MCL_FUTURE = 2
- MCL_ONFAULT = 4
-
- libc = CDLL('libc.so.6', use_errno=True)
-
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
- )
- if result != 0:
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE
- )
- if result != 0:
- print('WARNING: cannot lock all memory')
- else:
- log('All memory locked with MCL_CURRENT | MCL_FUTURE')
- else:
- print('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
-
-
-def check_mem():
- """find mem_available"""
- with open('/proc/meminfo') as f:
- for n, line in enumerate(f):
- if n is 2:
- mem_available = int(line.split(':')[1][:-4])
- return mem_available
-
-
-def pid_to_name(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/comm', 'rb') as f:
- return f.read().decode('utf-8', 'ignore')[:-1]
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
-
-
-def pid_to_state(pid):
-
- x = rline1('/proc/' + pid + '/stat')
-
- if ')' in x:
- return x.rpartition(')')[2][1]
- else:
- return ' '
-
-
-def pid_to_rss(pid):
- try:
- rss = rline1('/proc/{}/statm'.format(pid)).split(' ')[1]
- except IndexError:
- rss = '-0'
- return rss
-
-
-def pid_to_realpath(pid):
- try:
- return os.path.realpath('/proc/' + pid + '/exe')
- except FileNotFoundError:
- return ''
-
-
-def rline1(path):
- """read 1st line from path."""
- try:
- with open(path) as f:
- for line in f:
- return line[:-1]
- except UnicodeDecodeError:
- with open(path, 'rb') as f:
- return f.read(999).decode(
- 'utf-8', 'ignore').split('\n')[0] # use partition()!
- except FileNotFoundError:
- return 'FileNotFoundError'
- except ProcessLookupError:
- return 'ProcessLookupError'
-
-
-###############################################################################
-
-
-if len(argv) != 2:
- print("""Usage:
-thanatolog PID""")
- exit()
-
-
-mlockall()
-
-
-pid = argv[1]
-name = pid_to_name(pid)
-rss0 = float(pid_to_rss(pid))
-ma = check_mem()
-
-
-print('PID:', pid)
-print('Name:', name)
-print('RSS at startup: {} (100.0 %)'.format(int(rss0)))
-print('MemAvail:', ma)
-
-send_signal = SIGKILL
-
-# os.kill(int(pid), SIGCONT)
-
-
-t0 = time()
-
-
-for i in range(10):
- rpe = os.path.exists('/proc/{}/exe'.format(pid))
- rss = pid_to_rss(pid)
- pe = os.path.exists('/proc/{}'.format(pid))
- t1 = time()
- d = t1 - t0
- state = pid_to_state(pid)
- ma = check_mem()
- vv = pid_to_cmdline(pid)
- print(vv)
- print('RP: {} | RSS: {} ({} %) | State: {} | time: {} | MemAv'
- 'ail: {}'.format(rpe, rss, round(float(rss) / (
- rss0 + 0.0001) * 100, 1), state, round(d, 3), ma))
-
-print('Send SIGKILL')
-
-os.kill(int(pid), send_signal)
-
-t0 = time()
-
-while True:
- rpe = os.path.exists('/proc/{}/exe'.format(pid))
- rss = pid_to_rss(pid)
- pe = os.path.exists('/proc/{}'.format(pid))
- t1 = time()
- d = t1 - t0
- state = pid_to_state(pid)
- ma = check_mem()
-
- vv = pid_to_cmdline(pid)
- print(vv)
-
- print('RP: {} | RSS: {} ({} %) | State: {} | time: {} | MemAv'
- 'ail: {}'.format(rpe, rss, round(float(rss) / (
- rss0 + 0.0001) * 100, 1), state, round(d, 3), ma))
-
- if pe is False:
- print('Process {} ({}) died in {} sec'.format(pid, name, round(d, 3)))
- exit()
diff --git a/trash/thanatolog2 b/trash/thanatolog2
deleted file mode 100755
index 7d058b8..0000000
--- a/trash/thanatolog2
+++ /dev/null
@@ -1,177 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-from time import sleep, time
-from signal import (signal,
- SIGKILL, SIGTERM, SIGINT, SIGQUIT,
- SIGCONT, SIGUSR1, SIGUSR2,
- SIGHUP, SIGABRT, SIGSEGV, SIGBUS)
-from sys import argv, exit
-
-
-def mlockall():
- """Lock all memory to prevent swapping the process."""
-
- from ctypes import CDLL
-
- MCL_CURRENT = 1
- MCL_FUTURE = 2
- MCL_ONFAULT = 4
-
- libc = CDLL('libc.so.6', use_errno=True)
-
- result = libc.mlockall(
- MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT
- )
-
-
-def check_mem():
- """find mem_available"""
- with open('/proc/meminfo') as f:
- for n, line in enumerate(f):
- if n is 2:
- mem_available = int(line.split(':')[1][:-4])
- return round(mem_available / 1024.0)
-
-
-def pid_to_name(pid):
- """
- """
- try:
- with open('/proc/' + pid + '/comm', 'rb') as f:
- return f.read().decode('utf-8', 'ignore')[:-1]
- except FileNotFoundError:
- return ''
- except ProcessLookupError:
- return ''
-
-
-def pid_to_state(pid):
-
- x = rline1('/proc/' + pid + '/stat')
-
- if ')' in x:
- return x.rpartition(')')[2][1]
- else:
- return ' '
-
-
-def pid_to_rss(pid, SC_PAGESIZE):
- try:
- rss = rline1('/proc/{}/statm'.format(pid)).split(' ')[1]
- except IndexError:
- rss = '-0'
- return round(int(rss) * SC_PAGESIZE / (1024.0 ** 2))
-
-
-def pid_to_realpath(pid):
- try:
- return os.path.realpath('/proc/' + pid + '/exe')
- except FileNotFoundError:
- return ''
-
-
-def rline1(path):
- """read 1st line from path."""
- try:
- with open(path) as f:
- for line in f:
- return line[:-1]
- except UnicodeDecodeError:
- with open(path, 'rb') as f:
- return f.read(999).decode(
- 'utf-8', 'ignore').split('\n')[0] # use partition()!
- except FileNotFoundError:
- return 'FileNotFoundError'
- except ProcessLookupError:
- return 'ProcessLookupError'
-
-
-###############################################################################
-
-
-if len(argv) != 2:
- print("""Usage:
- thanatolog PID""")
- exit()
-
-
-mlockall()
-
-SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE'])
-pid = argv[1]
-name = pid_to_name(pid)
-rss0 = float(pid_to_rss(pid, SC_PAGESIZE))
-ma = check_mem()
-
-
-print('PID:', pid)
-print('Name:', name)
-print('RSS at startup: {} (100.0 %)'.format(int(rss0)))
-print('MemAvail:', ma)
-
-send_signal = SIGKILL
-
-# os.kill(int(pid), SIGCONT)
-
-
-t0 = time()
-
-
-for i in range(10):
- sleep(0.001)
- rpe = os.path.exists('/proc/{}/exe'.format(pid))
- rss = pid_to_rss(pid, SC_PAGESIZE)
- pe = os.path.exists('/proc/{}'.format(pid))
- t1 = time()
- d = t1 - t0
- state = pid_to_state(pid)
- ma = check_mem()
- print('RP: {} | RSS: {} ({} %) | {} | t: {:0<6} | MemAv'
- 'ail: {}'.format(rpe, rss, round(float(rss) / (
- rss0 + 0.0001) * 100, 1), state, str(round(d, 4)), ma))
-
-
-print()
-
-print('Send SIGKILL')
-
-os.kill(int(pid), send_signal)
-
-t0 = time()
-
-ma0 = ma
-
-
-while True:
- sleep(0.001)
- rpe = os.path.exists('/proc/{}/exe'.format(pid))
- rss = pid_to_rss(pid, SC_PAGESIZE)
- pe = os.path.exists('/proc/{}'.format(pid))
- t1 = time()
- d = t1 - t0
- state = pid_to_state(pid)
- ma = check_mem()
-
- print('RP: {} | RSS: {} ({} %) | State: {} | time: {} | MemAvail: {} | dMA {}'.format(
- rpe, rss, round(float(rss) / (rss0 + 0.0001) * 100, 1), state, round(d, 3), ma, ma0 - ma))
-
- if pe is False:
- break
-
-print('Process {} ({}) died in {} sec'.format(pid, name, round(d, 3)))
-
-
-print()
-for i in range(10):
- sleep(0.001)
- rpe = os.path.exists('/proc/{}/exe'.format(pid))
- rss = pid_to_rss(pid, SC_PAGESIZE)
- pe = os.path.exists('/proc/{}'.format(pid))
- t1 = time()
- d = t1 - t0
- state = pid_to_state(pid)
- ma = check_mem()
-
- print('RP: {} | RSS: {} ({} %) | State: {} | time: {} | MemAvail: {} | dMA {}'.format(
- rpe, rss, round(float(rss) / (rss0 + 0.0001) * 100, 1), state, round(d, 3), ma, ma0 - ma))
diff --git a/trash/x01 b/trash/x01
deleted file mode 100755
index 312bc14..0000000
--- a/trash/x01
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/usr/bin/env python3
-
-
-from os import getpid
-
-# find mem_total
-# find positions of SwapFree and SwapTotal in /proc/meminfo
-
-with open('/proc/meminfo') as f:
- mem_list = f.readlines()
-
-mem_list_names = []
-for s in mem_list:
- mem_list_names.append(s.split(':')[0])
-
-if mem_list_names[2] != 'MemAvailable':
- errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied')
- # exit(1)
-
-swap_total_index = mem_list_names.index('SwapTotal')
-swap_free_index = swap_total_index + 1
-
-mem_total = int(mem_list[0].split(':')[1][:-4])
-
-# Get names from /proc/*/status to be able to get VmRSS and VmSwap values
-
-with open('/proc/self/status') as file:
- status_list = file.readlines()
-
-status_names = []
-for s in status_list:
- status_names.append(s.split(':')[0])
-
-ppid_index = status_names.index('PPid')
-vm_size_index = status_names.index('VmSize')
-vm_rss_index = status_names.index('VmRSS')
-vm_swap_index = status_names.index('VmSwap')
-uid_index = status_names.index('Uid')
-state_index = status_names.index('State')
-
-
-try:
- anon_index = status_names.index('RssAnon')
- file_index = status_names.index('RssFile')
- shmem_index = status_names.index('RssShmem')
- detailed_rss = True
- # print(detailed_rss, 'detailed_rss')
-except ValueError:
- detailed_rss = False
- # print('It is not Linux 4.5+')
-
-
-
-self_pid = str(getpid())
-
-
-def self_rss():
- r = pid_to_status(self_pid)[5]
- print(r)
-
-
-
-def pid_to_status(pid):
- """
- """
-
- try:
-
- with open('/proc/' + pid + '/status') as f:
-
- for n, line in enumerate(f):
-
- if n is 0:
- name = line.split('\t')[1][:-1]
-
- if n is state_index:
- state = line.split('\t')[1][0]
- continue
-
- if n is ppid_index:
- ppid = line.split('\t')[1][:-1]
- continue
-
- if n is uid_index:
- uid = line.split('\t')[2]
- continue
-
- if n is vm_size_index:
- vm_size = int(line.split('\t')[1][:-4])
- continue
-
- if n is vm_rss_index:
- vm_rss = int(line.split('\t')[1][:-4])
- continue
-
- if n is vm_swap_index:
- vm_swap = int(line.split('\t')[1][:-4])
- break
-
- return name, state, ppid, uid, vm_size, vm_rss, vm_swap
-
- except UnicodeDecodeError:
- return pid_to_status_unicode(pid)
-
- except FileNotFoundError:
- return None
-
- except ProcessLookupError:
- return None
-
- except ValueError:
- return None
-
-
-self_rss()
-
-
-import logging
-import subprocess
-import argparse
-
-
-self_rss()
-
diff --git a/trash/zram-off b/trash/zram-off
deleted file mode 100755
index f8aea9b..0000000
--- a/trash/zram-off
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-
-# Deactivate swap
-swapoff /dev/zram0
-
-# Reset zram
-echo 1 > /sys/block/zram0/reset
-
-# Remove zram module
-modprobe -r zram
diff --git a/trash/zram-on b/trash/zram-on
deleted file mode 100755
index 7507b2f..0000000
--- a/trash/zram-on
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/sh
-
-# загружаем в ядро модуль zram
-modprobe -v zram num_devices=4
-
-# задаем число потоков сжатия, равное числу ядер процессора
-CPUS="`nproc`"
-echo "$CPUS" > /sys/block/zram0/max_comp_streams
-
-# выбираем алгоритм сжатия, lz4 наиболее быстр, lzo сильнее сжимает
-ALG=lzo
-echo "$ALG" > /sys/block/zram0/comp_algorithm
-
-# задаем размер zram (FRACTION - размер устройства zram0 в процентах от MemTotal)
-FRACTION=100
-MEMORY=`perl -ne'/^MemTotal:\s+(\d+)/ && print $1*1024;' < /proc/meminfo`
-SIZE=$(( MEMORY * FRACTION / 100 ))
-
-echo $SIZE > /sys/block/zram0/disksize
-#echo 10G > /sys/block/zram0/disksize # можно задать размер zram в гигабайтах, вместо процентов от MemTotal
-
-# форматируем устройство zram0 как swap
-mkswap -L zram0 /dev/zram0
-
-# включаем подкачку
-swapon -d -p 10 /dev/zram0
-