diff --git a/CHANGELOG.md b/CHANGELOG.md index 5386441..af2e4ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,6 @@ - [x] `oom-sort` - [x] `psi-top` - [x] `psi-monitor` - - [x] `i-memhog` - [x] Improve poll rate algorithm - [x] Fixed Makefile for installation on CentOS 7 (remove gzip `-k` option). - [x] Added `max_post_sigterm_victim_lifetime` option: send SIGKILL to the victim if it doesn't respond to SIGTERM for a certain time diff --git a/Makefile b/Makefile index d3fb60f..aa629df 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,6 @@ install: install -m0755 ./oom-sort $(DESTDIR)/$(PREFIX)/usr/bin/oom-sort install -m0755 ./psi-top $(DESTDIR)/$(PREFIX)/usr/bin/psi-top install -m0755 ./psi-monitor $(DESTDIR)/$(PREFIX)/usr/bin/psi-monitor - install -m0755 ./i-memhog $(DESTDIR)/$(PREFIX)/usr/bin/i-memhog install -d $(DESTDIR)/$(PREFIX)/etc/nohang -git describe --tags --long --dirty > ./version @@ -41,7 +40,6 @@ uninstall: rm -fv $(PREFIX)/usr/bin/oom-sort rm -fv $(PREFIX)/usr/bin/psi-top rm -fv $(PREFIX)/usr/bin/psi-monitor - rm -fv $(PREFIX)/usr/bin/i-memhog rm -fv $(PREFIX)/usr/share/man/man1/nohang.1.gz rm -fv $(PREFIX)/usr/share/man/man1/oom-sort.1.gz rm -fv $(PREFIX)/lib/systemd/system/nohang.service diff --git a/README.md b/README.md index 6f9e408..040b162 100644 --- a/README.md +++ b/README.md @@ -292,7 +292,7 @@ See also `man journalctl`. You can also enable `separate_log` in the config to logging in `/var/log/nohang/nohang.log`. -## Additional tools: oom-sort, psi-top, psi-monitor, i-memhog +## Additional tools: oom-sort, psi-top, psi-monitor ### oom-sort @@ -503,10 +503,6 @@ some 0.29 7.58 14.58 | full 0.28 6.92 13.24 ``` -### i-memhog - -`i-memhog` is an interactive memory hog for testing purposes. - ## Contribution Use cases, feature requests and any questions are [welcome](https://github.com/hakavlad/nohang/issues). diff --git a/i-memhog b/i-memhog deleted file mode 100755 index 198461a..0000000 --- a/i-memhog +++ /dev/null @@ -1,315 +0,0 @@ -#!/usr/bin/env python3 - - -from signal import signal, SIGTERM -from time import sleep -from sys import exit -import os - - -# чек общей доступной, для lim2avail -def total_mem_available(): - - with open('/proc/meminfo') as file: - mem_list = file.readlines() - - mem_available = meminfo_num(mem_list, mem_available_index) - swap_free = meminfo_num(mem_list, swap_free_index) - - return round((swap_free + mem_available) / 1024) # MiB - - -# добитие байтами рандома -def terminal(): - ex = [] - while True: - try: - ex.append(os.urandom(1)) - except MemoryError: - continue - - -def meminfo_num(mem_list, index): - return int(mem_list[index].split(':')[1].split(' ')[-2]) - - -# выдача основных показателей meminfo, KiB -def mem_check_main(): - - with open('/proc/meminfo') as file: - mem_list = file.readlines() - - mem_available = meminfo_num(mem_list, mem_available_index) - swap_total = meminfo_num(mem_list, swap_total_index) - swap_free = meminfo_num(mem_list, swap_free_index) - - return mem_available, swap_total, swap_free - - -def signal_handler(signum, frame): - print('Got signal {}'.format(signum)) - # sleep(1) - # exit() - - -def meminfo(): - - # получаем сырой mem_list - with open('/proc/meminfo') as file: - mem_list = file.readlines() - - # получаем список названий позиций: MemTotal etc - mem_list_names = [] - for s in mem_list: - mem_list_names.append(s.split(':')[0]) - - # ищем MemAvailable, обрабатываем исключение - try: - mem_available_index = mem_list_names.index('MemAvailable') - except ValueError: - print("Your Linux kernel is too old (3.14+ requied), bye!") - # исключение для ядер < 3.14, не определяющих MemAvailable - exit() - - # ищем позиции SwapTotl и SwapFree - swap_total_index = mem_list_names.index('SwapTotal') - swap_free_index = mem_list_names.index('SwapFree') - - buffers_index = mem_list_names.index('Buffers') - cached_index = mem_list_names.index('Cached') - active_index = mem_list_names.index('Active') - inactive_index = mem_list_names.index('Inactive') - shmem_index = mem_list_names.index('Shmem') - - # ищем значение MemTotal в KiB - mem_total = int(mem_list[0].split(':')[1].split(' ')[-2]) - - return mem_total, mem_available_index, swap_total_index, swap_free_index, buffers_index, cached_index, active_index, inactive_index, shmem_index - - -meminfo_tuple = meminfo() - -mem_total = meminfo_tuple[0] -mem_available_index = meminfo_tuple[1] -swap_total_index = meminfo_tuple[2] -swap_free_index = meminfo_tuple[3] - -buffers_index = meminfo_tuple[4] -cached_index = meminfo_tuple[5] -active_index = meminfo_tuple[6] -inactive_index = meminfo_tuple[7] -shmem_index = meminfo_tuple[8] - - -# печать показателей на этапах работы -def print_mem(): - - mem_tup = mem_check_main() - - mem_available = mem_tup[0] - swap_total = mem_tup[1] - swap_free = mem_tup[2] - - print( - 'MemAvailable: ', - round( - mem_available / - 1024 / - 1024, - 3), - 'GiB,', - round( - mem_available / - 1024), - 'MiB,', - round( - mem_available / - mem_total * - 100, - 1), - '%') - - if swap_total != 0: - print( - 'SwapFree: ', - round( - swap_free / - 1024 / - 1024, - 3), - 'GiB,', - round( - swap_free / - 1024), - 'MiB,', - round( - swap_free / - swap_total * - 100, - 1), - '%') - print('Total Free: ', - round((mem_available + swap_free) / 1024 / 1024, - 3), - 'GiB,', - round((mem_available + swap_free) / 1024), - 'MiB,', - round((mem_available + swap_free) / (mem_total + swap_total) * 100, - 1), - '%') - else: - print( - 'Swap disabled' - ) - - -# бесконечный жор -def inf(): - - print( - 'Вводите целые неотрицательные числа. Чем больше, тем быстрее потребление памяти.\n1000 same обеспечивает потребление на уровне полтора гиг в секунду,\nurandom работает на скорости максимум 170 M/s' - ) - same = input("same: ") - urandom = input("urandom: ") - - expanding_list = [] - - print( - 'Процесс неограниченного потребления пошёл... Press Ctrl + C for exit' - ) - - while True: - try: - expanding_list.append(os.urandom(int(urandom))) - expanding_list.append('#' * int(same)) - except MemoryError: - print('MemoryError, start побайтовая добивалка!') - terminal() - - -def selfterm(): - os.kill(os.getpid(), signal.SIGTERM) - - -# жор числп гиг -def lim(): - - expanding_list = [] - - n = input('На сколько гигабайт уменьшить доступную память?\n: ') - - print('Погнали тратить ' + n + ' гиг...') - - i = 0 - - while True: - - i += 1 - - try: - expanding_list.append(os.urandom(int(100))) - expanding_list.append('#' * int(300)) - except MemoryError: - print('MemoryError!') - break - if i > 2020202 * int(n): - print('DONE') - break - - return expanding_list - - -# жор до остатка мегабайт -def lim2avail(): - - expanding_list = [] - - n = input( - 'Сколько мегабайт общей доступной памяти (MemAvailable + SwapFree) оставить?\nВведите целое положительное число: ' - ) - - # проверка на целое положительное - if n.isdigit(): - n = int(n) - else: - print( - 'Вы ввели не целое положительное число' - ) - return 0 - - if n == 0: - print( - 'Вы ввели не целое положительное число' - ) - return 0 - - print( - 'Погнали уменьшать доступную память до уровня ниже ' + - str(n) + - ' MiB...') - - while True: - try: - expanding_list.append(os.urandom(5000)) - expanding_list.append('#' * 5000) - except MemoryError: - print('MemoryError!') - break - if total_mem_available() <= n: - print('DONE') - break - - return expanding_list - - -print('WARNING: эта прога способна потратить память и повесить систему, будьте осторожны.') -print('При ее работе следите за показателями памяти.') - - -print('Ignore SIGTERM? (y|n)') - -sss = input(': ') - -if sss == 'y': - signal(SIGTERM, signal_handler) - print('The SIGTERM signal will be ignored') -else: - print('The SIGTERM signal will NOT be ignored') - - -ex_list = [] - -try: - while True: - - print() - print_mem() - print() - print('Выберите вариант из списка ниже') - print('8 или i или I - запустить бесконечное потребление, предложив выбрать скорость потребления и энтропию') - print('7 или l или L - запустить ограниченное потребление заданного числа гигов') - print('6 или a или A - жрать память пока количество доступной памяти не опустится ниже заданного') - print('0 или с или С - очистить накопления при их наличии') - print('q или любой другой символ - выход (можно просто нажать Enter)') - - li = input(': ') - - if li is 'l' or li is 'L' or li is '7': - x = lim() - ex_list.append(x) - elif li is 'i' or li is 'I' or li is '8': - inf() - elif li is 'c' or li is 'C' or li is '0': - ex_list = [] - x = 0 - y = 0 - elif li is '6' or li is 'a' or li is 'A': - y = lim2avail() - ex_list.append(y) - else: - exit() - -except KeyboardInterrupt: - print() - print_mem() - selfterm() diff --git a/trash/FAQ.ru.md b/trash/FAQ.ru.md deleted file mode 100644 index 1dda627..0000000 --- a/trash/FAQ.ru.md +++ /dev/null @@ -1,46 +0,0 @@ - - - - - - -Fedora 21 (KDE) Гуглил долго, про oom-killer много чего прочитал. Но не понял, как его активировать. whereis его не нашёл, в репах его не нашёл. Если это встроенное средство - почему у меня не запускается и где его конфиг? Мне нужно тупо, чтобы если оперативка исчерпалась - прибило последнюю вкладку хрома. Либо вообще killall chrome тупо и всё. Как проще всего сделать? -https://www.linux.org.ru/forum/desktop/11511840 - - - -OOM Killer под себя - -Здравствуйте! -Задача такова, надо (курсовая работа по системному программированию) пропатчить ядро с изменением oom killer'a, т.е. -1)требуется создать некоторый список с названиями процессов, которые "мудрый киллер" будет удалять в первую очередь или наоборот оставлять. -2)требуется создать свою систему расчета приоритетов для удаления процессов. -Я НЕ продвинутый линуксоид, поэтому прошу помощи у общественности. С чего начать и как приступить к выполнению? -https://www.linux.org.ru/forum/development/3555574 - - - - - -Товарищи, можете посоветовать какую-нибудь стандартную программу, которая, в какой-то определённо ситуации, жрала память, что приводило бы к запуску Killer`a. -Чтобы потом можно было на ней тестить курсач) -https://www.linux.org.ru/forum/development/3555574?cid=3570489 - - -Post-mortem по логам OOM-killer. -Суть такова, в определенный момент на машине случилось нечто, в результате чего кончилась физическая память и ОС залезла в своп с головой. Производительность упала до потери всякого отклика от машины. OOM-killer включился, но, похоже, занимался убийством исключительно невиновных и никому не мешающих процессов. -Можно ли как-то по логам отследить, кто сожрал всю память? Хотя бы имена павших жертвами киллера? -Или сразу самому писать скрипт, сбрасывающий на диск ps aux, для следующего такого случая? -https://www.linux.org.ru/forum/admin/5693261 - - -Проблема в том что у меня на VDS мало оперативки и она занята полностью пытаясь подключиться сервер не может выделить памяти и обрывает соединение. что прописать чтобы ссх постоянно висел в процессах с выделенным скажем полумегабайтным или сколько там ему достаточно лимитом? -https://www.linux.org.ru/forum/general/5399920?cid=5399954 - - - - - - - - diff --git a/trash/isascii b/trash/isascii deleted file mode 100755 index a897776..0000000 --- a/trash/isascii +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 -"""Check file for non-ascii lines.""" - -from sys import argv - -path = argv[1] - -print('Path:', path) - - -def isascii(string): - try: - string_ascii = string.encode('ascii') - return True - except UnicodeEncodeError: - return False - - -def check_file(): - num = 0 - with open(path) as f: - for n, line in enumerate(f): - res = isascii(line) - if res: - continue - else: - print('Line {} is non-ascii:'.format(n + 1)) - print(line.strip('\n')) - num += 1 - continue - print('Found {} non-ascii lines'.format(num)) - - -check_file() diff --git a/trash/memco.py b/trash/memco.py deleted file mode 100644 index c17a6b5..0000000 --- a/trash/memco.py +++ /dev/null @@ -1,276 +0,0 @@ -# memdler common - -import os -import glob -import signal -import subprocess -from glob import glob -from time import sleep - - -# k = mem_total_used / (zram own size) -k = 0.0042 - - -def meminfo(): - - # получаем сырой mem_list - with open('/proc/meminfo') as file: - mem_list = file.readlines() - - # получаем список названий позиций: MemTotal etc - mem_list_names = [] - for s in mem_list: - mem_list_names.append(s.split(':')[0]) - - # ищем MemAvailable, обрабатываем исключение - try: - mem_available_index = mem_list_names.index('MemAvailable') - except ValueError: - print("Your Linux kernel is too old (3.14+ requied), bye!") - # исключение для ядер < 3.14, не определяющих MemAvailable - exit() - - # ищем позиции SwapTotl и SwapFree - swap_total_index = mem_list_names.index('SwapTotal') - swap_free_index = mem_list_names.index('SwapFree') - - buffers_index = mem_list_names.index('Buffers') - cached_index = mem_list_names.index('Cached') - active_index = mem_list_names.index('Active') - inactive_index = mem_list_names.index('Inactive') - shmem_index = mem_list_names.index('Shmem') - - # ищем значение MemTotal в KiB - mem_total = int(mem_list[0].split(':')[1].split(' ')[-2]) - - return mem_total, mem_available_index, swap_total_index, swap_free_index, buffers_index, cached_index, active_index, inactive_index, shmem_index - - -meminfo_tuple = meminfo() - -mem_total = meminfo_tuple[0] -mem_available_index = meminfo_tuple[1] -swap_total_index = meminfo_tuple[2] -swap_free_index = meminfo_tuple[3] - -buffers_index = meminfo_tuple[4] -cached_index = meminfo_tuple[5] -active_index = meminfo_tuple[6] -inactive_index = meminfo_tuple[7] -shmem_index = meminfo_tuple[8] - - -def meminfo_num(mem_list, index): - return int(mem_list[index].split(':')[1].split(' ')[-2]) - - -# выдача основных показателей meminfo, KiB -def mem_check_main(): - - with open('/proc/meminfo') as file: - mem_list = file.readlines() - - mem_available = meminfo_num(mem_list, mem_available_index) - swap_total = meminfo_num(mem_list, swap_total_index) - swap_free = meminfo_num(mem_list, swap_free_index) - - return mem_available, swap_total, swap_free - - -# читать не весь файл, а нужный срез от 0 до 20, например -def mem_check_full(): - - with open('/proc/meminfo') as file: - mem_list = file.readlines() - - mem_available = meminfo_num(mem_list, mem_available_index) - swap_total = meminfo_num(mem_list, swap_total_index) - swap_free = meminfo_num(mem_list, swap_free_index) - - buffers = meminfo_num(mem_list, buffers_index) - cached = meminfo_num(mem_list, cached_index) - active = meminfo_num(mem_list, active_index) - inactive = meminfo_num(mem_list, inactive_index) - shmem = meminfo_num(mem_list, shmem_index) - - return mem_available, swap_total, swap_free, buffers, cached, active, inactive, shmem - - -# чек общей доступной, для lim2avail -def total_mem_available(): - - with open('/proc/meminfo') as file: - mem_list = file.readlines() - - mem_available = meminfo_num(mem_list, mem_available_index) - swap_free = meminfo_num(mem_list, swap_free_index) - - return round((swap_free + mem_available) / 1024) # MiB - - -# добитие байтами рандома -def terminal(): - ex = [] - while True: - try: - ex.append(os.urandom(1)) - except MemoryError: - continue - - -# перевод дроби в проценты -def percent(num): - a = str(round(num * 100, 1)).split('.') - a0 = a[0].rjust(3, ' ') - a1 = a[1] - return '{}.{}'.format(a0, a1) - - -def human(num): - return str(round(num / 1024.0)).rjust(8, ' ') - - - -# B -> GiB -def humanz(num): - a = str(round(num / 1073741824, 3)) - a0 = a.split('.')[0].rjust(4, ' ') - a1 = a.split('.')[1] - if len(a1) == 1: - a1 += '00' - if len(a1) == 2: - a1 += '0' - return '{}.{}'.format(a0, a1) - - - -movie_dict = { - '+----': '-+---', - '-+---': '--+--', - '--+--': '---+-', - '---+-': '----+', - '----+': '+----' - } - - -def config_parser(config): - if os.path.exists(config): - try: - with open(config) as f: - name_value_dict = dict() - for line in f: - a = line.startswith('#') - b = line.startswith('\n') - c = line.startswith('\t') - d = line.startswith(' ') - if not a and not b and not c and not d: - a = line.split('=') - name_value_dict[a[0].strip()] = a[1].strip() - return name_value_dict - except PermissionError: - print('config: permission error') - else: - print('config does not exists') - - - - -def swaps_raw(part_string): - '''анализ строки свопс, возврат кортежа с значениями''' - part_string_list = part_string.split('\t') - part_name = part_string_list[0].split(' ')[0] - - part_size = int(part_string_list[-3]) - part_used = int(part_string_list[-2]) - part_prio = int(part_string_list[-1]) - - return part_name, part_size, part_used, part_prio - - - -# возвращает disksize и mem_used_total по zram id -def zram_stat(zram_id): - with open('/sys/block/zram' + zram_id + '/disksize') as file: - disksize = file.readlines()[0][:-1] - if os.path.exists('/sys/block/zram' + zram_id + '/mm_stat'): - with open('/sys/block/zram' + zram_id + '/mm_stat') as file: - mm_stat = file.readlines()[0][:-1].split(' ') - mm_stat_list = [] - for i in mm_stat: - if i != '': - mm_stat_list.append(i) - mem_used_total = mm_stat_list[2] - else: - with open('/sys/block/zram' + zram_id + '/mem_used_total') as file: - mem_used_total = file.readlines()[0][:-1] - return disksize, mem_used_total - - - - - - - - -# termer(signal.SIGKILL) -# process terminator -# функция поиска жиробаса и его убийства -def terminator(signal): - - subdirs = glob('/proc/*/') - subdirs.remove('/proc/self/') - subdirs.remove('/proc/thread-self/') - - pid_list = [] - name_list = [] - oom_score_list = [] - - for subdir in subdirs: - - try: - - with open(subdir + 'status') as file: - status = file.readlines() - - pid_list.append(status[5].split(':')[1][1:-1]) - name_list.append(status[0].split(':')[1][1:-1]) - - except Exception: - pass - - try: - - with open(subdir + 'oom_score') as file: - oom_score = file.readlines() - - oom_score_list.append(int(oom_score[0][0:-1])) - - except Exception: - pass - - max_oom_score = sorted(oom_score_list)[-1] - n = oom_score_list.index(max_oom_score) - s = sorted(oom_score_list) - s.reverse() - - if signal == signal.SIGTERM: - print('\nTRY TO TERM {}, Pid {}\n'.format(name_list[n], pid_list[n])) - else: - print('\nTRY TO KILL {}, Pid {}\n'.format(name_list[n], pid_list[n])) - - try: - os.kill(int(pid_list[n]), signal) - except ProcessLookupError: - print('No such process') - - - - -def selfterm(): - os.kill(os.getpid(), signal.SIGTERM) - - - - - diff --git a/trash/memleak/install.sh b/trash/memleak/install.sh deleted file mode 100755 index 0f765ee..0000000 --- a/trash/memleak/install.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -cp ./memleak /usr/sbin/memleak -cp ./memleak.service /lib/systemd/system/memleak.service -systemctl daemon-reload diff --git a/trash/memleak/memleak b/trash/memleak/memleak deleted file mode 100755 index e26ee7f..0000000 --- a/trash/memleak/memleak +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python3 - -from os import system -from time import sleep - -x = [] - -while True: - x.append('#' * 99999) - sleep(0.1) - system('sleep 9999 &') - diff --git a/trash/memleak/memleak.service b/trash/memleak/memleak.service deleted file mode 100644 index 0a7f3c0..0000000 --- a/trash/memleak/memleak.service +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Memory leak daemon -After=sysinit.target - -[Service] -ExecStart=/usr/sbin/memleak - -[Install] -WantedBy=multi-user.target diff --git a/trash/mm b/trash/mm deleted file mode 100755 index 792e953..0000000 --- a/trash/mm +++ /dev/null @@ -1,315 +0,0 @@ -#!/usr/bin/env python3 - -# ms-monitor/ - -from memco import * -import time - -# once or 1, log or 2, inplace or 3 -mode = '3' - -# период цикла печати -period = 0.2 - -# параметры визуализации -used = '$' -free = '~' -len_visual = 14 - -# нахождение и печать параметров, возвращает показатели и принимает показатели для нахождения дельт -def printer(old_list): - - mem_tup = mem_check_main() - - mem_available = mem_tup[0] - swap_total = mem_tup[1] - swap_free = mem_tup[2] - - tn = time.time() - delta = tn - old_list[4] - - mem_busy = mem_total - mem_available - swap_busy = swap_total - swap_free - - mem_swap_total = mem_total + swap_total - mem_swap_free = mem_available + swap_free - mem_swap_busy = mem_busy + swap_busy - - delta_mem = (mem_busy - old_list[0]) / delta - delta_swap = (swap_busy - old_list[1]) / delta - delta_all = (mem_swap_busy - old_list[2]) / delta - - if swap_total == 0: - -#1################################################################################### - - # печать без свопа - - mem_visual = ( - used * round(mem_busy / mem_total * len_visual) - ).ljust(len_visual, free) - - print( - ' MEM' - ) - - print( - 'TOTAL {}'.format( - human(mem_total), - ) - ) - print( - 'N/A {} {}'.format( - human(mem_busy), - percent(mem_busy / mem_total), - ) - ) - print( - 'AVAIL {} {}'.format( - human(mem_available), - percent(mem_available / mem_total), - ) - ) - print( - 'DELTA {}'.format( - human(delta_mem), - ) - ) - print( - '{} {}'.format( - old_list[3], mem_visual - ) - ) - - -#2################################################################################### - - else: - - with open('/proc/swaps') as file: - swaps_list = file.readlines()[1:] - - zram_id_list = [] - - disk_swap_size = 0 - disk_swap_used = 0 - zram_swap_size = 0 - zram_swap_used = 0 - - for i in swaps_list: - - x = swaps_raw(i) - - if x[0].startswith('/dev/zram'): - - zram_swap_size += int(x[1]) - zram_swap_used += int(x[2]) - - zram_id_list.append(x[0][9:]) - - else: - - disk_swap_size += int(x[1]) - disk_swap_used += int(x[2]) - - if zram_swap_size == 0: - -#3################################################################################### - - # печать своп без зрам - - mem_visual = ( - used * round(mem_busy / mem_total * len_visual) - ).ljust(len_visual, free) - swap_visual = ( - used * round(swap_busy / swap_total * len_visual) - ).ljust(len_visual, free) - mem_swap_visual = ( - used * round(mem_swap_busy / mem_swap_total * len_visual) - ).ljust(len_visual, free) - - print( - ' MEM SWAP MEM + SWAP' - ) - - print( - 'TOTAL {} {} {}'.format( - human(mem_total), - human(swap_total), - human(mem_swap_total), - ) - ) - print( - 'N/A {} {} {} {} {} {}'.format( - human(mem_busy), - percent(mem_busy / mem_total), - human(swap_busy), - percent(swap_busy / swap_total), - human(mem_swap_busy), - percent(mem_swap_busy / mem_swap_total), - ) - ) - print( - 'AVAIL {} {} {} {} {} {}'.format( - human(mem_available), - percent(mem_available / mem_total), - human(swap_free), - percent(swap_free / swap_total), - human(mem_swap_free), - percent(mem_swap_free / mem_swap_total), - ) - ) - print( - 'DELTA {} {} {}'.format( - human(delta_mem), - human(delta_swap), - human(delta_all) - ) - ) - print( - '{} {} {} {}'.format( - old_list[3], - mem_visual, - swap_visual, - mem_swap_visual, - ) - ) - print() - -#4################################################################################### - - else: - - # суммируем показатели из всех свопов в зрам - - disksize_sum = 0 - mem_used_total_sum = 0 - - for i in zram_id_list: - s = zram_stat(i) # кортеж из disksize и mem_used_total для данного zram id - disksize_sum += int(s[0]) - mem_used_total_sum += int(s[1]) - - # находим показатели для ZRAM - full = disksize_sum * k + mem_used_total_sum - profit = zram_swap_used - (full / 1024) - cr_real = round(zram_swap_used * 1024 / mem_used_total_sum, 2) - -#5################################################################################### - - # печать своп + зрам - - mem_visual = ( - used * round(mem_busy / mem_total * len_visual) - ).ljust(len_visual, free) - swap_visual = ( - used * round(swap_busy / swap_total * len_visual) - ).ljust(len_visual, free) - mem_swap_visual = ( - used * round(mem_swap_busy / mem_swap_total * len_visual) - ).ljust(len_visual, free) - zram_visual = ( - used * round(full / 1024 / mem_total * 18) - ).ljust(18, free) - - print( - ' MEM SWAP MEM + SWAP ZRAM SWAP' - ) - - print( - 'TOTAL {} {} {} PROFIT {} M'.format( - human(mem_total), - human(swap_total), - human(mem_swap_total), - human(profit) - ) - ) - print( - 'N/A {} {} {} {} {} {} CR {}'.format( - human(mem_busy), - percent(mem_busy / mem_total), - human(swap_busy), - percent(swap_busy / swap_total), - human(mem_swap_busy), - percent(mem_swap_busy / mem_swap_total), - str(cr_real).rjust(7, ' ') - ) - ) - print( - 'AVAIL {} {} {} {} {} {} FULL/MT {} %'.format( - human(mem_available), - percent(mem_available / mem_total), - human(swap_free), - percent(swap_free / swap_total), - human(mem_swap_free), - percent(mem_swap_free / mem_swap_total), - percent(full / 1024 / mem_total) - ) - ) - print( - 'DELTA {} {} {}'.format( - human(delta_mem), - human(delta_swap), - human(delta_all) - ) - ) - print( - '{} {} {} {} {}'.format( - old_list[3], - mem_visual, - swap_visual, - mem_swap_visual, - zram_visual - ) - ) - print() - -#6################################################################################### - - # печать по партициям - - print('FILENAME USED SIZE PRIORITY') - - for i in swaps_list: - x = swaps_raw(i) - print( - '{} {} G {} % {} G {}'.format( - str(x[0]).ljust(26, ' '), - human(x[2]), - percent(x[2] / x[1]), human(x[1]), - str(x[3]).rjust(10, ' ') - ) - ) - - return [mem_busy, swap_busy, mem_swap_busy, movie_dict[(old_list[3])], tn] - - - - - - -try: - - delta = [0, 0, 0, '+----', 0] - - if mode == 'log' or mode == '2': - - while True: - delta = printer(delta) - sleep(period) - - elif mode == 'inplace' or mode == '3': - - while True: - print("\033c") - delta = printer(delta) - sleep(period) - - else: - - delta = printer(delta) - -except KeyboardInterrupt: - print() - exit() - diff --git a/trash/n10 b/trash/n10 deleted file mode 100755 index ada86b7..0000000 --- a/trash/n10 +++ /dev/null @@ -1,3020 +0,0 @@ -#!/usr/bin/env python3 -"""A daemon that prevents OOM in Linux systems.""" - -import os -from ctypes import CDLL -from time import sleep, time -from operator import itemgetter -from sys import stdout, stderr, argv, exit, version -from re import search -from sre_constants import error as invalid_re -from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP - - -start_time = time() - - -help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG] - -optional arguments: - -h, --help show this help message and exit - -v, --version print version - -t, --test print some tests - -p, --print-proc-table - print table of processes with their badness values - -c CONFIG, --config CONFIG - path to the config file, default values: - ./nohang.conf, /etc/nohang/nohang.conf""" - - -SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK']) - -SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE']) - -conf_err_mess = 'Invalid config. Exit.' - -sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP] - -sig_dict = { - SIGKILL: 'SIGKILL', - SIGINT: 'SIGINT', - SIGQUIT: 'SIGQUIT', - SIGHUP: 'SIGHUP', - SIGTERM: 'SIGTERM' -} - -self_pid = str(os.getpid()) - -self_uid = os.geteuid() - -if self_uid == 0: - root = True -else: - root = False - - -if os.path.exists('./nohang_notify_helper'): - notify_helper_path = './nohang_notify_helper' -else: - notify_helper_path = '/usr/sbin/nohang_notify_helper' - - -victim_dict = dict() - - - -victim_id = None -actions_time_dict = dict() -actions_time_dict['action_handled'] = [time(), victim_id] -# print(actions_time_dict) - - - -# will store corrective actions stat -stat_dict = dict() - - -separate_log = False # will be overwritten after parse config - - -def find_cgroup_indexes(): - """ Find cgroup-line positions in /proc/*/cgroup file. - """ - - cgroup_v1_index = None - cgroup_v2_index = None - - with open('/proc/self/cgroup') as f: - for index, line in enumerate(f): - if ':name=' in line: - cgroup_v1_index = index - if line.startswith('0::'): - cgroup_v2_index = index - - return cgroup_v1_index, cgroup_v2_index - - -cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes() - - -########################################################################## - -# define functions - -''' -def self_rss(): - """ - """ - return pid_to_status(self_pid)[5] - - -def print_self_rss(): - """ - """ - log('Self RSS: {} MiB'.format(self_rss())) -''' - - -def pid_to_rss(pid): - try: - rss = int(rline1( - '/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE - except IndexError: - rss = None - except FileNotFoundError: - rss = None - except ProcessLookupError: - rss = None - return rss - - -def pid_to_vm_size(pid): - try: - vm_size = int(rline1( - '/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE - except IndexError: - vm_size = None - except FileNotFoundError: - vm_size = None - except ProcessLookupError: - vm_size = None - return vm_size - - - - - - - - -def signal_handler(signum, frame): - """ - """ - for i in sig_list: - signal(i, signal_handler_inner) - log('Signal handler called with the {} signal '.format( - sig_dict[signum])) - update_stat_dict_and_print(None) - log('Exit') - exit() - - -def signal_handler_inner(signum, frame): - """ - """ - log('Signal handler called with the {} signal (ignored) '.format( - sig_dict[signum])) - - -def exe(cmd): - """ - """ - log('Execute the command: {}'.format(cmd)) - t0 = time() - write_self_oom_score_adj(self_oom_score_adj_max) - err = os.system(cmd) - write_self_oom_score_adj(self_oom_score_adj_min) - dt = time() - t0 - log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3))) - return err - - -def write(path, string): - """ - """ - with open(path, 'w') as f: - f.write(string) - - -def write_self_oom_score_adj(new_value): - """ - """ - if root: - write('/proc/self/oom_score_adj', new_value) - - -self_oom_score_adj_min = '-600' -self_oom_score_adj_max = '-6' - - -write_self_oom_score_adj(self_oom_score_adj_min) - - -def valid_re(reg_exp): - """Validate regular expression. - """ - try: - search(reg_exp, '') - except invalid_re: - log('Invalid config: invalid regexp: {}'.format(reg_exp)) - exit(1) - - -def func_print_proc_table(): - """ - """ - print_proc_table = True - find_victim(print_proc_table) - exit() - - -def log(*msg): - """ - """ - try: - print(*msg) - except OSError: - sleep(0.01) - if separate_log: - try: - info(*msg) - except OSError: - sleep(0.01) - - -def print_version(): - """ - """ - try: - v = rline1('/etc/nohang/version') - except FileNotFoundError: - v = None - if v is None: - print('Nohang unknown version') - else: - print('Nohang ' + v) - exit() - - -def test(): - """ - """ - print('\n(This option is not ready to use!)\n') - - print(version) - print(argv) - - hr = '==================================' - print(hr) - print("uptime()") - print(uptime()) - - print(hr) - print("os.uname()") - print(os.uname()) - - print(hr) - print("pid_to_starttime('self')") - print(pid_to_starttime('self')) - - print(hr) - print("get_victim_id('self')") - print(get_victim_id('self')) - - print(hr) - print("errprint('test')") - print(errprint('test')) - - print(hr) - print("mlockall()") - print(mlockall()) - - print(hr) - print("pid_to_state('2')") - print(pid_to_state('2')) - - exit() - - -def pid_to_cgroup_v1(pid): - """ - """ - cgroup_v1 = '' - try: - with open('/proc/' + pid + '/cgroup') as f: - for index, line in enumerate(f): - if index == cgroup_v1_index: - cgroup_v1 = '/' + line.partition('/')[2][:-1] - return cgroup_v1 - except FileNotFoundError: - return '' - - -def pid_to_cgroup_v2(pid): - """ - """ - cgroup_v2 = '' - try: - with open('/proc/' + pid + '/cgroup') as f: - for index, line in enumerate(f): - if index == cgroup_v2_index: - cgroup_v2 = line[3:-1] - return cgroup_v2 - except FileNotFoundError: - return '' - - -def pid_to_starttime(pid): - """ handle FNF error! - """ - try: - starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[ - 2].split(' ')[20] - - except UnicodeDecodeError: - # print('LOL') - with open('/proc/' + pid + '/stat', 'rb') as f: - starttime = f.read().decode('utf-8', 'ignore').rpartition( - ')')[2].split(' ')[20] - - return float(starttime) / SC_CLK_TCK - - -def get_victim_id(pid): - """victim_id is starttime + pid""" - try: - return rline1('/proc/' + pid + '/stat').rpartition( - ')')[2].split(' ')[20] + '_pid' + pid - except FileNotFoundError: - return '' - except ProcessLookupError: - return '' - - -def pid_to_state(pid): - """ Handle FNF error! (BTW it already handled in find_victim_info()) - """ - return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1] - - -def pid_to_name(pid): - """ - """ - try: - with open('/proc/' + pid + '/comm', 'rb') as f: - return f.read().decode('utf-8', 'ignore')[:-1] - except FileNotFoundError: - return '' - except ProcessLookupError: - return '' - - -def pid_to_ppid(pid): - """ - """ - try: - with open('/proc/' + pid + '/status') as f: - for n, line in enumerate(f): - if n is ppid_index: - return line.split('\t')[1].strip() - except FileNotFoundError: - return '' - except ProcessLookupError: - return '' - except UnicodeDecodeError: - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - for i in range(len(f_list)): - if i is ppid_index: - return f_list[i].split('\t')[1] - - -def pid_to_ancestry(pid, max_ancestry_depth=1): - """ - """ - if max_ancestry_depth == 1: - ppid = pid_to_ppid(pid) - pname = pid_to_name(ppid) - return '\n PPID: {} ({})'.format(ppid, pname) - if max_ancestry_depth == 0: - return '' - anc_list = [] - for i in range(max_ancestry_depth): - ppid = pid_to_ppid(pid) - pname = pid_to_name(ppid) - anc_list.append((ppid, pname)) - if ppid == '1': - break - pid = ppid - a = '' - for i in anc_list: - a = a + ' <= PID {} ({})'.format(i[0], i[1]) - return '\n Ancestry: ' + a[4:] - - -def pid_to_cmdline(pid): - """ - Get process cmdline by pid. - - pid: str pid of required process - returns string cmdline - """ - try: - with open('/proc/' + pid + '/cmdline') as f: - return f.read().replace('\x00', ' ').rstrip() - except FileNotFoundError: - return '' - - -def pid_to_environ(pid): - """ - Get process environ by pid. - - pid: str pid of required process - returns string environ - """ - try: - with open('/proc/' + pid + '/environ') as f: - return f.read().replace('\x00', ' ').rstrip() - except FileNotFoundError: - return '' - - -def pid_to_realpath(pid): - try: - return os.path.realpath('/proc/' + pid + '/exe') - except FileNotFoundError: - return '' - - -def pid_to_uid(pid): - """return euid""" - try: - with open('/proc/' + pid + '/status') as f: - for n, line in enumerate(f): - if n is uid_index: - return line.split('\t')[2] - except UnicodeDecodeError: - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - return f_list[uid_index].split('\t')[2] - except FileNotFoundError: - return '' - - -def pid_to_badness(pid): - """Find and modify badness (if it needs).""" - - try: - - oom_score = int(rline1('/proc/' + pid + '/oom_score')) - badness = oom_score - - if decrease_oom_score_adj: - oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj')) - if badness > oom_score_adj_max and oom_score_adj > 0: - badness = badness - oom_score_adj + oom_score_adj_max - - if regex_matching: - name = pid_to_name(pid) - for re_tup in processname_re_list: - if search(re_tup[1], name) is not None: - badness += int(re_tup[0]) - - if re_match_cgroup_v1: - cgroup_v1 = pid_to_cgroup_v1(pid) - for re_tup in cgroup_v1_re_list: - if search(re_tup[1], cgroup_v1) is not None: - badness += int(re_tup[0]) - - if re_match_cgroup_v2: - cgroup_v2 = pid_to_cgroup_v2(pid) - for re_tup in cgroup_v2_re_list: - if search(re_tup[1], cgroup_v2) is not None: - badness += int(re_tup[0]) - - if re_match_realpath: - realpath = pid_to_realpath(pid) - for re_tup in realpath_re_list: - if search(re_tup[1], realpath) is not None: - badness += int(re_tup[0]) - - if re_match_cmdline: - cmdline = pid_to_cmdline(pid) - for re_tup in cmdline_re_list: - if search(re_tup[1], cmdline) is not None: - badness += int(re_tup[0]) - - if re_match_environ: - environ = pid_to_environ(pid) - for re_tup in environ_re_list: - if search(re_tup[1], environ) is not None: - badness += int(re_tup[0]) - - if re_match_uid: - uid = pid_to_uid(pid) - for re_tup in uid_re_list: - if search(re_tup[1], uid) is not None: - badness += int(re_tup[0]) - - if forbid_negative_badness: - if badness < 0: - badness = 0 - - return badness, oom_score - - except FileNotFoundError: - return None, None - except ProcessLookupError: - return None, None - - -def pid_to_status(pid): - """ - """ - - try: - - with open('/proc/' + pid + '/status') as f: - - for n, line in enumerate(f): - - if n is 0: - name = line.split('\t')[1][:-1] - - if n is state_index: - state = line.split('\t')[1][0] - continue - - if n is ppid_index: - ppid = line.split('\t')[1][:-1] - continue - - if n is uid_index: - uid = line.split('\t')[2] - continue - - if n is vm_size_index: - vm_size = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if n is vm_rss_index: - vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if n is vm_swap_index: - vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) - break - - return name, state, ppid, uid, vm_size, vm_rss, vm_swap - - except UnicodeDecodeError: - return pid_to_status_unicode(pid) - - except FileNotFoundError: - return None - - except ProcessLookupError: - return None - - except ValueError: - return None - - -def pid_to_status_unicode(pid): - """ - """ - try: - - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - - for i in range(len(f_list)): - - if i is 0: - name = f_list[i].split('\t')[1] - - if i is state_index: - state = f_list[i].split('\t')[1][0] - - if i is ppid_index: - ppid = f_list[i].split('\t')[1] - - if i is uid_index: - uid = f_list[i].split('\t')[2] - - if i is vm_size_index: - vm_size = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is vm_rss_index: - vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) - - if i is vm_swap_index: - vm_swap = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) - - return name, state, ppid, uid, vm_size, vm_rss, vm_swap - - except FileNotFoundError: - return None - - except ProcessLookupError: - return None - - except ValueError: - return None - - -########################################################################## - - -def uptime(): - """ - """ - return float(rline1('/proc/uptime').split(' ')[0]) - - -def errprint(*text): - """ - """ - print(*text, file=stderr, flush=True) - - -def mlockall(): - """Lock all memory to prevent swapping nohang process.""" - - MCL_CURRENT = 1 - MCL_FUTURE = 2 - MCL_ONFAULT = 4 - - libc = CDLL('libc.so.6', use_errno=True) - - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT - ) - if result != 0: - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE - ) - if result != 0: - log('WARNING: cannot lock all memory') - else: - pass - # log('All memory locked with MCL_CURRENT | MCL_FUTURE') - else: - pass - # log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT') - - -def update_stat_dict_and_print(key): - """ - """ - - if key is not None: - - if key not in stat_dict: - - stat_dict.update({key: 1}) - - else: - - new_value = stat_dict[key] + 1 - stat_dict.update({key: new_value}) - - if print_total_stat: - - stats_msg = 'Total stat (what happened in the last {}):'.format( - format_time(time() - start_time)) - - for i in stat_dict: - stats_msg += '\n {}: {}'.format(i, stat_dict[i]) - - log(stats_msg) - - -def find_psi_metrics_value(psi_path, psi_metrics): - """ - """ - - if psi_support: - - if psi_metrics == 'some_avg10': - return float(rline1(psi_path).split(' ')[1].split('=')[1]) - if psi_metrics == 'some_avg60': - return float(rline1(psi_path).split(' ')[2].split('=')[1]) - if psi_metrics == 'some_avg300': - return float(rline1(psi_path).split(' ')[3].split('=')[1]) - - if psi_metrics == 'full_avg10': - with open(psi_path) as f: - psi_list = f.readlines() - return float(psi_list[1].split(' ')[1].split('=')[1]) - if psi_metrics == 'full_avg60': - with open(psi_path) as f: - psi_list = f.readlines() - return float(psi_list[1].split(' ')[2].split('=')[1]) - if psi_metrics == 'full_avg300': - with open(psi_path) as f: - psi_list = f.readlines() - return float(psi_list[1].split(' ')[3].split('=')[1]) - - -def check_mem_and_swap(): - """find mem_available, swap_total, swap_free""" - with open('/proc/meminfo') as f: - for n, line in enumerate(f): - if n is 2: - mem_available = int(line.split(':')[1][:-4]) - continue - if n is swap_total_index: - swap_total = int(line.split(':')[1][:-4]) - continue - if n is swap_free_index: - swap_free = int(line.split(':')[1][:-4]) - break - return mem_available, swap_total, swap_free - - -def check_zram(): - """find MemUsedZram""" - disksize_sum = 0 - mem_used_total_sum = 0 - - for dev in os.listdir('/sys/block'): - if dev.startswith('zram'): - stat = zram_stat(dev) - disksize_sum += int(stat[0]) - mem_used_total_sum += int(stat[1]) - - # Means that when setting zram disksize = 1 GiB available memory - # decrease by 0.0042 GiB. - # Found experimentally, requires clarification with different kernaels and - # architectures. - # On small disk drives (up to gigabyte) it can be more, up to 0.0045. - # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should - # be 0.001: - # ("zram uses about 0.1% of the size of the disk" - # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt), - # but this statement contradicts the experimental data. - # ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize - # Found experimentally. - ZRAM_DISKSIZE_FACTOR = 0.0042 - - return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0 - - -def format_time(t): - """ - """ - t = int(t) - if t < 60: - return '{} sec'.format(t) - elif t >= 60 and t < 3600: - m = t // 60 - s = t % 60 - return '{} min {} sec'.format(m, s) - else: - h = t // 3600 - s0 = t - h * 3600 - m = s0 // 60 - s = s0 % 60 - return '{} h {} min {} sec'.format(h, m, s) - - -def string_to_float_convert_test(string): - """Try to interprete string values as floats.""" - try: - return float(string) - except ValueError: - return None - - -def string_to_int_convert_test(string): - """Try to interpret string values as integers.""" - try: - return int(string) - except ValueError: - return None - - -def conf_parse_string(param): - """ - Get string parameters from the config dict. - - param: config_dict key - returns config_dict[param].strip() - """ - if param in config_dict: - return config_dict[param].strip() - else: - errprint('All the necessary parameters must be in the config') - errprint('There is no "{}" parameter in the config'.format(param)) - exit(1) - - -def conf_parse_bool(param): - """ - Get bool parameters from the config_dict. - - param: config_dicst key - returns bool - """ - if param in config_dict: - param_str = config_dict[param] - if param_str == 'True': - return True - elif param_str == 'False': - return False - else: - errprint('Invalid value of the "{}" parameter.'.format(param)) - errprint('Valid values are True and False.') - errprint('Exit') - exit(1) - else: - errprint('All the necessary parameters must be in the config') - errprint('There is no "{}" parameter in the config'.format(param)) - exit(1) - - -def rline1(path): - """read 1st line from path.""" - try: - with open(path) as f: - for line in f: - return line[:-1] - except UnicodeDecodeError: - with open(path, 'rb') as f: - return f.read(999).decode( - 'utf-8', 'ignore').split('\n')[0] # use partition()! - - -def kib_to_mib(num): - """Convert KiB values to MiB values.""" - return round(num / 1024.0) - - -def percent(num): - """Interprete num as percentage.""" - return round(num * 100, 1) - - -def just_percent_mem(num): - """convert num to percent and justify""" - return str(round(num * 100, 1)).rjust(4, ' ') - - -def just_percent_swap(num): - """ - """ - return str(round(num * 100, 1)).rjust(5, ' ') - - -def human(num, lenth): - """Convert KiB values to MiB values with right alignment""" - return str(round(num / 1024)).rjust(lenth, ' ') - - -def zram_stat(zram_id): - """ - Get zram state. - - zram_id: str zram block-device id - returns bytes diskcize, str mem_used_total - """ - try: - disksize = rline1('/sys/block/' + zram_id + '/disksize') - except FileNotFoundError: - return '0', '0' - if disksize == ['0\n']: - return '0', '0' - try: - mm_stat = rline1('/sys/block/' + zram_id + '/mm_stat').split(' ') - mm_stat_list = [] - for i in mm_stat: - if i != '': - mm_stat_list.append(i) - mem_used_total = mm_stat_list[2] - except FileNotFoundError: - mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total') - return disksize, mem_used_total # BYTES, str - - -def send_notify_warn(): - """ - Look for process with maximum 'badness' and warn user with notification. - (implement Low memory warnings) - """ - log('Warning threshold exceeded') - - if check_warning_exe: - exe(warning_exe) - - else: - - title = 'Low memory' - - body = 'MemAvail: {}%\nSwapFree: {}%'.format( - round(mem_available / mem_total * 100), - round(swap_free / (swap_total + 0.1) * 100) - ) - - send_notification(title, body) - - -def send_notify(signal, name, pid): - """ - Notificate about OOM Preventing. - - signal: key for notify_sig_dict - name: str process name - pid: str process pid - """ - - # wait for memory release after corrective action - # may be useful if free memory was about 0 immediately after - # corrective action - sleep(0.05) - - title = 'Freeze prevention' - body = '{} [{}] {}'.format( - notify_sig_dict[signal], - pid, - name.replace( - # symbol '&' can break notifications in some themes, - # therefore it is replaced by '*' - '&', '*' - ) - ) - - send_notification(title, body) - - -def send_notify_etc(pid, name, command): - """ - Notificate about OOM Preventing. - - command: str command that will be executed - name: str process name - pid: str process pid - """ - title = 'Freeze prevention' - body = 'Victim is [{}] {}\nExecute the co' \ - 'mmand:\n{}'.format( - pid, name.replace('&', '*'), command.replace('&', '*')) - - send_notification(title, body) - - -def send_notification(title, body): - """ - """ - split_by = '#' * 16 - - t000 = time() - - path_to_cache = '/dev/shm/nohang_notify_cache_uid{}_time{}'.format( - str(self_uid), t000 - ) - - text = '{}{}{}'.format(title, split_by, body) - - try: - with open(path_to_cache, 'w') as f: - f.write(text) - os.chmod(path_to_cache, 0o600) - except OSError: - log('OSError while send notification ' - '(No space left on device: /dev/shm)') - return None - - cmd = '{} --uid {} --time {} &'.format(notify_helper_path, self_uid, t000) - - exe(cmd) - - -def get_pid_list(): - """ - Find pid list expect kthreads and zombies - """ - pid_list = [] - for pid in os.listdir('/proc'): - if os.path.exists('/proc/' + pid + '/exe') is True: - pid_list.append(pid) - return pid_list - - -pid_list = get_pid_list() - - -def get_non_decimal_pids(): - """ - """ - non_decimal_list = [] - for pid in pid_list: - if pid[0].isdecimal() is False: - non_decimal_list.append(pid) - return non_decimal_list - - -def find_victim(_print_proc_table): - """ - Find the process with highest badness and its badness adjustment - Return pid and badness - """ - - ft1 = time() - - pid_list = get_pid_list() - - pid_list.remove(self_pid) - - if '1' in pid_list: - pid_list.remove('1') - - non_decimal_list = get_non_decimal_pids() - - for i in non_decimal_list: - if i in pid_list: - pid_list.remove(i) - - pid_badness_list = [] - - if _print_proc_table: - - if extra_table_info == 'None': - extra_table_title = '' - - elif extra_table_info == 'cgroup_v1': - extra_table_title = 'CGroup_v1' - - elif extra_table_info == 'cgroup_v2': - extra_table_title = 'CGroup_v2' - - elif extra_table_info == 'cmdline': - extra_table_title = 'cmdline' - - elif extra_table_info == 'environ': - extra_table_title = 'environ' - - elif extra_table_info == 'realpath': - extra_table_title = 'realpath' - - elif extra_table_info == 'All': - extra_table_title = '[CGroup] [CmdLine] [RealPath]' - else: - extra_table_title = '' - - hr = '#' * 115 - - log(hr) - log('# PID PPID badness oom_score oom_score_adj e' - 'UID S VmSize VmRSS VmSwap Name {}'.format( - extra_table_title)) - log('#------- ------- ------- --------- ------------- -------' - '--- - ------ ----- ------ --------------- --------') - - for pid in pid_list: - - badness = pid_to_badness(pid)[0] - - if badness is None: - continue - - if _print_proc_table: - - try: - oom_score = rline1('/proc/' + pid + '/oom_score') - oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') - except FileNotFoundError: - continue - - if pid_to_status(pid) is None: - continue - else: - (name, state, ppid, uid, vm_size, vm_rss, - vm_swap) = pid_to_status(pid) - - if extra_table_info == 'None': - extra_table_line = '' - - elif extra_table_info == 'cgroup_v1': - extra_table_line = pid_to_cgroup_v1(pid) - - elif extra_table_info == 'cgroup_v2': - extra_table_line = pid_to_cgroup_v2(pid) - - elif extra_table_info == 'cmdline': - extra_table_line = pid_to_cmdline(pid) - - elif extra_table_info == 'environ': - extra_table_line = pid_to_environ(pid) - - elif extra_table_info == 'realpath': - extra_table_line = pid_to_realpath(pid) - - elif extra_table_info == 'All': - extra_table_line = '[CG: {}] [CL: {}] [RP: {}]'.format( - pid_to_cgroup_v1(pid), - pid_to_cmdline(pid), - pid_to_realpath(pid) - ) - else: - extra_table_line = '' - - log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format( - pid.rjust(7), - ppid.rjust(7), - str(badness).rjust(7), - oom_score.rjust(9), - oom_score_adj.rjust(13), - uid.rjust(10), - state, - str(vm_size).rjust(6), - str(vm_rss).rjust(5), - str(vm_swap).rjust(6), - name.ljust(15), - extra_table_line - ) - ) - - pid_badness_list.append((pid, badness)) - - real_proc_num = len(pid_badness_list) - - # Make list of (pid, badness) tuples, sorted by 'badness' values - # print(pid_badness_list) - pid_tuple_list = sorted( - pid_badness_list, - key=itemgetter(1), - reverse=True - )[0] - - pid = pid_tuple_list[0] - - # Get maximum 'badness' value - victim_badness = pid_tuple_list[1] - victim_name = pid_to_name(pid) - - if _print_proc_table: - log(hr) - - log('Found {} processes with existing /proc/[pid]/exe'.format( - real_proc_num)) - - log( - 'Process with highest badness (found in {} ms):\n PID: {}, Na' - 'me: {}, badness: {}'.format( - round((time() - ft1) * 1000), - pid, - victim_name, - victim_badness - ) - ) - - return pid, victim_badness, victim_name - - -def find_victim_info(pid, victim_badness, name): - """ - """ - status0 = time() - - try: - - with open('/proc/' + pid + '/status') as f: - - for n, line in enumerate(f): - - if n is state_index: - state = line.split('\t')[1].rstrip() - continue - - if n is ppid_index: - ppid = line.split('\t')[1] - continue - - if n is uid_index: - uid = line.split('\t')[2] - continue - - if n is vm_size_index: - vm_size = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if n is vm_rss_index: - vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if detailed_rss: - - if n is anon_index: - anon_rss = kib_to_mib( - int(line.split('\t')[1][:-4])) - continue - - if n is file_index: - file_rss = kib_to_mib( - int(line.split('\t')[1][:-4])) - continue - - if n is shmem_index: - shmem_rss = kib_to_mib( - int(line.split('\t')[1][:-4])) - continue - - if n is vm_swap_index: - vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) - break - - cmdline = pid_to_cmdline(pid) - oom_score = rline1('/proc/' + pid + '/oom_score') - oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') - - except FileNotFoundError: - log('The victim died in the search process: FileNotFoundError') - update_stat_dict_and_print( - 'The victim died in the search process: FileNotFoundError') - return None - except ProcessLookupError: - log('The victim died in the search process: ProcessLookupError') - update_stat_dict_and_print( - 'The victim died in the search process: ProcessLookupError') - return None - except UnicodeDecodeError: - - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - - for i in range(len(f_list)): - - if i is state_index: - state = f_list[i].split('\t')[1].rstrip() - - if i is ppid_index: - ppid = f_list[i].split('\t')[1] - - if i is uid_index: - uid = f_list[i].split('\t')[2] - - if i is vm_size_index: - vm_size = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is vm_rss_index: - vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) - - if detailed_rss: - - if i is anon_index: - anon_rss = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is file_index: - file_rss = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is shmem_index: - shmem_rss = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is vm_swap_index: - vm_swap = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - cmdline = pid_to_cmdline(pid) - oom_score = rline1('/proc/' + pid + '/oom_score') - oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') - - except IndexError: - log('The victim died in the search process: IndexError') - update_stat_dict_and_print( - 'The victim died in the search process: IndexError') - return None - except ValueError: - log('The victim died in the search process: ValueError') - update_stat_dict_and_print( - 'The victim died in the search process: ValueError') - return None - except FileNotFoundError: - log('The victim died in the search process: FileNotFoundError') - update_stat_dict_and_print( - 'The victim died in the search process: FileNotFoundError') - return None - except ProcessLookupError: - log('The victim died in the search process: ProcessLookupError') - update_stat_dict_and_print( - 'The victim died in the search process: ProcessLookupError') - return None - - len_vm = len(str(vm_size)) - - try: - realpath = os.path.realpath('/proc/' + pid + '/exe') - victim_lifetime = format_time(uptime() - pid_to_starttime(pid)) - victim_cgroup_v1 = pid_to_cgroup_v1(pid) - victim_cgroup_v2 = pid_to_cgroup_v2(pid) - - except FileNotFoundError: - print('The victim died in the search process: FileNotFoundError') - update_stat_dict_and_print( - 'The victim died in the search process: FileNotFoundError') - return None - - ancestry = pid_to_ancestry(pid, max_ancestry_depth) - - if detailed_rss: - detailed_rss_info = ' (' \ - 'Anon: {} MiB, ' \ - 'File: {} MiB, ' \ - 'Shmem: {} MiB)'.format( - anon_rss, - file_rss, - shmem_rss) - else: - detailed_rss_info = '' - - victim_info = 'Victim information (found in {} ms):' \ - '\n Name: {}' \ - '\n State: {}' \ - '\n PID: {}' \ - '{}' \ - '\n EUID: {}' \ - '\n badness: {}, ' \ - 'oom_score: {}, ' \ - 'oom_score_adj: {}' \ - '\n VmSize: {} MiB' \ - '\n VmRSS: {} MiB {}' \ - '\n VmSwap: {} MiB' \ - '\n CGroup_v1: {}' \ - '\n CGroup_v2: {}' \ - '\n Realpath: {}' \ - '\n Cmdline: {}' \ - '\n Lifetime: {}'.format( - round((time() - status0) * 1000), - name, - state, - pid, - ancestry, - uid, - victim_badness, - oom_score, - oom_score_adj, - vm_size, - str(vm_rss).rjust(len_vm), - detailed_rss_info, - str(vm_swap).rjust(len_vm), - victim_cgroup_v1, - victim_cgroup_v2, - realpath, - cmdline, - victim_lifetime) - - return victim_info - - - - - - - - - - - - -def implement_corrective_action(signal): - """ - Find victim with highest badness and send SIGTERM/SIGKILL - """ - - - # выходим из фции, если для SIGTERM порога не превышено время min_delay_after_sigterm и спим в течение over_sleep - if signal is SIGTERM: - - dt = time() - actions_time_dict['action_handled'][0] - - if dt < min_delay_after_sigterm: - pass - # print('min_delay_after_sigterm IS NOT EXCEEDED ({} < {})'.format(round(dt, 3), min_delay_after_sigterm)) - - if print_sleep_periods: - pass - # log('Sleep {} sec [in implement_corrective_action()]'.format(over_sleep)) - - sleep(over_sleep) - - return None # время задержки между действиями не истекло - else: - pass - # print('min_delay_after_sigterm IS EXCEEDED, it is time to action') - - - - - """ - - При заходе в фцию проверяем права на сигтерм. Права на сигкилл у нас всегда есть - (потому что идем дальше только после полн освободж памяти после смерти жертвы) - - actions_time_dict[action_handled] = time() - actions_time_dict[veto] = True - - actions_time_dict['action_handled'] = [time(), victim_id] - - - - """ - - - # log(mem_info) - - pid, victim_badness, name = find_victim(print_proc_table) - - if victim_badness >= min_badness: - - if print_victim_info: - victim_info = find_victim_info(pid, victim_badness, name) - log(victim_info) - - - - - # пороги могли превысиься за время поиска жертвы (поиск может занимать сотни миллисекунд) - mem_available, swap_total, swap_free = check_mem_and_swap() - - ma_mib = int(mem_available) / 1024.0 - sf_mib = int(swap_free) / 1024.0 - log('Memory status before implementing a corrective act' - 'ion:\n MemAvailable' - ': {} MiB, SwapFree: {} MiB'.format( - round(ma_mib, 1), round(sf_mib, 1) - ) - ) - - if (mem_available <= mem_min_sigkill_kb and - swap_free <= swap_min_sigkill_kb): - log('Hard threshold exceeded') - signal = SIGKILL - - - - victim_id = get_victim_id(pid) - - - - - - - # kill the victim if it doesn't respond to SIGTERM В ТЕЧЕНИЕ - # ЗАДАННОГО ВРЕМЕНИ - - # переопределяем сигнал для старых жертв - if signal is SIGTERM: - - if victim_id in victim_dict: - - dt = time() - victim_dict[victim_id] - - if dt > max_post_sigterm_victim_lifetime: - pass - # print('max_post_sigterm_victim_lifetime exceeded: the victim will get SIGKILL') - signal = SIGKILL - - - - - - - - - - - - # matching with re to customize corrective actions - soft_match = False - - if soft_actions and signal is SIGTERM: - name = pid_to_name(pid) - cgroup_v1 = pid_to_cgroup_v1(pid) - service = '' - cgroup_v1_tail = cgroup_v1.rpartition('/')[2] - if cgroup_v1_tail.endswith('.service'): - service = cgroup_v1_tail - for i in soft_actions_list: - unit = i[0] - if unit == 'name': - u = name - else: - u = cgroup_v1 - regexp = i[1] - command = i[2] - if search(regexp, u) is not None: - log("Regexp '{}' matches with {} '{}'".format( - regexp, unit, u)) - soft_match = True - break - - if soft_match: - - # todo: make new func - m = check_mem_and_swap() - ma = int(m[0]) / 1024.0 - sf = int(m[2]) / 1024.0 - log('Memory status before implementing a corrective act' - 'ion:\n MemAvailable' - ': {} MiB, SwapFree: {} MiB'.format( - round(ma, 1), round(sf, 1) - ) - ) - - cmd = command.replace( - '$PID', - pid).replace( - '$NAME', - pid_to_name(pid)).replace( - '$SERVICE', - service) - - exit_status = exe(cmd) - - exit_status = str(exit_status) - - response_time = time() - time0 - - # тут надо, как и при дефолтном действии, проверять существование жертвы, ее реакцию на действие, - # и время ее смерти в случае успеха, о обновление таймстемпов действия - - etc_info = 'Implement a corrective act' \ - 'ion:\n Run the command: {}' \ - '\n Exit status: {}; total response ' \ - 'time: {} ms'.format( - cmd, - exit_status, - round(response_time * 1000)) - - log(etc_info) - - key = "Run the command '{}'".format(cmd) - update_stat_dict_and_print(key) - - if gui_notifications: - send_notify_etc( - pid, - name, - command.replace('$PID', pid).replace( - '$NAME', pid_to_name(pid))) - - - - - - - - - else: - - # обычное действие через сигнал - try: - - - os.kill(int(pid), signal) - kill_timestamp = time() - response_time = kill_timestamp - time0 - - - - - - - - while True: - exe_exists = os.path.exists('/proc/{}/exe'.format(pid)) - rss = pid_to_rss(pid) - dt = time() - kill_timestamp - # log('Victim VmRSS: {} KiB'.format(rss)) - if not exe_exists or rss == 0 or dt > 0.01: - #print(dt) - break - sleep(0.001) - - if dt > 0.01: - # log('Timer (value = 0.01 sec) expired; seems like the victim handles signal') - - actions_time_dict['action_handled'] = [time(), get_victim_id(pid)] - - - if victim_id not in victim_dict: # хз как надо. - victim_dict.update({victim_id: time()}) - - - # log('actions_time_dict', actions_time_dict) - # log('victim_dict', victim_dict) - - - - - else: - log('Process exited (VmRSS = 0) in {} sec'.format( - round(dt, 5))) - - - - - - - - if signal is SIGKILL or not exe_exists or rss == 0: - - while True: - sleep(0.001) - rss = pid_to_rss(pid) # рсс не важен когда путь не существует. Проверяй просто существование пид. - if rss is None: - break - t1 = time() - kill_duration = t1 - kill_timestamp - log('The victim died in {} sec'.format( - round(kill_duration, 3))) - - - mem_available, swap_total, swap_free = check_mem_and_swap() - - ma_mib = int(mem_available) / 1024.0 - sf_mib = int(swap_free) / 1024.0 - log('Memory status after implementing a corrective act' - 'ion:\n MemAvailable' - ': {} MiB, SwapFree: {} MiB'.format( - round(ma_mib, 1), round(sf_mib, 1) - ) - ) - - - - - - - - send_result = 'total response time: {} ms'.format( - round(response_time * 1000)) - - preventing_oom_message = 'Implement a corrective action:' \ - '\n Send {} to the victim; {}'.format( - sig_dict[signal], send_result) - - key = 'Send {} to {}'.format(sig_dict[signal], name) - - if signal is SIGKILL and post_kill_exe != '': - - cmd = post_kill_exe.replace('$PID', pid).replace( - '$NAME', pid_to_name(pid)) - - log('Execute post_kill_exe') - - exe(cmd) - - if gui_notifications: - send_notify(signal, name, pid) - - except FileNotFoundError: - response_time = time() - time0 - send_result = 'no such process; response time: {} ms'.format( - round(response_time * 1000)) - key = 'FileNotFoundError (the victim died in the se' \ - 'arch process): ' - except ProcessLookupError: - response_time = time() - time0 - send_result = 'no such process; response time: {} ms'.format( - round(response_time * 1000)) - key = 'ProcessLookupError (the victim died in the se' \ - 'arch process): ' - - try: - log(preventing_oom_message) - - except UnboundLocalError: - preventing_oom_message = key - - update_stat_dict_and_print(key) - - else: - - response_time = time() - time0 - victim_badness_is_too_small = 'victim badness {} < min_b' \ - 'adness {}; nothing to do; response time: {} ms'.format( - victim_badness, - min_badness, - round(response_time * 1000)) - - log(victim_badness_is_too_small) - - # update stat_dict - key = 'victim badness < min_badness' - update_stat_dict_and_print(key) - - # тут надо поспать хорошенько. а может и счетчики поправить. - # херню несу. во-первых, внезапно может кто-то появиться c блльшим бэднес.. Далее надо минимизировать аутпут спам. - sleep(over_sleep) - - - # обновлять время не на каждый кил, а только на килл той жертвы, которая не отвечала на софт экшн. - # Вывод: ко времени действия прилагать также виктим айди. - - print('##################################################################') - - -def sleep_after_check_mem(): - """Specify sleep times depends on rates and avialable memory.""" - - if stable_sleep: - - if print_sleep_periods: - log('Sleep {} sec'.format(min_sleep)) - - sleep(min_sleep) - return None - - if mem_min_sigkill_kb < mem_min_sigterm_kb: - mem_point = mem_available - mem_min_sigterm_kb - else: - mem_point = mem_available - mem_min_sigkill_kb - - if swap_min_sigkill_kb < swap_min_sigterm_kb: - swap_point = swap_free - swap_min_sigterm_kb - else: - swap_point = swap_free - swap_min_sigkill_kb - - if swap_point < 0: - swap_point = 0 - - if mem_point < 0: - mem_point = 0 - - t_mem = mem_point / rate_mem - t_swap = swap_point / rate_swap - t_zram = (mem_total * 0.8 - mem_used_zram) / rate_zram - if t_zram < 0: - t_zram = 0 - - t_mem_swap = t_mem + t_swap - t_mem_zram = t_mem + t_zram - - if t_mem_swap <= t_mem_zram: - t = t_mem_swap - else: - t = t_mem_zram - - if t > max_sleep: - t = max_sleep - elif t < min_sleep: - t = min_sleep - else: - pass - - if print_sleep_periods: - - log( - 'Sleep {} sec (t_mem={}, t_swap={}, t_zram={})'.format( - round(t, 2), - round(t_mem, 2), - round(t_swap, 2), - round(t_zram, 2) - ) - ) - - try: - stdout.flush() - except OSError: - pass - - sleep(t) - - -def calculate_percent(arg_key): - """ - parse conf dict - Calculate mem_min_KEY_percent. - - Try use this one) - arg_key: str key for config_dict - returns int mem_min_percent or NoneType if got some error - """ - - if arg_key in config_dict: - mem_min = config_dict[arg_key] - - if mem_min.endswith('%'): - # truncate percents, so we have a number - mem_min_percent = mem_min[:-1].strip() - # then 'float test' - mem_min_percent = string_to_float_convert_test(mem_min_percent) - if mem_min_percent is None: - errprint('Invalid {} value, not float\nExit'.format(arg_key)) - exit(1) - # Final validations... - if mem_min_percent < 0 or mem_min_percent > 100: - errprint( - '{}, as percents value, out of ran' - 'ge [0; 100]\nExit'.format(arg_key)) - exit(1) - - # mem_min_sigterm_percent is clean and valid float percentage. Can - # translate into Kb - mem_min_kb = mem_min_percent / 100 * mem_total - mem_min_mb = round(mem_min_kb / 1024) - - elif mem_min.endswith('M'): - mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip()) - if mem_min_mb is None: - errprint('Invalid {} value, not float\nExit'.format(arg_key)) - exit(1) - mem_min_kb = mem_min_mb * 1024 - if mem_min_kb > mem_total: - errprint( - '{} value can not be greater then MemT' - 'otal ({} MiB)\nExit'.format( - arg_key, round( - mem_total / 1024))) - exit(1) - mem_min_percent = mem_min_kb / mem_total * 100 - - else: - log('Invalid {} units in config.\n Exit'.format(arg_key)) - exit(1) - mem_min_percent = None - - else: - log('{} not in config\nExit'.format(arg_key)) - exit(1) - mem_min_percent = None - - return mem_min_kb, mem_min_mb, mem_min_percent - - -########################################################################## - - -print_proc_table_flag = False - -if len(argv) == 1: - if os.path.exists('./nohang.conf'): - config = os.getcwd() + '/nohang.conf' - else: - config = '/etc/nohang/nohang.conf' - -elif len(argv) == 2: - if argv[1] == '--help' or argv[1] == '-h': - print(help_mess) - exit() - elif argv[1] == '--version' or argv[1] == '-v': - print_version() - elif argv[1] == '--test' or argv[1] == '-t': - test() - elif argv[1] == '--print-proc-table' or argv[1] == '-p': - print_proc_table_flag = True - if os.path.exists('./nohang.conf'): - config = os.getcwd() + '/nohang.conf' - else: - config = '/etc/nohang/nohang.conf' - else: - errprint('Unknown option: {}'.format(argv[1])) - exit(1) - -elif len(argv) == 3: - if argv[1] == '--config' or argv[1] == '-c': - config = argv[2] - else: - errprint('Unknown option: {}'.format(argv[1])) - exit(1) - -else: - errprint('Invalid CLI input: too many options') - exit(1) - - -########################################################################## - - -# find mem_total -# find positions of SwapFree and SwapTotal in /proc/meminfo - -with open('/proc/meminfo') as f: - mem_list = f.readlines() - -mem_list_names = [] -for s in mem_list: - mem_list_names.append(s.split(':')[0]) - -if mem_list_names[2] != 'MemAvailable': - errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied') - # exit(1) - -swap_total_index = mem_list_names.index('SwapTotal') -swap_free_index = swap_total_index + 1 - -mem_total = int(mem_list[0].split(':')[1][:-4]) - -# Get names from /proc/*/status to be able to get VmRSS and VmSwap values - -with open('/proc/self/status') as file: - status_list = file.readlines() - -status_names = [] -for s in status_list: - status_names.append(s.split(':')[0]) - -ppid_index = status_names.index('PPid') -vm_size_index = status_names.index('VmSize') -vm_rss_index = status_names.index('VmRSS') -vm_swap_index = status_names.index('VmSwap') -uid_index = status_names.index('Uid') -state_index = status_names.index('State') - - -try: - anon_index = status_names.index('RssAnon') - file_index = status_names.index('RssFile') - shmem_index = status_names.index('RssShmem') - detailed_rss = True - # print(detailed_rss, 'detailed_rss') -except ValueError: - detailed_rss = False - # print('It is not Linux 4.5+') - -########################################################################## - - -log('Config: ' + config) - - -########################################################################## - -# parsing the config with obtaining the parameters dictionary - -# conf_parameters_dict -# conf_restart_dict - -# dictionary with config options -config_dict = dict() - -processname_re_list = [] -cmdline_re_list = [] -environ_re_list = [] -uid_re_list = [] -cgroup_v1_re_list = [] -cgroup_v2_re_list = [] -realpath_re_list = [] - -soft_actions_list = [] - - -# separator for optional parameters (that starts with @) -opt_separator = '///' - - -# stupid conf parsing, need refactoring -try: - with open(config) as f: - - for line in f: - - a = line.startswith('#') - b = line.startswith('\n') - c = line.startswith('\t') - d = line.startswith(' ') - - etc = line.startswith('@SOFT_ACTION_RE_NAME') - etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1') - - if not a and not b and not c and not d and not etc and not etc2: - a = line.partition('=') - - key = a[0].strip() - value = a[2].strip() - - if key not in config_dict: - config_dict[key] = value - else: - log('ERROR: config key duplication: {}'.format(key)) - exit(1) - - if etc: - - a = line.partition('@SOFT_ACTION_RE_NAME')[ - 2].partition(opt_separator) - - a1 = 'name' - - a2 = a[0].strip() - valid_re(a2) - - a3 = a[2].strip() - - zzz = (a1, a2, a3) - - soft_actions_list.append(zzz) - - if etc2: - - a = line.partition('@SOFT_ACTION_RE_CGROUP_V1')[ - 2].partition(opt_separator) - - a1 = 'cgroup_v1' - - a2 = a[0].strip() - valid_re(a2) - - a3 = a[2].strip() - - zzz = (a1, a2, a3) - - soft_actions_list.append(zzz) - - if line.startswith('@PROCESSNAME_RE'): - a = line.partition( - '@PROCESSNAME_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - processname_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@CMDLINE_RE'): - a = line.partition( - '@CMDLINE_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - cmdline_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@UID_RE'): - a = line.partition( - '@UID_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - uid_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@CGROUP_V1_RE'): - a = line.partition( - '@CGROUP_V1_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - cgroup_v1_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@CGROUP_V2_RE'): - a = line.partition( - '@CGROUP_V2_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - cgroup_v2_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@REALPATH_RE'): - a = line.partition( - '@REALPATH_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - realpath_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@ENVIRON_RE'): - a = line.partition( - '@ENVIRON_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - environ_re_list.append((badness_adj, reg_exp)) - - -except PermissionError: - errprint('PermissionError', conf_err_mess) - exit(1) -except UnicodeDecodeError: - errprint('UnicodeDecodeError', conf_err_mess) - exit(1) -except IsADirectoryError: - errprint('IsADirectoryError', conf_err_mess) - exit(1) -except IndexError: - errprint('IndexError', conf_err_mess) - exit(1) -except FileNotFoundError: - errprint('FileNotFoundError', conf_err_mess) - exit(1) - - -if processname_re_list == []: - regex_matching = False -else: - regex_matching = True - - -if cmdline_re_list == []: - re_match_cmdline = False -else: - re_match_cmdline = True - - -if uid_re_list == []: - re_match_uid = False -else: - re_match_uid = True - - -if environ_re_list == []: - re_match_environ = False -else: - re_match_environ = True - - -if realpath_re_list == []: - re_match_realpath = False -else: - re_match_realpath = True - - -if cgroup_v1_re_list == []: - re_match_cgroup_v1 = False -else: - re_match_cgroup_v1 = True - -if cgroup_v2_re_list == []: - re_match_cgroup_v2 = False -else: - re_match_cgroup_v2 = True - - -# print(processname_re_list) -# print(cmdline_re_list) -# print(uid_re_list) -# print(environ_re_list) -# print(realpath_re_list) -# print(cgroup_v1_re_list) -# print(cgroup_v2_re_list) - -# print(soft_actions_list) - -if soft_actions_list == []: - soft_actions = False -else: - soft_actions = True - -# print('soft_actions:', soft_actions) - -########################################################################## - - -# extracting parameters from the dictionary -# check for all necessary parameters -# validation of all parameters -psi_debug = conf_parse_bool('psi_debug') -print_total_stat = conf_parse_bool('print_total_stat') -print_proc_table = conf_parse_bool('print_proc_table') -forbid_negative_badness = conf_parse_bool('forbid_negative_badness') -print_victim_info = conf_parse_bool('print_victim_info') -print_config = conf_parse_bool('print_config') -print_mem_check_results = conf_parse_bool('print_mem_check_results') -print_sleep_periods = conf_parse_bool('print_sleep_periods') -gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings') -gui_notifications = conf_parse_bool('gui_notifications') -decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj') -ignore_psi = conf_parse_bool('ignore_psi') - -(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent - ) = calculate_percent('mem_min_sigterm') - -(mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent - ) = calculate_percent('mem_min_sigkill') - -(zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent - ) = calculate_percent('zram_max_sigterm') - -(zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent - ) = calculate_percent('zram_max_sigkill') - -(mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent - ) = calculate_percent('mem_min_warnings') - -(zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent - ) = calculate_percent('zram_max_warnings') - - -if 'rate_mem' in config_dict: - rate_mem = string_to_float_convert_test(config_dict['rate_mem']) - if rate_mem is None: - errprint('Invalid rate_mem value, not float\nExit') - exit(1) - if rate_mem <= 0: - errprint('rate_mem MUST be > 0\nExit') - exit(1) -else: - errprint('rate_mem not in config\nExit') - exit(1) - - -if 'rate_swap' in config_dict: - rate_swap = string_to_float_convert_test(config_dict['rate_swap']) - if rate_swap is None: - errprint('Invalid rate_swap value, not float\nExit') - exit(1) - if rate_swap <= 0: - errprint('rate_swap MUST be > 0\nExit') - exit(1) -else: - errprint('rate_swap not in config\nExit') - exit(1) - - -if 'rate_zram' in config_dict: - rate_zram = string_to_float_convert_test(config_dict['rate_zram']) - if rate_zram is None: - errprint('Invalid rate_zram value, not float\nExit') - exit(1) - if rate_zram <= 0: - errprint('rate_zram MUST be > 0\nExit') - exit(1) -else: - errprint('rate_zram not in config\nExit') - exit(1) - - -if 'swap_min_sigterm' in config_dict: - swap_min_sigterm = config_dict['swap_min_sigterm'] -else: - errprint('swap_min_sigterm not in config\nExit') - exit(1) - - -if 'swap_min_sigkill' in config_dict: - swap_min_sigkill = config_dict['swap_min_sigkill'] -else: - errprint('swap_min_sigkill not in config\nExit') - exit(1) - - -if 'min_delay_after_sigterm' in config_dict: - min_delay_after_sigterm = string_to_float_convert_test( - config_dict['min_delay_after_sigterm']) - if min_delay_after_sigterm is None: - errprint('Invalid min_delay_after_sigterm value, not float\nExit') - exit(1) - if min_delay_after_sigterm < 0: - errprint('min_delay_after_sigterm must be positiv\nExit') - exit(1) -else: - errprint('min_delay_after_sigterm not in config\nExit') - exit(1) - - -if 'psi_post_action_delay' in config_dict: - psi_post_action_delay = string_to_float_convert_test( - config_dict['psi_post_action_delay']) - if psi_post_action_delay is None: - errprint('Invalid psi_post_action_delay value, not float\nExit') - exit(1) - if psi_post_action_delay < 0: - errprint('psi_post_action_delay must be positive\nExit') - exit(1) -else: - errprint('psi_post_action_delay not in config\nExit') - exit(1) - - -if 'sigkill_psi_threshold' in config_dict: - sigkill_psi_threshold = string_to_float_convert_test( - config_dict['sigkill_psi_threshold']) - if sigkill_psi_threshold is None: - errprint('Invalid sigkill_psi_threshold value, not float\nExit') - exit(1) - if sigkill_psi_threshold < 0 or sigkill_psi_threshold > 100: - errprint('sigkill_psi_threshold must be in the range [0; 100]\nExit') - exit(1) -else: - errprint('sigkill_psi_threshold not in config\nExit') - exit(1) - - -if 'sigterm_psi_threshold' in config_dict: - sigterm_psi_threshold = string_to_float_convert_test( - config_dict['sigterm_psi_threshold']) - if sigterm_psi_threshold is None: - errprint('Invalid sigterm_psi_threshold value, not float\nExit') - exit(1) - if sigterm_psi_threshold < 0 or sigterm_psi_threshold > 100: - errprint('sigterm_psi_threshold must be in the range [0; 100]\nExit') - exit(1) -else: - errprint('sigterm_psi_threshold not in config\nExit') - exit(1) - - -if 'min_badness' in config_dict: - min_badness = string_to_int_convert_test( - config_dict['min_badness']) - if min_badness is None: - errprint('Invalid min_badness value, not integer\nExit') - exit(1) - if min_badness < 0 or min_badness > 1000: - errprint('Invalud min_badness value\nExit') - exit(1) -else: - errprint('min_badness not in config\nExit') - exit(1) - - -if 'oom_score_adj_max' in config_dict: - oom_score_adj_max = string_to_int_convert_test( - config_dict['oom_score_adj_max']) - if oom_score_adj_max is None: - errprint('Invalid oom_score_adj_max value, not integer\nExit') - exit(1) - if oom_score_adj_max < 0 or oom_score_adj_max > 1000: - errprint('Invalid oom_score_adj_max value\nExit') - exit(1) -else: - errprint('oom_score_adj_max not in config\nExit') - exit(1) - - -if 'min_time_between_warnings' in config_dict: - min_time_between_warnings = string_to_float_convert_test( - config_dict['min_time_between_warnings']) - if min_time_between_warnings is None: - errprint('Invalid min_time_between_warnings value, not float\nExit') - exit(1) - if min_time_between_warnings < 1 or min_time_between_warnings > 300: - errprint('min_time_between_warnings value out of range [1; 300]\nExit') - exit(1) -else: - errprint('min_time_between_warnings not in config\nExit') - exit(1) - - -if 'swap_min_warnings' in config_dict: - swap_min_warnings = config_dict['swap_min_warnings'] -else: - errprint('swap_min_warnings not in config\nExit') - exit(1) - - -if 'max_ancestry_depth' in config_dict: - max_ancestry_depth = string_to_int_convert_test( - config_dict['max_ancestry_depth']) - if min_badness is None: - errprint('Invalid max_ancestry_depth value, not integer\nExit') - exit(1) - if max_ancestry_depth < 1: - errprint('Invalud max_ancestry_depth value\nExit') - exit(1) -else: - errprint('max_ancestry_depth is not in config\nExit') - exit(1) - - -if 'max_post_sigterm_victim_lifetime' in config_dict: - max_post_sigterm_victim_lifetime = string_to_float_convert_test( - config_dict['max_post_sigterm_victim_lifetime']) - if max_post_sigterm_victim_lifetime is None: - errprint('Invalid max_post_sigterm_victim_lifetime val' - 'ue, not float\nExit') - exit(1) - if max_post_sigterm_victim_lifetime < 0: - errprint('max_post_sigterm_victim_lifetime must be non-n' - 'egative number\nExit') - exit(1) -else: - errprint('max_post_sigterm_victim_lifetime is not in config\nExit') - exit(1) - - -if 'post_kill_exe' in config_dict: - post_kill_exe = config_dict['post_kill_exe'] -else: - errprint('post_kill_exe is not in config\nExit') - exit(1) - - -if 'psi_path' in config_dict: - psi_path = config_dict['psi_path'] -else: - errprint('psi_path is not in config\nExit') - exit(1) - - -if 'psi_metrics' in config_dict: - psi_metrics = config_dict['psi_metrics'] -else: - errprint('psi_metrics is not in config\nExit') - exit(1) - - -if 'warning_exe' in config_dict: - warning_exe = config_dict['warning_exe'] - if warning_exe != '': - check_warning_exe = True - else: - check_warning_exe = False -else: - errprint('warning_exe is not in config\nExit') - exit(1) - - -if 'extra_table_info' in config_dict: - extra_table_info = config_dict['extra_table_info'] - if (extra_table_info != 'None' and - extra_table_info != 'cgroup_v1' and - extra_table_info != 'cgroup_v2' and - extra_table_info != 'cmdline' and - extra_table_info != 'environ' and - extra_table_info != 'realpath' and - extra_table_info != 'All'): - - errprint('Invalid config: invalid extra_table_info value\nExit') - exit(1) -else: - errprint('Invalid config: extra_table_info is not in config\nExit') - exit(1) - - -separate_log = conf_parse_bool('separate_log') - -if separate_log: - - import logging - from logging import basicConfig - from logging import info - - log_dir = '/var/log/nohang' - - try: - os.mkdir(log_dir) - except PermissionError: - print('ERROR: can not create log dir') - except FileExistsError: - pass - - logfile = log_dir + '/nohang.log' - - try: - with open(logfile, 'a') as f: - pass - except FileNotFoundError: - print('ERROR: log FileNotFoundError') - except PermissionError: - print('ERROR: log PermissionError') - - try: - basicConfig( - filename=logfile, - level=logging.INFO, - format="%(asctime)s: %(message)s") - except PermissionError: - errprint('ERROR: Permission denied: {}'.format(logfile)) - except FileNotFoundError: - errprint('ERROR: FileNotFoundError: {}'.format(logfile)) - - -if 'min_mem_report_interval' in config_dict: - min_mem_report_interval = string_to_float_convert_test( - config_dict['min_mem_report_interval']) - if min_mem_report_interval is None: - errprint('Invalid min_mem_report_interval value, not float\nExit') - exit(1) - if min_mem_report_interval < 0: - errprint('min_mem_report_interval must be non-negative number\nExit') - exit(1) -else: - errprint('min_mem_report_interval is not in config\nExit') - exit(1) - - -if 'max_sleep' in config_dict: - max_sleep = string_to_float_convert_test( - config_dict['max_sleep']) - if max_sleep is None: - errprint('Invalid max_sleep value, not float\nExit') - exit(1) - if max_sleep <= 0: - errprint('max_sleep must be positive number\nExit') - exit(1) -else: - errprint('max_sleep is not in config\nExit') - exit(1) - - -if 'min_sleep' in config_dict: - min_sleep = string_to_float_convert_test( - config_dict['min_sleep']) - if min_sleep is None: - errprint('Invalid min_sleep value, not float\nExit') - exit(1) - if min_sleep <= 0: - errprint('min_sleep must be positive number\nExit') - exit(1) -else: - errprint('min_sleep is not in config\nExit') - exit(1) - - -if 'over_sleep' in config_dict: - over_sleep = string_to_float_convert_test( - config_dict['over_sleep']) - if over_sleep is None: - errprint('Invalid over_sleep value, not float\nExit') - exit(1) - if over_sleep <= 0: - errprint('over_sleep must be positive number\nExit') - exit(1) -else: - errprint('over_sleep is not in config\nExit') - exit(1) - - -if max_sleep < min_sleep: - errprint( - 'max_sleep value must not exceed min_sleep value.\nExit' - ) - exit(1) - - -if min_sleep < over_sleep: - errprint( - 'min_sleep value must not exceed over_sleep value.\nExit' - ) - exit(1) - - -if max_sleep == min_sleep: - stable_sleep = True -else: - stable_sleep = False - - -if print_proc_table_flag: - - if not root: - log('WARNING: effective UID != 0; euid={}; processes with other e' - 'uids will be invisible for nohang'.format(self_uid)) - - func_print_proc_table() - - -########################################################################## - - -psi_support = os.path.exists(psi_path) - - -########################################################################## - - -# Get KiB levels if it's possible. - - -def get_swap_threshold_tuple(string): - # re (Num %, True) or (Num KiB, False) - """Returns KiB value if abs val was set in config, or tuple with %""" - # return tuple with abs and bool: (abs %, True) or (abs MiB, False) - - if string.endswith('%'): - valid = string_to_float_convert_test(string[:-1]) - if valid is None: - errprint('somewhere swap unit is not float_%') - exit(1) - - value = float(string[:-1].strip()) - if value < 0 or value > 100: - errprint('invalid value, must be from the range[0; 100] %') - exit(1) - - return value, True - - elif string.endswith('M'): - valid = string_to_float_convert_test(string[:-1]) - if valid is None: - errprint('somewhere swap unit is not float_M') - exit(1) - - value = float(string[:-1].strip()) * 1024 - if value < 0: - errprint('invalid unit in config (negative value)') - exit(1) - - return value, False - - else: - errprint( - 'Invalid config file. There are invalid units somewhere\nExit') - exit(1) - - -swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm) -swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill) -swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings) - - -swap_term_is_percent = swap_min_sigterm_tuple[1] -if swap_term_is_percent: - swap_min_sigterm_percent = swap_min_sigterm_tuple[0] -else: - swap_min_sigterm_kb = swap_min_sigterm_tuple[0] - - -swap_kill_is_percent = swap_min_sigkill_tuple[1] -if swap_kill_is_percent: - swap_min_sigkill_percent = swap_min_sigkill_tuple[0] -else: - swap_min_sigkill_kb = swap_min_sigkill_tuple[0] - - -swap_warn_is_percent = swap_min_warnings_tuple[1] -if swap_warn_is_percent: - swap_min_warnings_percent = swap_min_warnings_tuple[0] -else: - swap_min_warnings_kb = swap_min_warnings_tuple[0] - - -########################################################################## - -# outdated section, need fixes - -if print_config: - - print( - '\n1. Memory levels to respond to as an OOM threat\n[display' - 'ing these options need fix]\n') - - print('mem_min_sigterm: {} MiB, {} %'.format( - round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1))) - print('mem_min_sigkill: {} MiB, {} %'.format( - round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1))) - - print('swap_min_sigterm: {}'.format(swap_min_sigterm)) - print('swap_min_sigkill: {}'.format(swap_min_sigkill)) - - print('zram_max_sigterm: {} MiB, {} %'.format( - round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1))) - print('zram_max_sigkill: {} MiB, {} %'.format( - round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1))) - - print('\n2. The frequency of checking the level of available m' - 'emory (and CPU usage)\n') - print('rate_mem: {}'.format(rate_mem)) - print('rate_swap: {}'.format(rate_swap)) - print('rate_zram: {}'.format(rate_zram)) - - print('\n3. The prevention of killing innocent victims\n') - print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm)) - print('min_badness: {}'.format(min_badness)) - - print('decrease_oom_score_adj: {}'.format( - decrease_oom_score_adj - )) - if decrease_oom_score_adj: - print('oom_score_adj_max: {}'.format(oom_score_adj_max)) - - print('\n4. Impact on the badness of processes via matching their' - ' names, cmdlines ir UIDs with regular expressions\n') - - print('(todo)') - - print('\n5. The execution of a specific command instead of sen' - 'ding the\nSIGTERM signal\n') - - print('\n6. GUI notifications:\n- OOM prevention results and\n- low m' - 'emory warnings\n') - print('gui_notifications: {}'.format(gui_notifications)) - - print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings)) - if gui_low_memory_warnings: - print('min_time_between_warnings: {}'.format( - min_time_between_warnings)) - - print('mem_min_warnings: {} MiB, {} %'.format( - round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1))) - - print('swap_min_warnings: {}'.format(swap_min_warnings)) - - print('zram_max_warnings: {} MiB, {} %'.format( - round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1))) - - print('\n7. Output verbosity\n') - print('print_config: {}'.format(print_config)) - print('print_mem_check_results: {}'.format(print_mem_check_results)) - print('print_sleep_periods: {}\n'.format(print_sleep_periods)) - - -########################################################################## - - -# for calculating the column width when printing mem and zram -mem_len = len(str(round(mem_total / 1024.0))) - -if gui_notifications: - notify_sig_dict = {SIGKILL: 'Killing', - SIGTERM: 'Terminating'} - - -# convert rates from MiB/s to KiB/s -rate_mem = rate_mem * 1024 -rate_swap = rate_swap * 1024 -rate_zram = rate_zram * 1024 - - -warn_time_now = 0 -warn_time_delta = 1000 -warn_timer = 0 - - -########################################################################## - - -if not root: - log('WARNING: effective UID != 0; euid={}; processes with other e' - 'uids will be invisible for nohang'.format(self_uid)) - - -# Try to lock all memory - -mlockall() - -########################################################################## - - -# print_self_rss() - - -log('Monitoring has started!') - -stdout.flush() - -########################################################################## - -psi_avg_string = '' # will be overwritten if PSI monitoring enabled - - -if psi_support and not ignore_psi: - psi_t0 = time() - - -if print_mem_check_results: - - # to find delta mem - wt2 = 0 - new_mem = 0 - - # init mem report interval - report0 = 0 - - -# handle signals -for i in sig_list: - signal(i, signal_handler) - - -while True: - - if psi_support and not ignore_psi: - - psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics) - - if print_mem_check_results: - psi_avg_string = 'PSI avg value: {} | '.format( - str(psi_avg_value).rjust(6)) - - if psi_avg_value >= sigkill_psi_threshold: - sigkill_psi_exceeded = True - else: - sigkill_psi_exceeded = False - - if psi_avg_value >= sigterm_psi_threshold: - sigterm_psi_exceeded = True - else: - sigterm_psi_exceeded = False - - if time() - psi_t0 >= psi_post_action_delay: - psi_post_action_delay_exceeded = True - else: - psi_post_action_delay_exceeded = False - - if psi_debug: - log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps' - 'i_post_action_delay_exceeded: {}'.format( - sigterm_psi_exceeded, - sigkill_psi_exceeded, - psi_post_action_delay_exceeded)) - - if sigkill_psi_exceeded and psi_post_action_delay_exceeded: - time0 = time() - mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \ - 'old ({})'.format( - psi_avg_value, sigkill_psi_threshold) - - implement_corrective_action(SIGKILL) - - psi_t0 = time() - continue - - if sigterm_psi_exceeded and psi_post_action_delay_exceeded: - time0 = time() - mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \ - 'shold ({})'.format(psi_avg_value, sigterm_psi_threshold) - - implement_corrective_action(SIGTERM) - - psi_t0 = time() - continue - - mem_available, swap_total, swap_free = check_mem_and_swap() - - # if swap_min_sigkill is set in percent - if swap_kill_is_percent: - swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0 - - if swap_term_is_percent: - swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0 - - if swap_warn_is_percent: - swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0 - - mem_used_zram = check_zram() - - if print_mem_check_results: - - wt1 = time() - - delta = (mem_available + swap_free) - new_mem - - t_cycle = wt1 - wt2 - - report_delta = wt1 - report0 - - if report_delta >= min_mem_report_interval: - - mem_report = True - new_mem = mem_available + swap_free - - report0 = wt1 - - else: - mem_report = False - - wt2 = time() - - if mem_report: - - speed = delta / 1024.0 / report_delta - speed_info = ' | dMem: {} M/s'.format( - str(round(speed)).rjust(5) - ) - - # Calculate 'swap-column' width - swap_len = len(str(round(swap_total / 1024.0))) - - # Output available mem sizes - if swap_total == 0 and mem_used_zram == 0: - log('{}MemAvail: {} M, {} %{}'.format( - psi_avg_string, - human(mem_available, mem_len), - just_percent_mem(mem_available / mem_total), - speed_info - ) - ) - - elif swap_total > 0 and mem_used_zram == 0: - log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format( - psi_avg_string, - human(mem_available, mem_len), - just_percent_mem(mem_available / mem_total), - human(swap_free, swap_len), - just_percent_swap(swap_free / (swap_total + 0.1)), - speed_info - ) - ) - - else: - log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem' - 'UsedZram: {} M, {} %{}'.format( - psi_avg_string, - human(mem_available, mem_len), - just_percent_mem(mem_available / mem_total), - human(swap_free, swap_len), - just_percent_swap(swap_free / (swap_total + 0.1)), - human(mem_used_zram, mem_len), - just_percent_mem(mem_used_zram / mem_total), - speed_info - ) - ) - - if swap_total > swap_min_sigkill_kb: - swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1)) - else: - swap_sigkill_pc = '-' - - if swap_total > swap_min_sigterm_kb: - swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1)) - else: - swap_sigterm_pc = '-' - - # MEM SWAP KILL - if (mem_available <= mem_min_sigkill_kb and - swap_free <= swap_min_sigkill_kb): - time0 = time() - - mem_info = 'Hard threshold exceeded\nMemory status that requ' \ - 'ires corrective actions:' \ - '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ - 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ - 'p_min_sigkill [{} MiB, {} %]'.format( - kib_to_mib(mem_available), - percent(mem_available / mem_total), - kib_to_mib(mem_min_sigkill_kb), - percent(mem_min_sigkill_kb / mem_total), - kib_to_mib(swap_free), - percent(swap_free / (swap_total + 0.1)), - kib_to_mib(swap_min_sigkill_kb), - swap_sigkill_pc) - - implement_corrective_action(SIGKILL) - - psi_t0 = time() - continue - - # ZRAM KILL - if mem_used_zram >= zram_max_sigkill_kb: - time0 = time() - - mem_info = 'Hard threshold exceeded\nMemory status that requir' \ - 'es corrective actions:' \ - '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \ - 'kill [{} MiB, {} %]'.format( - kib_to_mib(mem_used_zram), - percent(mem_used_zram / mem_total), - kib_to_mib(zram_max_sigkill_kb), - percent(zram_max_sigkill_kb / mem_total)) - - implement_corrective_action(SIGKILL) - - psi_t0 = time() - continue - - # MEM SWAP TERM - if mem_available <= mem_min_sigterm_kb and \ - swap_free <= swap_min_sigterm_kb: - - time0 = time() - - mem_info = 'Soft threshold exceeded\nMemory status that requi' \ - 'res corrective actions:' \ - '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ - 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ - 'p_min_sigterm [{} MiB, {} %]'.format( - kib_to_mib(mem_available), - percent(mem_available / mem_total), - kib_to_mib(mem_min_sigterm_kb), - round(mem_min_sigterm_percent, 1), - kib_to_mib(swap_free), - percent(swap_free / (swap_total + 0.1)), - kib_to_mib(swap_min_sigterm_kb), - swap_sigterm_pc) - - implement_corrective_action(SIGTERM) - - psi_t0 = time() - continue - - # ZRAM TERM - if mem_used_zram >= zram_max_sigterm_kb: - time0 = time() - - mem_info = 'Soft threshold exceeded\nMemory status that requ' \ - 'ires corrective actions:' \ - '\n MemUsedZram [{} MiB, {} %] >= ' \ - 'zram_max_sigterm [{} M, {} %]'.format( - kib_to_mib(mem_used_zram), - percent(mem_used_zram / mem_total), - kib_to_mib(zram_max_sigterm_kb), - percent(zram_max_sigterm_kb / mem_total)) - - implement_corrective_action(SIGTERM) - - psi_t0 = time() - continue - - # LOW MEMORY WARNINGS - if gui_low_memory_warnings: - - if mem_available <= mem_min_warnings_kb and \ - swap_free <= swap_min_warnings_kb + 0.1 or \ - mem_used_zram >= zram_max_warnings_kb: - warn_time_delta = time() - warn_time_now - warn_time_now = time() - warn_timer += warn_time_delta - if warn_timer > min_time_between_warnings: - send_notify_warn() - warn_timer = 0 - - - - - # SLEEP BETWEEN MEM CHECKS - sleep_after_check_mem() - - - - - - - - - - diff --git a/trash/n11 b/trash/n11 deleted file mode 100755 index 6ffa8cc..0000000 --- a/trash/n11 +++ /dev/null @@ -1,3073 +0,0 @@ -#!/usr/bin/env python3 -"""A daemon that prevents OOM in Linux systems.""" - -import os -from ctypes import CDLL -from time import sleep, time -from operator import itemgetter -from sys import stdout, stderr, argv, exit, version -from re import search -from sre_constants import error as invalid_re -from signal import signal, SIGKILL, SIGTERM, SIGINT, SIGQUIT, SIGHUP - - -########################################################################## - -# define functions - -''' -def self_rss(): - """ - """ - return pid_to_status(self_pid)[5] - - -def print_self_rss(): - """ - """ - log('Self RSS: {} MiB'.format(self_rss())) -''' - - - - - - - - - - - - - -def cgroup2_root(): - """ - """ - with open('/proc/mounts') as f: - for line in f: - if ' cgroup2 ' in line: - return line[7:].rpartition(' cgroup2 ')[0] - - - - -def cgroup2_to_psi_file(cg2): - """ - """ - cg2root = cgroup2_root() - if cg2root is not None: - return cg2root + cg2 + '/memory.pressure' - - - - - -def get_psi_mem_files(cgroup2_path): - """ - """ - - path_list = [] - - for root, dirs, files in os.walk(cgroup2_path): - for file in files: - path = os.path.join(root, file) - if path.endswith('/memory.pressure'): ############# - path_list.append(path) - - return path_list - - -def psi_path_to_cgroup2(path): - """ - """ - return path.partition(i)[2][:-16] - - - - - - - - - - - - - - - - - - - - - - - - -def get_swap_threshold_tuple(string): - # re (Num %, True) or (Num KiB, False) - """Returns KiB value if abs val was set in config, or tuple with %""" - # return tuple with abs and bool: (abs %, True) or (abs MiB, False) - - if string.endswith('%'): - valid = string_to_float_convert_test(string[:-1]) - if valid is None: - errprint('somewhere swap unit is not float_%') - exit(1) - - value = float(string[:-1].strip()) - if value < 0 or value > 100: - errprint('invalid value, must be from the range[0; 100] %') - exit(1) - - return value, True - - elif string.endswith('M'): - valid = string_to_float_convert_test(string[:-1]) - if valid is None: - errprint('somewhere swap unit is not float_M') - exit(1) - - value = float(string[:-1].strip()) * 1024 - if value < 0: - errprint('invalid unit in config (negative value)') - exit(1) - - return value, False - - else: - errprint( - 'Invalid config file. There are invalid units somewhere\nExit') - exit(1) - - -def find_cgroup_indexes(): - """ Find cgroup-line positions in /proc/*/cgroup file. - """ - - cgroup_v1_index = cgroup_v2_index = None - - with open('/proc/self/cgroup') as f: - for index, line in enumerate(f): - if ':name=' in line: - cgroup_v1_index = index - if line.startswith('0::'): - cgroup_v2_index = index - - return cgroup_v1_index, cgroup_v2_index - - -def pid_to_rss(pid): - """ - """ - try: - rss = int(rline1( - '/proc/{}/statm'.format(pid)).split(' ')[1]) * SC_PAGESIZE - except IndexError: - rss = None - except FileNotFoundError: - rss = None - except ProcessLookupError: - rss = None - return rss - - -def pid_to_vm_size(pid): - """ - """ - try: - vm_size = int(rline1( - '/proc/{}/statm'.format(pid)).partition(' ')[0]) * SC_PAGESIZE - except IndexError: - vm_size = None - except FileNotFoundError: - vm_size = None - except ProcessLookupError: - vm_size = None - return vm_size - - -def signal_handler(signum, frame): - """ - """ - for i in sig_list: - signal(i, signal_handler_inner) - log('Signal handler called with the {} signal '.format( - sig_dict[signum])) - update_stat_dict_and_print(None) - log('Exit') - exit() - - -def signal_handler_inner(signum, frame): - """ - """ - log('Signal handler called with the {} signal (ignored) '.format( - sig_dict[signum])) - - -def exe(cmd): - """ - """ - log('Execute the command: {}'.format(cmd)) - t0 = time() - write_self_oom_score_adj(self_oom_score_adj_max) - err = os.system(cmd) - write_self_oom_score_adj(self_oom_score_adj_min) - dt = time() - t0 - log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3))) - return err - - -def write(path, string): - """ - """ - with open(path, 'w') as f: - f.write(string) - - -def write_self_oom_score_adj(new_value): - """ - """ - if root: - write('/proc/self/oom_score_adj', new_value) - - -def valid_re(reg_exp): - """Validate regular expression. - """ - try: - search(reg_exp, '') - except invalid_re: - log('Invalid config: invalid regexp: {}'.format(reg_exp)) - exit(1) - - -def func_print_proc_table(): - """ - """ - print_proc_table = True - find_victim(print_proc_table) - exit() - - -def log(*msg): - """ - """ - try: - print(*msg) - except OSError: - sleep(0.01) - if separate_log: - try: - info(*msg) - except OSError: - sleep(0.01) - - -def print_version(): - """ - """ - try: - v = rline1('/etc/nohang/version') - except FileNotFoundError: - v = None - if v is None: - print('Nohang unknown version') - else: - print('Nohang ' + v) - exit() - - -def test(): - """ - """ - print('\n(This option is not ready to use!)\n') - - print(version) - print(argv) - - hr = '==================================' - print(hr) - print("uptime()") - print(uptime()) - - print(hr) - print("os.uname()") - print(os.uname()) - - print(hr) - print("pid_to_starttime('self')") - print(pid_to_starttime('self')) - - print(hr) - print("get_victim_id('self')") - print(get_victim_id('self')) - - print(hr) - print("errprint('test')") - print(errprint('test')) - - print(hr) - print("mlockall()") - print(mlockall()) - - print(hr) - print("pid_to_state('2')") - print(pid_to_state('2')) - - exit() - - -def pid_to_cgroup_v1(pid): - """ - """ - cgroup_v1 = '' - try: - with open('/proc/' + pid + '/cgroup') as f: - for index, line in enumerate(f): - if index == cgroup_v1_index: - cgroup_v1 = '/' + line.partition('/')[2][:-1] - return cgroup_v1 - except FileNotFoundError: - return '' - - -def pid_to_cgroup_v2(pid): - """ - """ - cgroup_v2 = '' - try: - with open('/proc/' + pid + '/cgroup') as f: - for index, line in enumerate(f): - if index == cgroup_v2_index: - cgroup_v2 = line[3:-1] - return cgroup_v2 - except FileNotFoundError: - return '' - - -def pid_to_starttime(pid): - """ handle FNF error! - """ - try: - starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[ - 2].split(' ')[20] - - except UnicodeDecodeError: - # print('LOL') - with open('/proc/' + pid + '/stat', 'rb') as f: - starttime = f.read().decode('utf-8', 'ignore').rpartition( - ')')[2].split(' ')[20] - - return float(starttime) / SC_CLK_TCK - - -def get_victim_id(pid): - """victim_id is starttime + pid""" - try: - return rline1('/proc/' + pid + '/stat').rpartition( - ')')[2].split(' ')[20] + '_pid' + pid - except FileNotFoundError: - return '' - except ProcessLookupError: - return '' - - -def pid_to_state(pid): - """ Handle FNF error! (BTW it already handled in find_victim_info()) - """ - return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1] - - -def pid_to_name(pid): - """ - """ - try: - with open('/proc/' + pid + '/comm', 'rb') as f: - return f.read().decode('utf-8', 'ignore')[:-1] - except FileNotFoundError: - return '' - except ProcessLookupError: - return '' - - -def pid_to_ppid(pid): - """ - """ - try: - with open('/proc/' + pid + '/status') as f: - for n, line in enumerate(f): - if n is ppid_index: - return line.split('\t')[1].strip() - except FileNotFoundError: - return '' - except ProcessLookupError: - return '' - except UnicodeDecodeError: - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - for i in range(len(f_list)): - if i is ppid_index: - return f_list[i].split('\t')[1] - - -def pid_to_ancestry(pid, max_ancestry_depth=1): - """ - """ - if max_ancestry_depth == 1: - ppid = pid_to_ppid(pid) - pname = pid_to_name(ppid) - return '\n PPID: {} ({})'.format(ppid, pname) - if max_ancestry_depth == 0: - return '' - anc_list = [] - for i in range(max_ancestry_depth): - ppid = pid_to_ppid(pid) - pname = pid_to_name(ppid) - anc_list.append((ppid, pname)) - if ppid == '1': - break - pid = ppid - a = '' - for i in anc_list: - a = a + ' <= PID {} ({})'.format(i[0], i[1]) - return '\n Ancestry: ' + a[4:] - - -def pid_to_cmdline(pid): - """ - Get process cmdline by pid. - - pid: str pid of required process - returns string cmdline - """ - try: - with open('/proc/' + pid + '/cmdline') as f: - return f.read().replace('\x00', ' ').rstrip() - except FileNotFoundError: - return '' - - -def pid_to_environ(pid): - """ - Get process environ by pid. - - pid: str pid of required process - returns string environ - """ - try: - with open('/proc/' + pid + '/environ') as f: - return f.read().replace('\x00', ' ').rstrip() - except FileNotFoundError: - return '' - - -def pid_to_realpath(pid): - """ - """ - try: - return os.path.realpath('/proc/' + pid + '/exe') - except FileNotFoundError: - return '' - - -def pid_to_uid(pid): - """return euid""" - try: - with open('/proc/' + pid + '/status') as f: - for n, line in enumerate(f): - if n is uid_index: - return line.split('\t')[2] - except UnicodeDecodeError: - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - return f_list[uid_index].split('\t')[2] - except FileNotFoundError: - return '' - - -def pid_to_badness(pid): - """Find and modify badness (if it needs).""" - - try: - - oom_score = int(rline1('/proc/' + pid + '/oom_score')) - badness = oom_score - - if decrease_oom_score_adj: - oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj')) - if badness > oom_score_adj_max and oom_score_adj > 0: - badness = badness - oom_score_adj + oom_score_adj_max - - if regex_matching: - name = pid_to_name(pid) - for re_tup in processname_re_list: - if search(re_tup[1], name) is not None: - badness += int(re_tup[0]) - - if re_match_cgroup_v1: - cgroup_v1 = pid_to_cgroup_v1(pid) - for re_tup in cgroup_v1_re_list: - if search(re_tup[1], cgroup_v1) is not None: - badness += int(re_tup[0]) - - if re_match_cgroup_v2: - cgroup_v2 = pid_to_cgroup_v2(pid) - for re_tup in cgroup_v2_re_list: - if search(re_tup[1], cgroup_v2) is not None: - badness += int(re_tup[0]) - - if re_match_realpath: - realpath = pid_to_realpath(pid) - for re_tup in realpath_re_list: - if search(re_tup[1], realpath) is not None: - badness += int(re_tup[0]) - - if re_match_cmdline: - cmdline = pid_to_cmdline(pid) - for re_tup in cmdline_re_list: - if search(re_tup[1], cmdline) is not None: - badness += int(re_tup[0]) - - if re_match_environ: - environ = pid_to_environ(pid) - for re_tup in environ_re_list: - if search(re_tup[1], environ) is not None: - badness += int(re_tup[0]) - - if re_match_uid: - uid = pid_to_uid(pid) - for re_tup in uid_re_list: - if search(re_tup[1], uid) is not None: - badness += int(re_tup[0]) - - if forbid_negative_badness: - if badness < 0: - badness = 0 - - return badness, oom_score - - except FileNotFoundError: - return None, None - except ProcessLookupError: - return None, None - - -def pid_to_status(pid): - """ - """ - - try: - - with open('/proc/' + pid + '/status') as f: - - for n, line in enumerate(f): - - if n is 0: - name = line.split('\t')[1][:-1] - - if n is state_index: - state = line.split('\t')[1][0] - continue - - if n is ppid_index: - ppid = line.split('\t')[1][:-1] - continue - - if n is uid_index: - uid = line.split('\t')[2] - continue - - if n is vm_size_index: - vm_size = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if n is vm_rss_index: - vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if n is vm_swap_index: - vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) - break - - return name, state, ppid, uid, vm_size, vm_rss, vm_swap - - except UnicodeDecodeError: - return pid_to_status_unicode(pid) - - except FileNotFoundError: - return None - - except ProcessLookupError: - return None - - except ValueError: - return None - - -def pid_to_status_unicode(pid): - """ - """ - try: - - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - - for i in range(len(f_list)): - - if i is 0: - name = f_list[i].split('\t')[1] - - if i is state_index: - state = f_list[i].split('\t')[1][0] - - if i is ppid_index: - ppid = f_list[i].split('\t')[1] - - if i is uid_index: - uid = f_list[i].split('\t')[2] - - if i is vm_size_index: - vm_size = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is vm_rss_index: - vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) - - if i is vm_swap_index: - vm_swap = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) - - return name, state, ppid, uid, vm_size, vm_rss, vm_swap - - except FileNotFoundError: - return None - - except ProcessLookupError: - return None - - except ValueError: - return None - - -def uptime(): - """ - """ - return float(rline1('/proc/uptime').split(' ')[0]) - - -def errprint(*text): - """ - """ - print(*text, file=stderr, flush=True) - - -def mlockall(): - """Lock all memory to prevent swapping nohang process.""" - - MCL_CURRENT = 1 - MCL_FUTURE = 2 - MCL_ONFAULT = 4 - - libc = CDLL('libc.so.6', use_errno=True) - - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT - ) - if result != 0: - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE - ) - if result != 0: - log('WARNING: cannot lock all memory') - else: - pass - # log('All memory locked with MCL_CURRENT | MCL_FUTURE') - else: - pass - # log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT') - - -def update_stat_dict_and_print(key): - """ - """ - - if key is not None: - - if key not in stat_dict: - - stat_dict.update({key: 1}) - - else: - - new_value = stat_dict[key] + 1 - stat_dict.update({key: new_value}) - - if print_total_stat: - - stats_msg = 'Total stat (what happened in the last {}):'.format( - format_time(time() - start_time)) - - for i in stat_dict: - stats_msg += '\n {}: {}'.format(i, stat_dict[i]) - - log(stats_msg) - - -def find_psi_metrics_value(psi_path, psi_metrics): - """ - """ - - if psi_support: - - if psi_metrics == 'some_avg10': - return float(rline1(psi_path).split(' ')[1].split('=')[1]) - if psi_metrics == 'some_avg60': - return float(rline1(psi_path).split(' ')[2].split('=')[1]) - if psi_metrics == 'some_avg300': - return float(rline1(psi_path).split(' ')[3].split('=')[1]) - - if psi_metrics == 'full_avg10': - with open(psi_path) as f: - psi_list = f.readlines() - return float(psi_list[1].split(' ')[1].split('=')[1]) - if psi_metrics == 'full_avg60': - with open(psi_path) as f: - psi_list = f.readlines() - return float(psi_list[1].split(' ')[2].split('=')[1]) - if psi_metrics == 'full_avg300': - with open(psi_path) as f: - psi_list = f.readlines() - return float(psi_list[1].split(' ')[3].split('=')[1]) - - -def check_mem_and_swap(): - """find mem_available, swap_total, swap_free""" - with open('/proc/meminfo') as f: - for n, line in enumerate(f): - if n is 2: - mem_available = int(line.split(':')[1][:-4]) - continue - if n is swap_total_index: - swap_total = int(line.split(':')[1][:-4]) - continue - if n is swap_free_index: - swap_free = int(line.split(':')[1][:-4]) - break - return mem_available, swap_total, swap_free - - -def check_zram(): - """find MemUsedZram""" - disksize_sum = 0 - mem_used_total_sum = 0 - - for dev in os.listdir('/sys/block'): - if dev.startswith('zram'): - stat = zram_stat(dev) - disksize_sum += int(stat[0]) - mem_used_total_sum += int(stat[1]) - - # Means that when setting zram disksize = 1 GiB available memory - # decrease by 0.0042 GiB. - # Found experimentally, requires clarification with different kernaels and - # architectures. - # On small disk drives (up to gigabyte) it can be more, up to 0.0045. - # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should - # be 0.001: - # ("zram uses about 0.1% of the size of the disk" - # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt), - # but this statement contradicts the experimental data. - # ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize - # Found experimentally. - ZRAM_DISKSIZE_FACTOR = 0.0042 - - return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0 - - -def format_time(t): - """ - """ - t = int(t) - if t < 60: - return '{} sec'.format(t) - elif t >= 60 and t < 3600: - m = t // 60 - s = t % 60 - return '{} min {} sec'.format(m, s) - else: - h = t // 3600 - s0 = t - h * 3600 - m = s0 // 60 - s = s0 % 60 - return '{} h {} min {} sec'.format(h, m, s) - - -def string_to_float_convert_test(string): - """Try to interprete string values as floats.""" - try: - return float(string) - except ValueError: - return None - - -def string_to_int_convert_test(string): - """Try to interpret string values as integers.""" - try: - return int(string) - except ValueError: - return None - - -def conf_parse_string(param): - """ - Get string parameters from the config dict. - - param: config_dict key - returns config_dict[param].strip() - """ - if param in config_dict: - return config_dict[param].strip() - else: - errprint('All the necessary parameters must be in the config') - errprint('There is no "{}" parameter in the config'.format(param)) - exit(1) - - -def conf_parse_bool(param): - """ - Get bool parameters from the config_dict. - - param: config_dicst key - returns bool - """ - if param in config_dict: - param_str = config_dict[param] - if param_str == 'True': - return True - elif param_str == 'False': - return False - else: - errprint('Invalid value of the "{}" parameter.'.format(param)) - errprint('Valid values are True and False.') - errprint('Exit') - exit(1) - else: - errprint('All the necessary parameters must be in the config') - errprint('There is no "{}" parameter in the config'.format(param)) - exit(1) - - -def rline1(path): - """read 1st line from path.""" - try: - with open(path) as f: - for line in f: - return line[:-1] - except UnicodeDecodeError: - with open(path, 'rb') as f: - return f.read(999).decode( - 'utf-8', 'ignore').split('\n')[0] # use partition()! - - -def kib_to_mib(num): - """Convert KiB values to MiB values.""" - return round(num / 1024.0) - - -def percent(num): - """Interprete num as percentage.""" - return round(num * 100, 1) - - -def just_percent_mem(num): - """convert num to percent and justify""" - return str(round(num * 100, 1)).rjust(4, ' ') - - -def just_percent_swap(num): - """ - """ - return str(round(num * 100, 1)).rjust(5, ' ') - - -def human(num, lenth): - """Convert KiB values to MiB values with right alignment""" - return str(round(num / 1024)).rjust(lenth, ' ') - - -def zram_stat(zram_id): - """ - Get zram state. - - zram_id: str zram block-device id - returns bytes diskcize, str mem_used_total - """ - try: - disksize = rline1('/sys/block/' + zram_id + '/disksize') - except FileNotFoundError: - return '0', '0' - if disksize == ['0\n']: - return '0', '0' - try: - mm_stat = rline1('/sys/block/' + zram_id + '/mm_stat').split(' ') - mm_stat_list = [] - for i in mm_stat: - if i != '': - mm_stat_list.append(i) - mem_used_total = mm_stat_list[2] - except FileNotFoundError: - mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total') - return disksize, mem_used_total # BYTES, str - - -def send_notify_warn(): - """ - Look for process with maximum 'badness' and warn user with notification. - (implement Low memory warnings) - """ - log('Warning threshold exceeded') - - if check_warning_exe: - exe(warning_exe) - - else: - - title = 'Low memory' - - body = 'MemAvail: {}%\nSwapFree: {}%'.format( - round(mem_available / mem_total * 100), - round(swap_free / (swap_total + 0.1) * 100) - ) - - send_notification(title, body) - - -def send_notify(signal, name, pid): - """ - Notificate about OOM Preventing. - - signal: key for notify_sig_dict - name: str process name - pid: str process pid - """ - - # wait for memory release after corrective action - # may be useful if free memory was about 0 immediately after - # corrective action - sleep(0.05) - - title = 'Freeze prevention' - body = '{} [{}] {}'.format( - notify_sig_dict[signal], - pid, - name.replace( - # symbol '&' can break notifications in some themes, - # therefore it is replaced by '*' - '&', '*' - ) - ) - - send_notification(title, body) - - -def send_notify_etc(pid, name, command): - """ - Notificate about OOM Preventing. - - command: str command that will be executed - name: str process name - pid: str process pid - """ - title = 'Freeze prevention' - body = 'Victim is [{}] {}\nExecute the co' \ - 'mmand:\n{}'.format( - pid, name.replace('&', '*'), command.replace('&', '*')) - - send_notification(title, body) - - -def send_notification(title, body): - """ - """ - split_by = '#' * 16 - - t000 = time() - - path_to_cache = '/dev/shm/nohang_notify_cache_uid{}_time{}'.format( - str(self_uid), t000 - ) - - text = '{}{}{}'.format(title, split_by, body) - - try: - with open(path_to_cache, 'w') as f: - f.write(text) - os.chmod(path_to_cache, 0o600) - except OSError: - log('OSError while send notification ' - '(No space left on device: /dev/shm)') - return None - - cmd = '{} --uid {} --time {} &'.format(notify_helper_path, self_uid, t000) - - exe(cmd) - - -def get_pid_list(): - """ - Find pid list expect kthreads and zombies - """ - pid_list = [] - for pid in os.listdir('/proc'): - if os.path.exists('/proc/' + pid + '/exe') is True: - pid_list.append(pid) - return pid_list - - -def get_non_decimal_pids(): - """ - """ - non_decimal_list = [] - for pid in pid_list: - if pid[0].isdecimal() is False: - non_decimal_list.append(pid) - return non_decimal_list - - -def find_victim(_print_proc_table): - """ - Find the process with highest badness and its badness adjustment - Return pid and badness - """ - - ft1 = time() - - pid_list = get_pid_list() - - pid_list.remove(self_pid) - - if '1' in pid_list: - pid_list.remove('1') - - non_decimal_list = get_non_decimal_pids() - - for i in non_decimal_list: - if i in pid_list: - pid_list.remove(i) - - pid_badness_list = [] - - if _print_proc_table: - - if extra_table_info == 'None': - extra_table_title = '' - - elif extra_table_info == 'cgroup_v1': - extra_table_title = 'CGroup_v1' - - elif extra_table_info == 'cgroup_v2': - extra_table_title = 'CGroup_v2' - - elif extra_table_info == 'cmdline': - extra_table_title = 'cmdline' - - elif extra_table_info == 'environ': - extra_table_title = 'environ' - - elif extra_table_info == 'realpath': - extra_table_title = 'realpath' - - elif extra_table_info == 'All': - extra_table_title = '[CGroup] [CmdLine] [RealPath]' - else: - extra_table_title = '' - - hr = '#' * 115 - - log(hr) - log('# PID PPID badness oom_score oom_score_adj e' - 'UID S VmSize VmRSS VmSwap Name {}'.format( - extra_table_title)) - log('#------- ------- ------- --------- ------------- -------' - '--- - ------ ----- ------ --------------- --------') - - for pid in pid_list: - - badness = pid_to_badness(pid)[0] - - if badness is None: - continue - - if _print_proc_table: - - try: - oom_score = rline1('/proc/' + pid + '/oom_score') - oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') - except FileNotFoundError: - continue - - if pid_to_status(pid) is None: - continue - else: - (name, state, ppid, uid, vm_size, vm_rss, - vm_swap) = pid_to_status(pid) - - if extra_table_info == 'None': - extra_table_line = '' - - elif extra_table_info == 'cgroup_v1': - extra_table_line = pid_to_cgroup_v1(pid) - - elif extra_table_info == 'cgroup_v2': - extra_table_line = pid_to_cgroup_v2(pid) - - elif extra_table_info == 'cmdline': - extra_table_line = pid_to_cmdline(pid) - - elif extra_table_info == 'environ': - extra_table_line = pid_to_environ(pid) - - elif extra_table_info == 'realpath': - extra_table_line = pid_to_realpath(pid) - - elif extra_table_info == 'All': - extra_table_line = '[CG: {}] [CL: {}] [RP: {}]'.format( - pid_to_cgroup_v1(pid), - pid_to_cmdline(pid), - pid_to_realpath(pid) - ) - else: - extra_table_line = '' - - log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format( - pid.rjust(7), - ppid.rjust(7), - str(badness).rjust(7), - oom_score.rjust(9), - oom_score_adj.rjust(13), - uid.rjust(10), - state, - str(vm_size).rjust(6), - str(vm_rss).rjust(5), - str(vm_swap).rjust(6), - name.ljust(15), - extra_table_line - ) - ) - - pid_badness_list.append((pid, badness)) - - real_proc_num = len(pid_badness_list) - - # Make list of (pid, badness) tuples, sorted by 'badness' values - # print(pid_badness_list) - pid_tuple_list = sorted( - pid_badness_list, - key=itemgetter(1), - reverse=True - )[0] - - pid = pid_tuple_list[0] - - # Get maximum 'badness' value - victim_badness = pid_tuple_list[1] - victim_name = pid_to_name(pid) - - if _print_proc_table: - log(hr) - - log('Found {} processes with existing /proc/[pid]/exe'.format( - real_proc_num)) - - log( - 'Process with highest badness (found in {} ms):\n PID: {}, Na' - 'me: {}, badness: {}'.format( - round((time() - ft1) * 1000), - pid, - victim_name, - victim_badness - ) - ) - - return pid, victim_badness, victim_name - - -def find_victim_info(pid, victim_badness, name): - """ - """ - status0 = time() - - try: - - with open('/proc/' + pid + '/status') as f: - - for n, line in enumerate(f): - - if n is state_index: - state = line.split('\t')[1].rstrip() - continue - - if n is ppid_index: - ppid = line.split('\t')[1] - continue - - if n is uid_index: - uid = line.split('\t')[2] - continue - - if n is vm_size_index: - vm_size = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if n is vm_rss_index: - vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if detailed_rss: - - if n is anon_index: - anon_rss = kib_to_mib( - int(line.split('\t')[1][:-4])) - continue - - if n is file_index: - file_rss = kib_to_mib( - int(line.split('\t')[1][:-4])) - continue - - if n is shmem_index: - shmem_rss = kib_to_mib( - int(line.split('\t')[1][:-4])) - continue - - if n is vm_swap_index: - vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) - break - - cmdline = pid_to_cmdline(pid) - oom_score = rline1('/proc/' + pid + '/oom_score') - oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') - - except FileNotFoundError: - log('The victim died in the search process: FileNotFoundError') - update_stat_dict_and_print( - 'The victim died in the search process: FileNotFoundError') - return None - except ProcessLookupError: - log('The victim died in the search process: ProcessLookupError') - update_stat_dict_and_print( - 'The victim died in the search process: ProcessLookupError') - return None - except UnicodeDecodeError: - - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - - for i in range(len(f_list)): - - if i is state_index: - state = f_list[i].split('\t')[1].rstrip() - - if i is ppid_index: - ppid = f_list[i].split('\t')[1] - - if i is uid_index: - uid = f_list[i].split('\t')[2] - - if i is vm_size_index: - vm_size = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is vm_rss_index: - vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) - - if detailed_rss: - - if i is anon_index: - anon_rss = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is file_index: - file_rss = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is shmem_index: - shmem_rss = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is vm_swap_index: - vm_swap = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - cmdline = pid_to_cmdline(pid) - oom_score = rline1('/proc/' + pid + '/oom_score') - oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') - - except IndexError: - log('The victim died in the search process: IndexError') - update_stat_dict_and_print( - 'The victim died in the search process: IndexError') - return None - except ValueError: - log('The victim died in the search process: ValueError') - update_stat_dict_and_print( - 'The victim died in the search process: ValueError') - return None - except FileNotFoundError: - log('The victim died in the search process: FileNotFoundError') - update_stat_dict_and_print( - 'The victim died in the search process: FileNotFoundError') - return None - except ProcessLookupError: - log('The victim died in the search process: ProcessLookupError') - update_stat_dict_and_print( - 'The victim died in the search process: ProcessLookupError') - return None - - len_vm = len(str(vm_size)) - - try: - realpath = os.path.realpath('/proc/' + pid + '/exe') - victim_lifetime = format_time(uptime() - pid_to_starttime(pid)) - victim_cgroup_v1 = pid_to_cgroup_v1(pid) - victim_cgroup_v2 = pid_to_cgroup_v2(pid) - - except FileNotFoundError: - print('The victim died in the search process: FileNotFoundError') - update_stat_dict_and_print( - 'The victim died in the search process: FileNotFoundError') - return None - - ancestry = pid_to_ancestry(pid, max_ancestry_depth) - - if detailed_rss: - detailed_rss_info = ' (' \ - 'Anon: {} MiB, ' \ - 'File: {} MiB, ' \ - 'Shmem: {} MiB)'.format( - anon_rss, - file_rss, - shmem_rss) - else: - detailed_rss_info = '' - - victim_info = 'Victim information (found in {} ms):' \ - '\n Name: {}' \ - '\n State: {}' \ - '\n PID: {}' \ - '{}' \ - '\n EUID: {}' \ - '\n badness: {}, ' \ - 'oom_score: {}, ' \ - 'oom_score_adj: {}' \ - '\n VmSize: {} MiB' \ - '\n VmRSS: {} MiB {}' \ - '\n VmSwap: {} MiB' \ - '\n CGroup_v1: {}' \ - '\n CGroup_v2: {}' \ - '\n Realpath: {}' \ - '\n Cmdline: {}' \ - '\n Lifetime: {}'.format( - round((time() - status0) * 1000), - name, - state, - pid, - ancestry, - uid, - victim_badness, - oom_score, - oom_score_adj, - vm_size, - str(vm_rss).rjust(len_vm), - detailed_rss_info, - str(vm_swap).rjust(len_vm), - victim_cgroup_v1, - victim_cgroup_v2, - realpath, - cmdline, - victim_lifetime) - - return victim_info - - -def implement_corrective_action(signal): - """ - Find victim with highest badness and send SIGTERM/SIGKILL - """ - time0 = time() - - # выходим из фции, если для SIGTERM порога не превышено время - # min_delay_after_sigterm и спим в течение over_sleep - if signal is SIGTERM: - - dt = time() - actions_time_dict['action_handled'][0] - - if dt < min_delay_after_sigterm: - print('min_delay_after_sigterm IS NOT EXCEEDED ({} < {})'.format( - round(dt, 3), min_delay_after_sigterm)) - - if print_sleep_periods: - log('Sleep {} sec [in implement_corrective_action()]'.format( - over_sleep)) - - sleep(over_sleep) - - return None # время задержки между действиями не истекло - else: - print('min_delay_after_sigterm IS EXCEEDED, it is time to action') - - """ - - При заходе в фцию проверяем права на сигтерм. Права на сигкилл у нас - всегда есть - (потому что идем дальше только после полн освободж памяти после - смерти жертвы) - - actions_time_dict[action_handled] = time() - actions_time_dict[veto] = True - - actions_time_dict['action_handled'] = [time(), victim_id] - - - - """ - - log(mem_info) - - pid, victim_badness, name = find_victim(print_proc_table) - - if victim_badness >= min_badness: - - if print_victim_info: - victim_info = find_victim_info(pid, victim_badness, name) - log(victim_info) - - # пороги могли превысиься за время поиска жертвы (поиск может занимать - # сотни миллисекунд) - mem_available, swap_total, swap_free = check_mem_and_swap() - - ma_mib = int(mem_available) / 1024.0 - sf_mib = int(swap_free) / 1024.0 - log('Memory status before implementing a corrective act' - 'ion:\n MemAvailable' - ': {} MiB, SwapFree: {} MiB'.format( - round(ma_mib, 1), round(sf_mib, 1) - ) - ) - - if (mem_available <= mem_min_sigkill_kb and - swap_free <= swap_min_sigkill_kb): - log('Hard threshold exceeded') - signal = SIGKILL - - victim_id = get_victim_id(pid) - - # kill the victim if it doesn't respond to SIGTERM В ТЕЧЕНИЕ - # ЗАДАННОГО ВРЕМЕНИ - - # переопределяем сигнал для старых жертв - if signal is SIGTERM: - - if victim_id in victim_dict: - - dt = time() - victim_dict[victim_id] - - if dt > max_post_sigterm_victim_lifetime: - print('max_post_sigterm_victim_lifetime exceeded: the ' - 'victim will get SIGKILL') - signal = SIGKILL - - # matching with re to customize corrective actions - soft_match = False - - if soft_actions and signal is SIGTERM: - name = pid_to_name(pid) - cgroup_v1 = pid_to_cgroup_v1(pid) - service = '' - cgroup_v1_tail = cgroup_v1.rpartition('/')[2] - if cgroup_v1_tail.endswith('.service'): - service = cgroup_v1_tail - for i in soft_actions_list: - unit = i[0] - if unit == 'name': - u = name - else: - u = cgroup_v1 - regexp = i[1] - command = i[2] - if search(regexp, u) is not None: - log("Regexp '{}' matches with {} '{}'".format( - regexp, unit, u)) - soft_match = True - break - - if soft_match: - - # todo: make new func - m = check_mem_and_swap() - ma = int(m[0]) / 1024.0 - sf = int(m[2]) / 1024.0 - log('Memory status before implementing a corrective act' - 'ion:\n MemAvailable' - ': {} MiB, SwapFree: {} MiB'.format( - round(ma, 1), round(sf, 1) - ) - ) - - cmd = command.replace( - '$PID', - pid).replace( - '$NAME', - pid_to_name(pid)).replace( - '$SERVICE', - service) - - exit_status = exe(cmd) - - exit_status = str(exit_status) - - response_time = time() - time0 - - # тут надо, как и при дефолтном действии, проверять существование - # жертвы, ее реакцию на действие, - # и время ее смерти в случае успеха, о обновление таймстемпов - # действия - - etc_info = 'Implement a corrective act' \ - 'ion:\n Run the command: {}' \ - '\n Exit status: {}; total response ' \ - 'time: {} ms'.format( - cmd, - exit_status, - round(response_time * 1000)) - - log(etc_info) - - key = "Run the command '{}'".format(cmd) - update_stat_dict_and_print(key) - - if gui_notifications: - send_notify_etc( - pid, - name, - command.replace('$PID', pid).replace( - '$NAME', pid_to_name(pid))) - - else: - - # обычное действие через сигнал - try: - - os.kill(int(pid), signal) - kill_timestamp = time() - response_time = kill_timestamp - time0 - - while True: - exe_exists = os.path.exists('/proc/{}/exe'.format(pid)) - rss = pid_to_rss(pid) - dt = time() - kill_timestamp - log('Victim VmRSS: {} KiB'.format(rss)) - if not exe_exists or rss == 0 or dt > 0.01: - # print(dt) - break - sleep(0.001) - - if dt > 0.01: - log('Timer (value = 0.01 sec) expired; seems' - ' like the victim handles signal') - - actions_time_dict['action_handled'] = [ - time(), get_victim_id(pid)] - - if victim_id not in victim_dict: # хз как надо. - victim_dict.update({victim_id: time()}) - - # log('actions_time_dict', actions_time_dict) - # log('victim_dict', victim_dict) - - else: - log('Process exited (VmRSS = 0) in {} sec'.format( - round(dt, 5))) - - if signal is SIGKILL or not exe_exists or rss == 0: - - while True: - sleep(0.001) - # рсс не важен когда путь не существует. Проверяй - # просто существование пид. - rss = pid_to_rss(pid) - if rss is None: - break - t1 = time() - kill_duration = t1 - kill_timestamp - log('The victim died in {} sec'.format( - round(kill_duration, 3))) - - mem_available, swap_total, swap_free = check_mem_and_swap() - - ma_mib = int(mem_available) / 1024.0 - sf_mib = int(swap_free) / 1024.0 - log('Memory status after implementing a corrective act' - 'ion:\n MemAvailable' - ': {} MiB, SwapFree: {} MiB'.format( - round(ma_mib, 1), round(sf_mib, 1) - ) - ) - - send_result = 'total response time: {} ms'.format( - round(response_time * 1000)) - - preventing_oom_message = 'Implement a corrective action:' \ - '\n Send {} to the victim; {}'.format( - sig_dict[signal], send_result) - - key = 'Send {} to {}'.format(sig_dict[signal], name) - - if signal is SIGKILL and post_kill_exe != '': - - cmd = post_kill_exe.replace('$PID', pid).replace( - '$NAME', pid_to_name(pid)) - - log('Execute post_kill_exe') - - exe(cmd) - - if gui_notifications: - send_notify(signal, name, pid) - - except FileNotFoundError: - response_time = time() - time0 - send_result = 'no such process; response time: {} ms'.format( - round(response_time * 1000)) - key = 'FileNotFoundError (the victim died in the se' \ - 'arch process): ' - except ProcessLookupError: - response_time = time() - time0 - send_result = 'no such process; response time: {} ms'.format( - round(response_time * 1000)) - key = 'ProcessLookupError (the victim died in the se' \ - 'arch process): ' - - try: - log(preventing_oom_message) - - except UnboundLocalError: - preventing_oom_message = key - - update_stat_dict_and_print(key) - - else: - - response_time = time() - time0 - victim_badness_is_too_small = 'victim badness {} < min_b' \ - 'adness {}; nothing to do; response time: {} ms'.format( - victim_badness, - min_badness, - round(response_time * 1000)) - - log(victim_badness_is_too_small) - - # update stat_dict - key = 'victim badness < min_badness' - update_stat_dict_and_print(key) - - # тут надо поспать хорошенько. а может и счетчики поправить. - # херню несу. во-первых, внезапно может кто-то появиться c блльшим - # бэднес.. Далее надо минимизировать аутпут спам. - sleep(over_sleep) - - # обновлять время не на каждый кил, а только на килл той жертвы, - # которая не отвечала на софт экшн. - # Вывод: ко времени действия прилагать также виктим айди. - - print('##################################################################') - - -def sleep_after_check_mem(): - """Specify sleep times depends on rates and avialable memory.""" - - if stable_sleep: - - if print_sleep_periods: - log('Sleep {} sec'.format(min_sleep)) - - sleep(min_sleep) - return None - - if mem_min_sigkill_kb < mem_min_sigterm_kb: - mem_point = mem_available - mem_min_sigterm_kb - else: - mem_point = mem_available - mem_min_sigkill_kb - - if swap_min_sigkill_kb < swap_min_sigterm_kb: - swap_point = swap_free - swap_min_sigterm_kb - else: - swap_point = swap_free - swap_min_sigkill_kb - - if swap_point < 0: - swap_point = 0 - - if mem_point < 0: - mem_point = 0 - - t_mem = mem_point / rate_mem - t_swap = swap_point / rate_swap - - if CHECK_ZRAM: - t_zram = (mem_total * 0.8 - mem_used_zram) / rate_zram - if t_zram < 0: - t_zram = 0 - t_mem_zram = t_mem + t_zram - - t_mem_swap = t_mem + t_swap - - if CHECK_ZRAM: - - if t_mem_swap <= t_mem_zram: - t = t_mem_swap - else: - t = t_mem_zram - else: - t = t_mem_swap - - if t > max_sleep: - t = max_sleep - elif t < min_sleep: - t = min_sleep - else: - pass - - if print_sleep_periods: - - log( - 'Sleep {} sec (t_mem={}, t_swap={}, t_zram={})'.format( - round(t, 2), - round(t_mem, 2), - round(t_swap, 2), - round(t_zram, 2) - ) - ) - - try: - stdout.flush() - except OSError: - pass - - sleep(t) - - -def calculate_percent(arg_key): - """ - parse conf dict - Calculate mem_min_KEY_percent. - - Try use this one) - arg_key: str key for config_dict - returns int mem_min_percent or NoneType if got some error - """ - - if arg_key in config_dict: - mem_min = config_dict[arg_key] - - if mem_min.endswith('%'): - # truncate percents, so we have a number - mem_min_percent = mem_min[:-1].strip() - # then 'float test' - mem_min_percent = string_to_float_convert_test(mem_min_percent) - if mem_min_percent is None: - errprint('Invalid {} value, not float\nExit'.format(arg_key)) - exit(1) - # Final validations... - if mem_min_percent < 0 or mem_min_percent > 100: - errprint( - '{}, as percents value, out of ran' - 'ge [0; 100]\nExit'.format(arg_key)) - exit(1) - - # mem_min_sigterm_percent is clean and valid float percentage. Can - # translate into Kb - mem_min_kb = mem_min_percent / 100 * mem_total - mem_min_mb = round(mem_min_kb / 1024) - - elif mem_min.endswith('M'): - mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip()) - if mem_min_mb is None: - errprint('Invalid {} value, not float\nExit'.format(arg_key)) - exit(1) - mem_min_kb = mem_min_mb * 1024 - if mem_min_kb > mem_total: - errprint( - '{} value can not be greater then MemT' - 'otal ({} MiB)\nExit'.format( - arg_key, round( - mem_total / 1024))) - exit(1) - mem_min_percent = mem_min_kb / mem_total * 100 - - else: - log('Invalid {} units in config.\n Exit'.format(arg_key)) - exit(1) - mem_min_percent = None - - else: - log('{} not in config\nExit'.format(arg_key)) - exit(1) - mem_min_percent = None - - return mem_min_kb, mem_min_mb, mem_min_percent - - -########################################################################## - - -start_time = time() - - -help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG] - -optional arguments: - -h, --help show this help message and exit - -v, --version print version - -t, --test print some tests - -p, --print-proc-table - print table of processes with their badness values - -c CONFIG, --config CONFIG - path to the config file, default values: - ./nohang.conf, /etc/nohang/nohang.conf""" - - -SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK']) - -SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE']) - -conf_err_mess = 'Invalid config. Exit.' - -sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP] - -sig_dict = { - SIGKILL: 'SIGKILL', - SIGINT: 'SIGINT', - SIGQUIT: 'SIGQUIT', - SIGHUP: 'SIGHUP', - SIGTERM: 'SIGTERM' -} - -self_pid = str(os.getpid()) - -self_uid = os.geteuid() - -if self_uid == 0: - root = True -else: - root = False - - -if os.path.exists('./nohang_notify_helper'): - notify_helper_path = './nohang_notify_helper' -else: - notify_helper_path = '/usr/sbin/nohang_notify_helper' - - -victim_dict = dict() - - -victim_id = None -actions_time_dict = dict() -actions_time_dict['action_handled'] = [time(), victim_id] -# print(actions_time_dict) - - -# will store corrective actions stat -stat_dict = dict() - - -separate_log = False # will be overwritten after parse config - - -cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes() - - -self_oom_score_adj_min = '-600' -self_oom_score_adj_max = '-6' - - -write_self_oom_score_adj(self_oom_score_adj_min) - - -pid_list = get_pid_list() - - -print_proc_table_flag = False - -if len(argv) == 1: - if os.path.exists('./nohang.conf'): - config = os.getcwd() + '/nohang.conf' - else: - config = '/etc/nohang/nohang.conf' - -elif len(argv) == 2: - if argv[1] == '--help' or argv[1] == '-h': - print(help_mess) - exit() - elif argv[1] == '--version' or argv[1] == '-v': - print_version() - elif argv[1] == '--test' or argv[1] == '-t': - test() - elif argv[1] == '--print-proc-table' or argv[1] == '-p': - print_proc_table_flag = True - if os.path.exists('./nohang.conf'): - config = os.getcwd() + '/nohang.conf' - else: - config = '/etc/nohang/nohang.conf' - else: - errprint('Unknown option: {}'.format(argv[1])) - exit(1) - -elif len(argv) == 3: - if argv[1] == '--config' or argv[1] == '-c': - config = argv[2] - else: - errprint('Unknown option: {}'.format(argv[1])) - exit(1) - -else: - errprint('Invalid CLI input: too many options') - exit(1) - - -# find mem_total -# find positions of SwapFree and SwapTotal in /proc/meminfo - -with open('/proc/meminfo') as f: - mem_list = f.readlines() - -mem_list_names = [] -for s in mem_list: - mem_list_names.append(s.split(':')[0]) - -if mem_list_names[2] != 'MemAvailable': - errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied') - # exit(1) - -swap_total_index = mem_list_names.index('SwapTotal') -swap_free_index = swap_total_index + 1 - -mem_total = int(mem_list[0].split(':')[1][:-4]) - -# Get names from /proc/*/status to be able to get VmRSS and VmSwap values - -with open('/proc/self/status') as file: - status_list = file.readlines() - -status_names = [] -for s in status_list: - status_names.append(s.split(':')[0]) - -ppid_index = status_names.index('PPid') -vm_size_index = status_names.index('VmSize') -vm_rss_index = status_names.index('VmRSS') -vm_swap_index = status_names.index('VmSwap') -uid_index = status_names.index('Uid') -state_index = status_names.index('State') - - -try: - anon_index = status_names.index('RssAnon') - file_index = status_names.index('RssFile') - shmem_index = status_names.index('RssShmem') - detailed_rss = True - # print(detailed_rss, 'detailed_rss') -except ValueError: - detailed_rss = False - # print('It is not Linux 4.5+') - - -log('Config: ' + config) - - -########################################################################## - -# parsing the config with obtaining the parameters dictionary - -# conf_parameters_dict -# conf_restart_dict - -# dictionary with config options -config_dict = dict() - -processname_re_list = [] -cmdline_re_list = [] -environ_re_list = [] -uid_re_list = [] -cgroup_v1_re_list = [] -cgroup_v2_re_list = [] -realpath_re_list = [] - -soft_actions_list = [] - - -# separator for optional parameters (that starts with @) -opt_separator = '///' - - -# stupid conf parsing, need refactoring -try: - with open(config) as f: - - for line in f: - - a = line.startswith('#') - b = line.startswith('\n') - c = line.startswith('\t') - d = line.startswith(' ') - - etc = line.startswith('@SOFT_ACTION_RE_NAME') - etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1') - - if not a and not b and not c and not d and not etc and not etc2: - a = line.partition('=') - - key = a[0].strip() - value = a[2].strip() - - if key not in config_dict: - config_dict[key] = value - else: - log('ERROR: config key duplication: {}'.format(key)) - exit(1) - - if etc: - - a = line.partition('@SOFT_ACTION_RE_NAME')[ - 2].partition(opt_separator) - - a1 = 'name' - - a2 = a[0].strip() - valid_re(a2) - - a3 = a[2].strip() - - zzz = (a1, a2, a3) - - soft_actions_list.append(zzz) - - if etc2: - - a = line.partition('@SOFT_ACTION_RE_CGROUP_V1')[ - 2].partition(opt_separator) - - a1 = 'cgroup_v1' - - a2 = a[0].strip() - valid_re(a2) - - a3 = a[2].strip() - - zzz = (a1, a2, a3) - - soft_actions_list.append(zzz) - - if line.startswith('@PROCESSNAME_RE'): - a = line.partition( - '@PROCESSNAME_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - processname_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@CMDLINE_RE'): - a = line.partition( - '@CMDLINE_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - cmdline_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@UID_RE'): - a = line.partition( - '@UID_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - uid_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@CGROUP_V1_RE'): - a = line.partition( - '@CGROUP_V1_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - cgroup_v1_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@CGROUP_V2_RE'): - a = line.partition( - '@CGROUP_V2_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - cgroup_v2_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@REALPATH_RE'): - a = line.partition( - '@REALPATH_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - realpath_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@ENVIRON_RE'): - a = line.partition( - '@ENVIRON_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - environ_re_list.append((badness_adj, reg_exp)) - - -except PermissionError: - errprint('PermissionError', conf_err_mess) - exit(1) -except UnicodeDecodeError: - errprint('UnicodeDecodeError', conf_err_mess) - exit(1) -except IsADirectoryError: - errprint('IsADirectoryError', conf_err_mess) - exit(1) -except IndexError: - errprint('IndexError', conf_err_mess) - exit(1) -except FileNotFoundError: - errprint('FileNotFoundError', conf_err_mess) - exit(1) - - -if processname_re_list == []: - regex_matching = False -else: - regex_matching = True - - -if cmdline_re_list == []: - re_match_cmdline = False -else: - re_match_cmdline = True - - -if uid_re_list == []: - re_match_uid = False -else: - re_match_uid = True - - -if environ_re_list == []: - re_match_environ = False -else: - re_match_environ = True - - -if realpath_re_list == []: - re_match_realpath = False -else: - re_match_realpath = True - - -if cgroup_v1_re_list == []: - re_match_cgroup_v1 = False -else: - re_match_cgroup_v1 = True - -if cgroup_v2_re_list == []: - re_match_cgroup_v2 = False -else: - re_match_cgroup_v2 = True - - -# print(processname_re_list) -# print(cmdline_re_list) -# print(uid_re_list) -# print(environ_re_list) -# print(realpath_re_list) -# print(cgroup_v1_re_list) -# print(cgroup_v2_re_list) - -# print(soft_actions_list) - -if soft_actions_list == []: - soft_actions = False -else: - soft_actions = True - -# print('soft_actions:', soft_actions) - -########################################################################## - - -# extracting parameters from the dictionary -# check for all necessary parameters -# validation of all parameters -psi_debug = conf_parse_bool('psi_debug') -print_total_stat = conf_parse_bool('print_total_stat') -print_proc_table = conf_parse_bool('print_proc_table') -forbid_negative_badness = conf_parse_bool('forbid_negative_badness') -print_victim_info = conf_parse_bool('print_victim_info') -print_config = conf_parse_bool('print_config') -print_mem_check_results = conf_parse_bool('print_mem_check_results') -print_sleep_periods = conf_parse_bool('print_sleep_periods') -gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings') -gui_notifications = conf_parse_bool('gui_notifications') -decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj') -ignore_psi = conf_parse_bool('ignore_psi') -ignore_zram = conf_parse_bool('ignore_zram') - - -(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent - ) = calculate_percent('mem_min_sigterm') - -(mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent - ) = calculate_percent('mem_min_sigkill') - -(zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent - ) = calculate_percent('zram_max_sigterm') - -(zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent - ) = calculate_percent('zram_max_sigkill') - -(mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent - ) = calculate_percent('mem_min_warnings') - -(zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent - ) = calculate_percent('zram_max_warnings') - - -if 'rate_mem' in config_dict: - rate_mem = string_to_float_convert_test(config_dict['rate_mem']) - if rate_mem is None: - errprint('Invalid rate_mem value, not float\nExit') - exit(1) - if rate_mem <= 0: - errprint('rate_mem MUST be > 0\nExit') - exit(1) -else: - errprint('rate_mem not in config\nExit') - exit(1) - - -if 'rate_swap' in config_dict: - rate_swap = string_to_float_convert_test(config_dict['rate_swap']) - if rate_swap is None: - errprint('Invalid rate_swap value, not float\nExit') - exit(1) - if rate_swap <= 0: - errprint('rate_swap MUST be > 0\nExit') - exit(1) -else: - errprint('rate_swap not in config\nExit') - exit(1) - - -if 'rate_zram' in config_dict: - rate_zram = string_to_float_convert_test(config_dict['rate_zram']) - if rate_zram is None: - errprint('Invalid rate_zram value, not float\nExit') - exit(1) - if rate_zram <= 0: - errprint('rate_zram MUST be > 0\nExit') - exit(1) -else: - errprint('rate_zram not in config\nExit') - exit(1) - - -if 'swap_min_sigterm' in config_dict: - swap_min_sigterm = config_dict['swap_min_sigterm'] -else: - errprint('swap_min_sigterm not in config\nExit') - exit(1) - - -if 'swap_min_sigkill' in config_dict: - swap_min_sigkill = config_dict['swap_min_sigkill'] -else: - errprint('swap_min_sigkill not in config\nExit') - exit(1) - - -if 'min_delay_after_sigterm' in config_dict: - min_delay_after_sigterm = string_to_float_convert_test( - config_dict['min_delay_after_sigterm']) - if min_delay_after_sigterm is None: - errprint('Invalid min_delay_after_sigterm value, not float\nExit') - exit(1) - if min_delay_after_sigterm < 0: - errprint('min_delay_after_sigterm must be positiv\nExit') - exit(1) -else: - errprint('min_delay_after_sigterm not in config\nExit') - exit(1) - - -if 'psi_post_action_delay' in config_dict: - psi_post_action_delay = string_to_float_convert_test( - config_dict['psi_post_action_delay']) - if psi_post_action_delay is None: - errprint('Invalid psi_post_action_delay value, not float\nExit') - exit(1) - if psi_post_action_delay < 0: - errprint('psi_post_action_delay must be positive\nExit') - exit(1) -else: - errprint('psi_post_action_delay not in config\nExit') - exit(1) - - -if 'sigkill_psi_threshold' in config_dict: - sigkill_psi_threshold = string_to_float_convert_test( - config_dict['sigkill_psi_threshold']) - if sigkill_psi_threshold is None: - errprint('Invalid sigkill_psi_threshold value, not float\nExit') - exit(1) - if sigkill_psi_threshold < 0 or sigkill_psi_threshold > 100: - errprint('sigkill_psi_threshold must be in the range [0; 100]\nExit') - exit(1) -else: - errprint('sigkill_psi_threshold not in config\nExit') - exit(1) - - -if 'sigterm_psi_threshold' in config_dict: - sigterm_psi_threshold = string_to_float_convert_test( - config_dict['sigterm_psi_threshold']) - if sigterm_psi_threshold is None: - errprint('Invalid sigterm_psi_threshold value, not float\nExit') - exit(1) - if sigterm_psi_threshold < 0 or sigterm_psi_threshold > 100: - errprint('sigterm_psi_threshold must be in the range [0; 100]\nExit') - exit(1) -else: - errprint('sigterm_psi_threshold not in config\nExit') - exit(1) - - -if 'min_badness' in config_dict: - min_badness = string_to_int_convert_test( - config_dict['min_badness']) - if min_badness is None: - errprint('Invalid min_badness value, not integer\nExit') - exit(1) - if min_badness < 0 or min_badness > 1000: - errprint('Invalud min_badness value\nExit') - exit(1) -else: - errprint('min_badness not in config\nExit') - exit(1) - - -if 'oom_score_adj_max' in config_dict: - oom_score_adj_max = string_to_int_convert_test( - config_dict['oom_score_adj_max']) - if oom_score_adj_max is None: - errprint('Invalid oom_score_adj_max value, not integer\nExit') - exit(1) - if oom_score_adj_max < 0 or oom_score_adj_max > 1000: - errprint('Invalid oom_score_adj_max value\nExit') - exit(1) -else: - errprint('oom_score_adj_max not in config\nExit') - exit(1) - - -if 'min_time_between_warnings' in config_dict: - min_time_between_warnings = string_to_float_convert_test( - config_dict['min_time_between_warnings']) - if min_time_between_warnings is None: - errprint('Invalid min_time_between_warnings value, not float\nExit') - exit(1) - if min_time_between_warnings < 1 or min_time_between_warnings > 300: - errprint('min_time_between_warnings value out of range [1; 300]\nExit') - exit(1) -else: - errprint('min_time_between_warnings not in config\nExit') - exit(1) - - -if 'swap_min_warnings' in config_dict: - swap_min_warnings = config_dict['swap_min_warnings'] -else: - errprint('swap_min_warnings not in config\nExit') - exit(1) - - -if 'max_ancestry_depth' in config_dict: - max_ancestry_depth = string_to_int_convert_test( - config_dict['max_ancestry_depth']) - if min_badness is None: - errprint('Invalid max_ancestry_depth value, not integer\nExit') - exit(1) - if max_ancestry_depth < 1: - errprint('Invalud max_ancestry_depth value\nExit') - exit(1) -else: - errprint('max_ancestry_depth is not in config\nExit') - exit(1) - - -if 'max_post_sigterm_victim_lifetime' in config_dict: - max_post_sigterm_victim_lifetime = string_to_float_convert_test( - config_dict['max_post_sigterm_victim_lifetime']) - if max_post_sigterm_victim_lifetime is None: - errprint('Invalid max_post_sigterm_victim_lifetime val' - 'ue, not float\nExit') - exit(1) - if max_post_sigterm_victim_lifetime < 0: - errprint('max_post_sigterm_victim_lifetime must be non-n' - 'egative number\nExit') - exit(1) -else: - errprint('max_post_sigterm_victim_lifetime is not in config\nExit') - exit(1) - - -if 'post_kill_exe' in config_dict: - post_kill_exe = config_dict['post_kill_exe'] -else: - errprint('post_kill_exe is not in config\nExit') - exit(1) - - -if 'psi_path' in config_dict: - psi_path = config_dict['psi_path'] -else: - errprint('psi_path is not in config\nExit') - exit(1) - - - - - - - -if 'psi_target' in config_dict: - psi_target = config_dict['psi_target'] -else: - errprint('psi_target is not in config\nExit') - exit(1) - - - - - - - - - - - - - -if 'psi_metrics' in config_dict: - psi_metrics = config_dict['psi_metrics'] -else: - errprint('psi_metrics is not in config\nExit') - exit(1) - - -if 'warning_exe' in config_dict: - warning_exe = config_dict['warning_exe'] - if warning_exe != '': - check_warning_exe = True - else: - check_warning_exe = False -else: - errprint('warning_exe is not in config\nExit') - exit(1) - - -if 'extra_table_info' in config_dict: - extra_table_info = config_dict['extra_table_info'] - if (extra_table_info != 'None' and - extra_table_info != 'cgroup_v1' and - extra_table_info != 'cgroup_v2' and - extra_table_info != 'cmdline' and - extra_table_info != 'environ' and - extra_table_info != 'realpath' and - extra_table_info != 'All'): - - errprint('Invalid config: invalid extra_table_info value\nExit') - exit(1) -else: - errprint('Invalid config: extra_table_info is not in config\nExit') - exit(1) - - -separate_log = conf_parse_bool('separate_log') - -if separate_log: - - import logging - from logging import basicConfig - from logging import info - - log_dir = '/var/log/nohang' - - try: - os.mkdir(log_dir) - except PermissionError: - print('ERROR: can not create log dir') - except FileExistsError: - pass - - logfile = log_dir + '/nohang.log' - - try: - with open(logfile, 'a') as f: - pass - except FileNotFoundError: - print('ERROR: log FileNotFoundError') - except PermissionError: - print('ERROR: log PermissionError') - - try: - basicConfig( - filename=logfile, - level=logging.INFO, - format="%(asctime)s: %(message)s") - except PermissionError: - errprint('ERROR: Permission denied: {}'.format(logfile)) - except FileNotFoundError: - errprint('ERROR: FileNotFoundError: {}'.format(logfile)) - - -if 'min_mem_report_interval' in config_dict: - min_mem_report_interval = string_to_float_convert_test( - config_dict['min_mem_report_interval']) - if min_mem_report_interval is None: - errprint('Invalid min_mem_report_interval value, not float\nExit') - exit(1) - if min_mem_report_interval < 0: - errprint('min_mem_report_interval must be non-negative number\nExit') - exit(1) -else: - errprint('min_mem_report_interval is not in config\nExit') - exit(1) - - -if 'max_sleep' in config_dict: - max_sleep = string_to_float_convert_test( - config_dict['max_sleep']) - if max_sleep is None: - errprint('Invalid max_sleep value, not float\nExit') - exit(1) - if max_sleep <= 0: - errprint('max_sleep must be positive number\nExit') - exit(1) -else: - errprint('max_sleep is not in config\nExit') - exit(1) - - -if 'min_sleep' in config_dict: - min_sleep = string_to_float_convert_test( - config_dict['min_sleep']) - if min_sleep is None: - errprint('Invalid min_sleep value, not float\nExit') - exit(1) - if min_sleep <= 0: - errprint('min_sleep must be positive number\nExit') - exit(1) -else: - errprint('min_sleep is not in config\nExit') - exit(1) - - -if 'over_sleep' in config_dict: - over_sleep = string_to_float_convert_test( - config_dict['over_sleep']) - if over_sleep is None: - errprint('Invalid over_sleep value, not float\nExit') - exit(1) - if over_sleep <= 0: - errprint('over_sleep must be positive number\nExit') - exit(1) -else: - errprint('over_sleep is not in config\nExit') - exit(1) - - -if max_sleep < min_sleep: - errprint( - 'max_sleep value must not exceed min_sleep value.\nExit' - ) - exit(1) - - -if min_sleep < over_sleep: - errprint( - 'min_sleep value must not exceed over_sleep value.\nExit' - ) - exit(1) - - -if max_sleep == min_sleep: - stable_sleep = True -else: - stable_sleep = False - - -if print_proc_table_flag: - - if not root: - log('WARNING: effective UID != 0; euid={}; processes with other e' - 'uids will be invisible for nohang'.format(self_uid)) - - func_print_proc_table() - - -########################################################################## - - -psi_support = os.path.exists(psi_path) - - -########################################################################## - - -# Get KiB levels if it's possible. - - -swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm) -swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill) -swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings) - - -swap_term_is_percent = swap_min_sigterm_tuple[1] -if swap_term_is_percent: - swap_min_sigterm_percent = swap_min_sigterm_tuple[0] -else: - swap_min_sigterm_kb = swap_min_sigterm_tuple[0] - - -swap_kill_is_percent = swap_min_sigkill_tuple[1] -if swap_kill_is_percent: - swap_min_sigkill_percent = swap_min_sigkill_tuple[0] -else: - swap_min_sigkill_kb = swap_min_sigkill_tuple[0] - - -swap_warn_is_percent = swap_min_warnings_tuple[1] -if swap_warn_is_percent: - swap_min_warnings_percent = swap_min_warnings_tuple[0] -else: - swap_min_warnings_kb = swap_min_warnings_tuple[0] - - -########################################################################## - -# outdated section, need fixes - -if print_config: - - print( - '\n1. Memory levels to respond to as an OOM threat\n[display' - 'ing these options need fix]\n') - - print('mem_min_sigterm: {} MiB, {} %'.format( - round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1))) - print('mem_min_sigkill: {} MiB, {} %'.format( - round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1))) - - print('swap_min_sigterm: {}'.format(swap_min_sigterm)) - print('swap_min_sigkill: {}'.format(swap_min_sigkill)) - - print('zram_max_sigterm: {} MiB, {} %'.format( - round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1))) - print('zram_max_sigkill: {} MiB, {} %'.format( - round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1))) - - print('\n2. The frequency of checking the level of available m' - 'emory (and CPU usage)\n') - print('rate_mem: {}'.format(rate_mem)) - print('rate_swap: {}'.format(rate_swap)) - print('rate_zram: {}'.format(rate_zram)) - - print('\n3. The prevention of killing innocent victims\n') - print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm)) - print('min_badness: {}'.format(min_badness)) - - print('decrease_oom_score_adj: {}'.format( - decrease_oom_score_adj - )) - if decrease_oom_score_adj: - print('oom_score_adj_max: {}'.format(oom_score_adj_max)) - - print('\n4. Impact on the badness of processes via matching their' - ' names, cmdlines ir UIDs with regular expressions\n') - - print('(todo)') - - print('\n5. The execution of a specific command instead of sen' - 'ding the\nSIGTERM signal\n') - - print('\n6. GUI notifications:\n- OOM prevention results and\n- low m' - 'emory warnings\n') - print('gui_notifications: {}'.format(gui_notifications)) - - print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings)) - if gui_low_memory_warnings: - print('min_time_between_warnings: {}'.format( - min_time_between_warnings)) - - print('mem_min_warnings: {} MiB, {} %'.format( - round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1))) - - print('swap_min_warnings: {}'.format(swap_min_warnings)) - - print('zram_max_warnings: {} MiB, {} %'.format( - round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1))) - - print('\n7. Output verbosity\n') - print('print_config: {}'.format(print_config)) - print('print_mem_check_results: {}'.format(print_mem_check_results)) - print('print_sleep_periods: {}\n'.format(print_sleep_periods)) - - -########################################################################## - - -# for calculating the column width when printing mem and zram -mem_len = len(str(round(mem_total / 1024.0))) - -if gui_notifications: - notify_sig_dict = {SIGKILL: 'Killing', - SIGTERM: 'Terminating'} - - -# convert rates from MiB/s to KiB/s -rate_mem = rate_mem * 1024 -rate_swap = rate_swap * 1024 -rate_zram = rate_zram * 1024 - - -warn_time_now = 0 -warn_time_delta = 1000 -warn_timer = 0 - - -########################################################################## - - -if not root: - log('WARNING: effective UID != 0; euid={}; processes with other e' - 'uids will be invisible for nohang'.format(self_uid)) - - -# Try to lock all memory - -mlockall() - -########################################################################## - - -# print_self_rss() - - -psi_avg_string = '' # will be overwritten if PSI monitoring enabled - -mem_used_zram = 0 - -if psi_support and not ignore_psi: - psi_t0 = time() - - -if print_mem_check_results: - - # to find delta mem - wt2 = 0 - new_mem = 0 - - # init mem report interval - report0 = 0 - - -# handle signals -for i in sig_list: - signal(i, signal_handler) - - -CHECK_PSI = False -if psi_support and not ignore_psi: - CHECK_PSI = True - - -CHECK_ZRAM = not ignore_zram - -log('Monitoring has started!') - -stdout.flush() - - - - - - -i = cgroup2_root() - -print(i) -print(psi_target) - -i = /foo - - -########################################################################## - - -while True: - - # Q = time() - - # FIND VALUES: mem, swap, zram, psi - - mem_available, swap_total, swap_free = check_mem_and_swap() - - # if swap_min_sigkill is set in percent - if swap_kill_is_percent: - swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0 - - if swap_term_is_percent: - swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0 - - if swap_warn_is_percent: - swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0 - - if swap_total > swap_min_sigkill_kb: - swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1)) - else: - swap_sigkill_pc = '-' - - if swap_total > swap_min_sigterm_kb: - swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1)) - else: - swap_sigterm_pc = '-' - - if CHECK_ZRAM: - mem_used_zram = check_zram() - - if CHECK_PSI: - psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics) - if time() - psi_t0 >= psi_post_action_delay: - psi_post_action_delay_exceeded = True - else: - psi_post_action_delay_exceeded = False - - if print_mem_check_results: - psi_avg_string = 'PSI avg value: {} | '.format( - str(psi_avg_value).rjust(6)) - - if print_mem_check_results: - - wt1 = time() - - delta = (mem_available + swap_free) - new_mem - - t_cycle = wt1 - wt2 - - report_delta = wt1 - report0 - - if report_delta >= min_mem_report_interval: - - mem_report = True - new_mem = mem_available + swap_free - - report0 = wt1 - - else: - mem_report = False - - wt2 = time() - - if mem_report: - - speed = delta / 1024.0 / report_delta - speed_info = ' | dMem: {} M/s'.format( - str(round(speed)).rjust(5) - ) - - # Calculate 'swap-column' width - swap_len = len(str(round(swap_total / 1024.0))) - - # Output available mem sizes - if swap_total == 0 and mem_used_zram == 0: - log('{}MemAvail: {} M, {} %{}'.format( - psi_avg_string, - human(mem_available, mem_len), - just_percent_mem(mem_available / mem_total), - speed_info - ) - ) - - elif swap_total > 0 and mem_used_zram == 0: - log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format( - psi_avg_string, - human(mem_available, mem_len), - just_percent_mem(mem_available / mem_total), - human(swap_free, swap_len), - just_percent_swap(swap_free / (swap_total + 0.1)), - speed_info - ) - ) - - else: - log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem' - 'UsedZram: {} M, {} %{}'.format( - psi_avg_string, - human(mem_available, mem_len), - just_percent_mem(mem_available / mem_total), - human(swap_free, swap_len), - just_percent_swap(swap_free / (swap_total + 0.1)), - human(mem_used_zram, mem_len), - just_percent_mem(mem_used_zram / mem_total), - speed_info - ) - ) - - ########################################################################### - - # CHECK HARD THRESHOLDS (SIGKILL LEVEL) - - if (mem_available <= mem_min_sigkill_kb and - swap_free <= swap_min_sigkill_kb): - - mem_info = 'Hard threshold exceeded\nMemory status that requ' \ - 'ires corrective actions:' \ - '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ - 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ - 'p_min_sigkill [{} MiB, {} %]'.format( - kib_to_mib(mem_available), - percent(mem_available / mem_total), - kib_to_mib(mem_min_sigkill_kb), - percent(mem_min_sigkill_kb / mem_total), - kib_to_mib(swap_free), - percent(swap_free / (swap_total + 0.1)), - kib_to_mib(swap_min_sigkill_kb), - swap_sigkill_pc) - - implement_corrective_action(SIGKILL) - psi_t0 = time() - continue - - if CHECK_ZRAM: - if mem_used_zram >= zram_max_sigkill_kb: - - mem_info = 'Hard threshold exceeded\nMemory status that requir' \ - 'es corrective actions:' \ - '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \ - 'kill [{} MiB, {} %]'.format( - kib_to_mib(mem_used_zram), - percent(mem_used_zram / mem_total), - kib_to_mib(zram_max_sigkill_kb), - percent(zram_max_sigkill_kb / mem_total)) - - implement_corrective_action(SIGKILL) - psi_t0 = time() - continue - - if CHECK_PSI: - if psi_avg_value >= sigkill_psi_threshold: - sigkill_psi_exceeded = True - else: - sigkill_psi_exceeded = False - - if sigkill_psi_exceeded and psi_post_action_delay_exceeded: - - mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \ - 'old ({})'.format( - psi_avg_value, sigkill_psi_threshold) - - implement_corrective_action(SIGKILL) - psi_t0 = time() - continue - - ########################################################################### - - # CHECK SOFT THRESHOLDS (SIGTERM LEVEL) - - if (mem_available <= mem_min_sigterm_kb and - swap_free <= swap_min_sigterm_kb): - - mem_info = 'Soft threshold exceeded\nMemory status that requi' \ - 'res corrective actions:' \ - '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ - 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ - 'p_min_sigterm [{} MiB, {} %]'.format( - kib_to_mib(mem_available), - percent(mem_available / mem_total), - kib_to_mib(mem_min_sigterm_kb), - round(mem_min_sigterm_percent, 1), - kib_to_mib(swap_free), - percent(swap_free / (swap_total + 0.1)), - kib_to_mib(swap_min_sigterm_kb), - swap_sigterm_pc) - - implement_corrective_action(SIGTERM) - psi_t0 = time() - continue - - if CHECK_ZRAM: - if mem_used_zram >= zram_max_sigterm_kb: - - mem_info = 'Soft threshold exceeded\nMemory status that require' \ - 's corrective actions:\n MemUsedZram [{} MiB, {} %] >= zra' \ - 'm_max_sigterm [{} M, {} %]'.format( - kib_to_mib(mem_used_zram), - percent(mem_used_zram / mem_total), - kib_to_mib(zram_max_sigterm_kb), - percent(zram_max_sigterm_kb / mem_total)) - - implement_corrective_action(SIGTERM) - psi_t0 = time() - continue - - if CHECK_PSI: - if psi_avg_value >= sigterm_psi_threshold: - sigterm_psi_exceeded = True - else: - sigterm_psi_exceeded = False - - if psi_debug: - log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps' - 'i_post_action_delay_exceeded: {}'.format( - sigterm_psi_exceeded, - sigkill_psi_exceeded, - psi_post_action_delay_exceeded)) - - if sigterm_psi_exceeded and psi_post_action_delay_exceeded: - - mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \ - 'shold ({})'.format(psi_avg_value, sigterm_psi_threshold) - - implement_corrective_action(SIGTERM) - psi_t0 = time() - continue - - ########################################################################### - - if gui_low_memory_warnings: - - if (mem_available <= mem_min_warnings_kb and - swap_free <= swap_min_warnings_kb + 0.1 or - mem_used_zram >= zram_max_warnings_kb): - - warn_time_delta = time() - warn_time_now - warn_time_now = time() - warn_timer += warn_time_delta - if warn_timer > min_time_between_warnings: - send_notify_warn() - warn_timer = 0 - - - # x = time() - Q - # print(x * 1000) - - sleep_after_check_mem() diff --git a/trash/new5.conf b/trash/new5.conf deleted file mode 100644 index 46681bd..0000000 --- a/trash/new5.conf +++ /dev/null @@ -1,140 +0,0 @@ -This is nohang config file. -Lines starting with $ contain obligatory parameters. -Lines starting with @ contain optional parameters. -Other lines are comments. - - 0. Common zram settings - - See https://www.kernel.org/doc/Documentation/blockdev/zram.txt - You maybe need $IGNORE_ZRAM=FALSE if you has a big zram disksize - -$IGNORE_ZRAM = TRUE - - 1. Common PSI settings - - See - https://lwn.net/Articles/759658/ - https://facebookmicrosites.github.io/psi/ - -$IGNORE_PSI = TRUE - -$PSI_PATH = /proc/pressure/memory - - some_avg10 is most sensitive. - -$PSI_METRICS = some_avg10 - -$PSI_EXCESS_DURATION = 0 - -$PSI_POST_ACTION_DELAY = 40 - - - - 2. Poll rate - -$FILL_RATE_MEM = 4000 -$FILL_RATE_SWAP = 1500 -$FILL_RATE_ZRAM = 500 -$MIN_SLEEP = 0.1 -$MAX_SLEEP = 3 - - - 3. Warnings / GUI notifications - -$GUI_CORRECTIVE_ACTIONS = FALSE - -$GUI_LOW_MEMORY_WARNINGS = FALSE -$GUI_WARNINGS_MIN_MEM = 20 % -$GUI_WARNINGS_MIN_SWAP = 20 % -$GUI_WARNINGS_MAX_ZRAM = 45 % -$GUI_MIN_DELAY_AFTER_WARNING = 15 - -@EXE_INSTEAD_OF_GUI_WARNING wall -n "LOW MEMORY!" -@EXE_INSTEAD_OF_GUI_WARNING echo 'test' - - - 4. Soft threshold / SIGTERM-related parameters - -$SOFT_MIN_MEM_THRESHOLD = 10 % -$SOFT_MIN_SWAP_THRESHOLD = 10 % -$SOFT_MAX_ZRAM_THRESHOLD = 50 % - -$SOFT_POST_ACTION_DELAY = 0.2 - -$MAX_POST_SOFT_ACTION_VICTIM_LIFETIME = 9 - -$SOFT_MAX_PSI_THRESHOLD = 60 -$SOFT_MAX_PSI_DURATION = 5 - - The execution of specified command instead of sending the SIGTERM signal. - Syntax example: - /// - -@EXE_INSTEAD_OF_SIGTERM_RE_NAME ^foo$ /// kill -9 $PID && echo "Praise KEK, kill $NAME" & - -@EXE_INSTEAD_OF_SIGTERM_RE_CMDLINE ^/sbin/foo /// systemctl restart foo - -@EXE_INSTEAD_OF_SIGTERM_RE_REALPATH ^/sbin/bar$ /// systemctl restart foo - -@EXE_INSTEAD_OF_SIGTERM_RE_UID ^1000$ /// pkill -SEGV $NAME -@EXE_INSTEAD_OF_SIGTERM_RE_UID ^1001$ /// pkill -HUP $NAME - -@SOFT_THRESHOLD_EXE_RE_NAME - - 5. Hard threshold / SIGKILL-related parameters - -$HARD_MIN_MEM_THRESHOLD = 5 % -$HARD_MIN_SWAP_THRESHOLD = 5 % -$HARD_MAX_ZRAM_THRESHOLD = 55 % - -$POST_KILL_EXE = - -$HARD_POST_ACTION_DELAY = 1 - -$HARD_MAX_PSI_THRESHOLD = 90 -$HARD_MAX_PSI_DURATION = 5 - - -$POST_KILL_EXE = - - - - 6. Adjusting badness of processes - -$OOM_SCORE_ADJ_LIMIT = -1 - - Badness adjusting by matching process name, cmdline and eUID with specified regular expression. - - Example badness adj rules - /// - -@BADNESS_ADJ_RE_CMDLINE -childID|--type=renderer /// 200 - -@BADNESS_ADJ_RE_NAME ^Xorg$ /// -100 - -@BADNESS_ADJ_RE_UID ^0$ /// -50 - -@BADNESS_ADJ_RE_REALPATH ^/usr/bin/tail$ /// 100 - - - - 7. Avoid killing small processes (innocent victims) - -$MIN_VICTIM_BADNESS = 20 - - - 8. Verbosity - - $PRINT_CONFIG_AT_STARTUP = FALSE // --print-config - -$MIN_MEM_REPORT_INTERVAL = -1 - -$PRINT_VICTIM_INFO = TRUE - -$PRINT_TOTAL_STAT = TRUE - -$PRINT_PROC_TABLE = FALSE - -$PRINT_SLEEP_PERIODS = FALSE - - diff --git a/trash/nohang 0.2 rc1 b/trash/nohang 0.2 rc1 deleted file mode 100755 index 66de8d0..0000000 --- a/trash/nohang 0.2 rc1 +++ /dev/null @@ -1,2946 +0,0 @@ -#!/usr/bin/env python3 -"""A daemon that prevents OOM in Linux systems.""" - -import os -from ctypes import CDLL -from time import sleep, time -from operator import itemgetter -from sys import stdout, stderr, argv, exit, version -from signal import (signal, - SIGKILL, SIGTERM, SIGINT, SIGQUIT, - SIGHUP, SIGABRT, SIGSEGV, SIGBUS) -from re import search -from sre_constants import error as invalid_re - -start_time = time() - - -help_mess = """usage: nohang [-h] [-v] [-t] [-p] [-c CONFIG] - -optional arguments: - -h, --help show this help message and exit - -v, --version print version - -t, --test print some tests - -p, --print-proc-table - print table of processes with their badness values - -c CONFIG, --config CONFIG - path to the config file, default values: - ./nohang.conf, /etc/nohang/nohang.conf""" - - -SC_CLK_TCK = os.sysconf(os.sysconf_names['SC_CLK_TCK']) - -conf_err_mess = 'Invalid config. Exit.' - -sig_dict = {SIGKILL: 'SIGKILL', - SIGTERM: 'SIGTERM'} - -self_pid = str(os.getpid()) - -self_uid = os.geteuid() - -if self_uid == 0: - root = True -else: - root = False - - -if os.path.exists('./nohang_notify_helper'): - notify_helper_path = './nohang_notify_helper' -else: - notify_helper_path = '/usr/sbin/nohang_notify_helper' - - -victim_dict = dict() - - -# will store corrective actions stat -stat_dict = dict() - - -separate_log = False # will be overwritten after parse config - - -def find_cgroup_indexes(): - """ Find cgroup-line positions in /proc/*/cgroup file. - """ - - cgroup_v1_index = None - cgroup_v2_index = None - - with open('/proc/self/cgroup') as f: - for index, line in enumerate(f): - if ':name=' in line: - cgroup_v1_index = index - if line.startswith('0::'): - cgroup_v2_index = index - - return cgroup_v1_index, cgroup_v2_index - - -cgroup_v1_index, cgroup_v2_index = find_cgroup_indexes() - - -########################################################################## - -# define functions - - -def self_rss(): - """ - """ - return pid_to_status(self_pid)[5] - - -def print_self_rss(): - """ - """ - log('Self RSS: {} MiB'.format(self_rss())) - - -def signal_handler(signum, frame): - log('Got signal {}'.format(signum)) - update_stat_dict_and_print(None) - log('Exit') - exit() - - -def write(path, string): - """ - """ - with open(path, 'w') as f: - f.write(string) - - -def write_self_oom_score_adj(new_value): - """ - """ - if root: - write('/proc/self/oom_score_adj', new_value) - - -self_oom_score_adj_min = '-600' -self_oom_score_adj_max = '-6' - - -write_self_oom_score_adj(self_oom_score_adj_min) - - -def exe(cmd): - """ - """ - log('Execute the command: {}'.format(cmd)) - t0 = time() - write_self_oom_score_adj(self_oom_score_adj_max) - err = os.system(cmd) - write_self_oom_score_adj(self_oom_score_adj_min) - dt = time() - t0 - log('Exit status: {}; exe duration: {} sec'.format(err, round(dt, 3))) - return err - - -def valid_re(reg_exp): - """Validate regular expression. - """ - try: - search(reg_exp, '') - except invalid_re: - log('Invalid config: invalid regexp: {}'.format(reg_exp)) - exit(1) - - -def func_print_proc_table(): - """ - """ - print_proc_table = True - find_victim(print_proc_table) - exit() - - -def log(*msg): - """ - """ - try: - print(*msg) - except OSError: - sleep(0.01) - # print('OSError in print(*msg)') - - if separate_log: - # need fix: TypeError: not all arguments converted during string - # formatting - - try: - info(*msg) - except OSError: - sleep(0.01) - # print('OSError in info(*msg)') - - -def print_version(): - """ - сначала пытаться получ версию прямо из гита - вариант для неустановленых, - для тех, кто еще не запускал make install - """ - try: - v = rline1('/etc/nohang/version') - except FileNotFoundError: - v = None - if v is None: - print('Nohang unknown version') - else: - print('Nohang ' + v) - exit() - - -def test(): - """ - """ - - print(version) - print(argv) - - hr = '==================================' - print(hr) - print("uptime()") - print(uptime()) - - print(hr) - print("os.uname()") - print(os.uname()) - - print(hr) - print("pid_to_starttime('self')") - print(pid_to_starttime('self')) - - print(hr) - print("get_victim_id('self')") - print(get_victim_id('self')) - - print(hr) - print("errprint('test')") - print(errprint('test')) - - print(hr) - print("mlockall()") - print(mlockall()) - - print(hr) - print("pid_to_state('2')") - print(pid_to_state('2')) - - ''' - print(hr) - print("update_stat_dict_and_print('key')") - print(update_stat_dict_and_print('key')) - - print(hr) - print("psi_mem_some_avg_total()") - print(psi_mem_some_avg_total()) - - print(hr) - print("psi_mem_some_avg10()") - print(psi_mem_some_avg10()) - - - - ''' - - print(hr) - exit() - - -########################################################################## - - -def pid_to_cgroup_v1(pid): - """ - """ - cgroup_v1 = '' - try: - with open('/proc/' + pid + '/cgroup') as f: - for index, line in enumerate(f): - if index == cgroup_v1_index: - cgroup_v1 = '/' + line.partition('/')[2][:-1] - return cgroup_v1 - except FileNotFoundError: - return '' - - -def pid_to_cgroup_v2(pid): - """ - """ - cgroup_v2 = '' - try: - with open('/proc/' + pid + '/cgroup') as f: - for index, line in enumerate(f): - if index == cgroup_v2_index: - cgroup_v2 = line[3:-1] - return cgroup_v2 - except FileNotFoundError: - return '' - - -def pid_to_starttime(pid): - """ handle FNF error! - """ - try: - starttime = rline1('/proc/' + pid + '/stat').rpartition(')')[ - 2].split(' ')[20] - - except UnicodeDecodeError: - # print('LOL') - with open('/proc/' + pid + '/stat', 'rb') as f: - starttime = f.read().decode('utf-8', 'ignore').rpartition( - ')')[2].split(' ')[20] - - return float(starttime) / SC_CLK_TCK - - -def get_victim_id(pid): - """victim_id is starttime + pid""" - try: - return rline1('/proc/' + pid + '/stat').rpartition( - ')')[2].split(' ')[20] + pid - except FileNotFoundError: - return '' - - -def pid_to_state(pid): - """ Handle FNF error! (BTW it already handled in find_victim_info()) - """ - return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1] - - -def pid_to_name(pid): - """ - """ - try: - with open('/proc/' + pid + '/comm', 'rb') as f: - return f.read().decode('utf-8', 'ignore')[:-1] - except FileNotFoundError: - return '' - except ProcessLookupError: - return '' - - -def pid_to_ppid(pid): - """ - """ - try: - with open('/proc/' + pid + '/status') as f: - for n, line in enumerate(f): - if n is ppid_index: - return line.split('\t')[1].strip() - except FileNotFoundError: - return '' - except ProcessLookupError: - return '' - except UnicodeDecodeError: - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - for i in range(len(f_list)): - if i is ppid_index: - return f_list[i].split('\t')[1] - - -def pid_to_ancestry(pid, max_ancestry_depth=1): - """ - """ - if max_ancestry_depth == 1: - ppid = pid_to_ppid(pid) - pname = pid_to_name(ppid) - return '\n PPID: {} ({})'.format(ppid, pname) - if max_ancestry_depth == 0: - return '' - anc_list = [] - for i in range(max_ancestry_depth): - ppid = pid_to_ppid(pid) - pname = pid_to_name(ppid) - anc_list.append((ppid, pname)) - if ppid == '1': - break - pid = ppid - a = '' - for i in anc_list: - a = a + ' <= PID {} ({})'.format(i[0], i[1]) - return '\n Ancestry: ' + a[4:] - - -def pid_to_cmdline(pid): - """ - Get process cmdline by pid. - - pid: str pid of required process - returns string cmdline - """ - try: - with open('/proc/' + pid + '/cmdline') as f: - return f.read().replace('\x00', ' ').rstrip() - except FileNotFoundError: - return '' - - -def pid_to_environ(pid): - """ - Get process environ by pid. - - pid: str pid of required process - returns string environ - """ - try: - with open('/proc/' + pid + '/environ') as f: - return f.read().replace('\x00', ' ').rstrip() - except FileNotFoundError: - return '' - - -def pid_to_realpath(pid): - try: - return os.path.realpath('/proc/' + pid + '/exe') - except FileNotFoundError: - return '' - - -def pid_to_uid(pid): - """return euid""" - try: - with open('/proc/' + pid + '/status') as f: - for n, line in enumerate(f): - if n is uid_index: - return line.split('\t')[2] - except UnicodeDecodeError: - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - return f_list[uid_index].split('\t')[2] - except FileNotFoundError: - return '' - - -def pid_to_badness(pid): - """Find and modify badness (if it needs).""" - - try: - - oom_score = int(rline1('/proc/' + pid + '/oom_score')) - badness = oom_score - - if decrease_oom_score_adj: - oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj')) - if badness > oom_score_adj_max and oom_score_adj > 0: - badness = badness - oom_score_adj + oom_score_adj_max - - if regex_matching: - name = pid_to_name(pid) - for re_tup in processname_re_list: - if search(re_tup[1], name) is not None: - badness += int(re_tup[0]) - - if re_match_cgroup_v1: - cgroup_v1 = pid_to_cgroup_v1(pid) - for re_tup in cgroup_v1_re_list: - if search(re_tup[1], cgroup_v1) is not None: - badness += int(re_tup[0]) - - if re_match_cgroup_v2: - cgroup_v2 = pid_to_cgroup_v2(pid) - for re_tup in cgroup_v2_re_list: - if search(re_tup[1], cgroup_v2) is not None: - badness += int(re_tup[0]) - - if re_match_realpath: - realpath = pid_to_realpath(pid) - for re_tup in realpath_re_list: - if search(re_tup[1], realpath) is not None: - badness += int(re_tup[0]) - - if re_match_cmdline: - cmdline = pid_to_cmdline(pid) - for re_tup in cmdline_re_list: - if search(re_tup[1], cmdline) is not None: - badness += int(re_tup[0]) - - if re_match_environ: - environ = pid_to_environ(pid) - for re_tup in environ_re_list: - if search(re_tup[1], environ) is not None: - badness += int(re_tup[0]) - - if re_match_uid: - uid = pid_to_uid(pid) - for re_tup in uid_re_list: - if search(re_tup[1], uid) is not None: - badness += int(re_tup[0]) - - if forbid_negative_badness: - if badness < 0: - badness = 0 - - return badness, oom_score - - except FileNotFoundError: - return None, None - except ProcessLookupError: - return None, None - - -def pid_to_status(pid): - """ - """ - - try: - - with open('/proc/' + pid + '/status') as f: - - for n, line in enumerate(f): - - if n is 0: - name = line.split('\t')[1][:-1] - - if n is state_index: - state = line.split('\t')[1][0] - continue - - if n is ppid_index: - ppid = line.split('\t')[1][:-1] - continue - - if n is uid_index: - uid = line.split('\t')[2] - continue - - if n is vm_size_index: - vm_size = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if n is vm_rss_index: - vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if n is vm_swap_index: - vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) - break - - return name, state, ppid, uid, vm_size, vm_rss, vm_swap - - except UnicodeDecodeError: - return pid_to_status_unicode(pid) - - except FileNotFoundError: - return None - - except ProcessLookupError: - return None - - except ValueError: - return None - - -def pid_to_status_unicode(pid): - """ - """ - try: - - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - - for i in range(len(f_list)): - - if i is 0: - name = f_list[i].split('\t')[1] - - if i is state_index: - state = f_list[i].split('\t')[1][0] - - if i is ppid_index: - ppid = f_list[i].split('\t')[1] - - if i is uid_index: - uid = f_list[i].split('\t')[2] - - if i is vm_size_index: - vm_size = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is vm_rss_index: - vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) - - if i is vm_swap_index: - vm_swap = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) - - return name, state, ppid, uid, vm_size, vm_rss, vm_swap - - except FileNotFoundError: - return None - - except ProcessLookupError: - return None - - except ValueError: - return None - - -########################################################################## - - -def uptime(): - """ - """ - return float(rline1('/proc/uptime').split(' ')[0]) - - -def errprint(*text): - """ - """ - print(*text, file=stderr, flush=True) - - -def mlockall(): - """Lock all memory to prevent swapping nohang process.""" - - MCL_CURRENT = 1 - MCL_FUTURE = 2 - MCL_ONFAULT = 4 - - libc = CDLL('libc.so.6', use_errno=True) - - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT - ) - if result != 0: - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE - ) - if result != 0: - log('WARNING: cannot lock all memory') - else: - log('All memory locked with MCL_CURRENT | MCL_FUTURE') - else: - log('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT') - - -def update_stat_dict_and_print(key): - """ - """ - - if key is not None: - - if key not in stat_dict: - - stat_dict.update({key: 1}) - - else: - - new_value = stat_dict[key] + 1 - stat_dict.update({key: new_value}) - - if print_total_stat: - - stats_msg = 'Total stat (what happened in the last {}):'.format( - format_time(time() - start_time)) - - for i in stat_dict: - stats_msg += '\n {}: {}'.format(i, stat_dict[i]) - - log(stats_msg) - - -def find_psi_metrics_value(psi_path, psi_metrics): - """ - """ - - if psi_support: - - if psi_metrics == 'some_avg10': - return float(rline1(psi_path).split(' ')[1].split('=')[1]) - if psi_metrics == 'some_avg60': - return float(rline1(psi_path).split(' ')[2].split('=')[1]) - if psi_metrics == 'some_avg300': - return float(rline1(psi_path).split(' ')[3].split('=')[1]) - - if psi_metrics == 'full_avg10': - with open(psi_path) as f: - psi_list = f.readlines() - return float(psi_list[1].split(' ')[1].split('=')[1]) - if psi_metrics == 'full_avg60': - with open(psi_path) as f: - psi_list = f.readlines() - return float(psi_list[1].split(' ')[2].split('=')[1]) - if psi_metrics == 'full_avg300': - with open(psi_path) as f: - psi_list = f.readlines() - return float(psi_list[1].split(' ')[3].split('=')[1]) - - -def check_mem(): - """find mem_available""" - # исправить название фции - return int(rline1('/proc/meminfo').split(':')[1][:-4]) - - -def check_mem_and_swap(): - """find mem_available, swap_total, swap_free""" - with open('/proc/meminfo') as f: - for n, line in enumerate(f): - if n is 2: - mem_available = int(line.split(':')[1][:-4]) - continue - if n is swap_total_index: - swap_total = int(line.split(':')[1][:-4]) - continue - if n is swap_free_index: - swap_free = int(line.split(':')[1][:-4]) - break - return mem_available, swap_total, swap_free - - -def check_zram(): - """find MemUsedZram""" - disksize_sum = 0 - mem_used_total_sum = 0 - - for dev in os.listdir('/sys/block'): - if dev.startswith('zram'): - stat = zram_stat(dev) - disksize_sum += int(stat[0]) - mem_used_total_sum += int(stat[1]) - - # Means that when setting zram disksize = 1 GiB available memory - # decrease by 0.0042 GiB. - # Found experimentally, requires clarification with different kernaels and - # architectures. - # On small disk drives (up to gigabyte) it can be more, up to 0.0045. - # The creator of the zram module claims that ZRAM_DISKSIZE_FACTOR should - # be 0.001: - # ("zram uses about 0.1% of the size of the disk" - # - https://www.kernel.org/doc/Documentation/blockdev/zram.txt), - # but this statement contradicts the experimental data. - # ZRAM_DISKSIZE_FACTOR = deltaMemAvailavle / disksize - # Found experimentally. - ZRAM_DISKSIZE_FACTOR = 0.0042 - - return (mem_used_total_sum + disksize_sum * ZRAM_DISKSIZE_FACTOR) / 1024.0 - - -def format_time(t): - """ - """ - t = int(t) - if t < 60: - return '{} sec'.format(t) - elif t >= 60 and t < 3600: - m = t // 60 - s = t % 60 - return '{} min {} sec'.format(m, s) - else: - h = t // 3600 - s0 = t - h * 3600 - m = s0 // 60 - s = s0 % 60 - return '{} h {} min {} sec'.format(h, m, s) - - -def string_to_float_convert_test(string): - """Try to interprete string values as floats.""" - try: - return float(string) - except ValueError: - return None - - -def string_to_int_convert_test(string): - """Try to interpret string values as integers.""" - try: - return int(string) - except ValueError: - return None - - -def conf_parse_string(param): - """ - Get string parameters from the config dict. - - param: config_dict key - returns config_dict[param].strip() - """ - if param in config_dict: - return config_dict[param].strip() - else: - errprint('All the necessary parameters must be in the config') - errprint('There is no "{}" parameter in the config'.format(param)) - exit(1) - - -def conf_parse_bool(param): - """ - Get bool parameters from the config_dict. - - param: config_dicst key - returns bool - """ - if param in config_dict: - param_str = config_dict[param] - if param_str == 'True': - return True - elif param_str == 'False': - return False - else: - errprint('Invalid value of the "{}" parameter.'.format(param)) - errprint('Valid values are True and False.') - errprint('Exit') - exit(1) - else: - errprint('All the necessary parameters must be in the config') - errprint('There is no "{}" parameter in the config'.format(param)) - exit(1) - - -def rline1(path): - """read 1st line from path.""" - try: - with open(path) as f: - for line in f: - return line[:-1] - except UnicodeDecodeError: - # print('UDE rline1', path) - with open(path, 'rb') as f: - return f.read(999).decode( - 'utf-8', 'ignore').split('\n')[0] # use partition()! - - -def kib_to_mib(num): - """Convert KiB values to MiB values.""" - return round(num / 1024.0) - - -def percent(num): - """Interprete num as percentage.""" - return round(num * 100, 1) - - -def just_percent_mem(num): - """convert num to percent and justify""" - return str(round(num * 100, 1)).rjust(4, ' ') - - -def just_percent_swap(num): - """ - """ - return str(round(num * 100, 1)).rjust(5, ' ') - - -def human(num, lenth): - """Convert KiB values to MiB values with right alignment""" - return str(round(num / 1024)).rjust(lenth, ' ') - - -def zram_stat(zram_id): - """ - Get zram state. - - zram_id: str zram block-device id - returns bytes diskcize, str mem_used_total - """ - try: - disksize = rline1('/sys/block/' + zram_id + '/disksize') - except FileNotFoundError: - return '0', '0' - if disksize == ['0\n']: - return '0', '0' - try: - mm_stat = rline1('/sys/block/' + zram_id + '/mm_stat').split(' ') - mm_stat_list = [] - for i in mm_stat: - if i != '': - mm_stat_list.append(i) - mem_used_total = mm_stat_list[2] - except FileNotFoundError: - mem_used_total = rline1('/sys/block/' + zram_id + '/mem_used_total') - return disksize, mem_used_total # BYTES, str - - -def send_notify_warn(): - """ - Look for process with maximum 'badness' and warn user with notification. - (implement Low memory warnings) - """ - - ''' - # find process with max badness - fat_tuple = find_victim() - pid = fat_tuple[0] - name = pid_to_name(pid) - - if mem_used_zram > 0: - low_mem_percent = '{}% {}% {}%'.format( - round(mem_available / mem_total * 100), - round(swap_free / (swap_total + 0.1) * 100), - round(mem_used_zram / mem_total * 100)) - elif swap_free > 0: - low_mem_percent = '{}% {}%'.format( - round(mem_available / mem_total * 100), - round(swap_free / (swap_total + 0.1) * 100)) - else: - low_mem_percent = '{}%'.format( - round(mem_available / mem_total * 100)) - - # title = 'Low memory: {}'.format(low_mem_percent) - title = 'Low memory' - ''' - - ''' - body2 = 'Next victim: {}[{}]'.format( - name.replace( - # symbol '&' can break notifications in some themes, - # therefore it is replaced by '*' - '&', '*'), - pid - ) - ''' - - ''' - body = 'MemAvail: {}%\nSwapFree: {}%'.format( - round(mem_available / mem_total * 100), - round(swap_free / (swap_total + 0.1) * 100)) - - if root: # If nohang was started by root - # send notification to all active users with special script - notify_helper(title, body) - else: # Or by regular user - # send notification to user that runs this nohang - notify_send_wait(title, body) - ''' - - log('Warning threshold exceeded') - - if check_warning_exe: - exe(warning_exe) - - else: - - title = 'Low memory' - - body = 'MemAvail: {}%\nSwapFree: {}%'.format( - round(mem_available / mem_total * 100), - round(swap_free / (swap_total + 0.1) * 100) - ) - - send_notification(title, body) - - -def send_notify(signal, name, pid): - """ - Notificate about OOM Preventing. - - signal: key for notify_sig_dict - name: str process name - pid: str process pid - """ - - # wait for memory release after corrective action - # may be useful if free memory was about 0 immediately after - # corrective action - sleep(0.05) - - title = 'Freeze prevention' - body = '{} [{}] {}'.format( - notify_sig_dict[signal], - pid, - name.replace( - # symbol '&' can break notifications in some themes, - # therefore it is replaced by '*' - '&', '*' - ) - ) - - send_notification(title, body) - - -def send_notify_etc(pid, name, command): - """ - Notificate about OOM Preventing. - - command: str command that will be executed - name: str process name - pid: str process pid - """ - title = 'Freeze prevention' - body = 'Victim is [{}] {}\nExecute the co' \ - 'mmand:\n{}'.format( - pid, name.replace('&', '*'), command.replace('&', '*')) - - send_notification(title, body) - - -def send_notification(title, body): - """ - """ - split_by = '#' * 16 - - t000 = time() - - path_to_cache = '/dev/shm/nohang_notify_cache_uid{}_time{}'.format( - str(self_uid), t000 - ) - - text = '{}{}{}'.format(title, split_by, body) - - try: - with open(path_to_cache, 'w') as f: - f.write(text) - os.chmod(path_to_cache, 0o600) - except OSError: - log('OSError while send notification ' - '(No space left on device: /dev/shm)') - return None - - cmd = '{} --uid {} --time {} &'.format(notify_helper_path, self_uid, t000) - - exe(cmd) - - -def sleep_after_send_signal(signal): - """ - Sleeping after signal was sent. - - signal: sent signal - """ - if signal is SIGKILL: - if print_sleep_periods: - log(' sleep {}'.format(min_delay_after_sigkill)) - sleep(min_delay_after_sigkill) - else: - if print_sleep_periods: - log('Sleep {} sec after implementing a corrective action'.format( - min_delay_after_sigterm)) - sleep(min_delay_after_sigterm) - - -def get_pid_list(): - """ - Find pid list expect kthreads and zombies - """ - pid_list = [] - for pid in os.listdir('/proc'): - if os.path.exists('/proc/' + pid + '/exe') is True: - pid_list.append(pid) - return pid_list - - -pid_list = get_pid_list() - - -def get_non_decimal_pids(): - """ - """ - non_decimal_list = [] - for pid in pid_list: - if pid[0].isdecimal() is False: - non_decimal_list.append(pid) - return non_decimal_list - - -def find_victim(_print_proc_table): - """ - Find the process with highest badness and its badness adjustment - Return pid and badness - """ - - ft1 = time() - - pid_list = get_pid_list() - - pid_list.remove(self_pid) - - if '1' in pid_list: - pid_list.remove('1') - - non_decimal_list = get_non_decimal_pids() - - for i in non_decimal_list: - if i in pid_list: # ???????????????????????????????????????????? - pid_list.remove(i) - - pid_badness_list = [] - - if _print_proc_table: - - if extra_table_info == 'None': - extra_table_title = '' - - elif extra_table_info == 'cgroup_v1': - extra_table_title = 'CGroup_v1' - - elif extra_table_info == 'cgroup_v2': - extra_table_title = 'CGroup_v2' - - elif extra_table_info == 'cmdline': - extra_table_title = 'cmdline' - - elif extra_table_info == 'environ': - extra_table_title = 'environ' - - elif extra_table_info == 'realpath': - extra_table_title = 'realpath' - - elif extra_table_info == 'All': - extra_table_title = '[CGroup] [CmdLine] [RealPath]' - else: - extra_table_title = '' - - hr = '#' * 115 - - log(hr) - log('# PID PPID badness oom_score oom_score_adj eUID S VmSize VmRSS VmSwap Name {}'.format( - extra_table_title)) - log('#------- ------- ------- --------- ------------- ---------- - ------ ----- ------ --------------- --------') - - for pid in pid_list: - - badness = pid_to_badness(pid)[0] - - if badness is None: - continue - - if _print_proc_table: - - try: - oom_score = rline1('/proc/' + pid + '/oom_score') - oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') - except FileNotFoundError: - continue - - if pid_to_status(pid) is None: - continue - else: - (name, state, ppid, uid, vm_size, vm_rss, - vm_swap) = pid_to_status(pid) - - if extra_table_info == 'None': - extra_table_line = '' - - elif extra_table_info == 'cgroup_v1': - extra_table_line = pid_to_cgroup_v1(pid) - - elif extra_table_info == 'cgroup_v2': - extra_table_line = pid_to_cgroup_v2(pid) - - elif extra_table_info == 'cmdline': - extra_table_line = pid_to_cmdline(pid) - - elif extra_table_info == 'environ': - extra_table_line = pid_to_environ(pid) - - elif extra_table_info == 'realpath': - extra_table_line = pid_to_realpath(pid) - - elif extra_table_info == 'All': - extra_table_line = '[CG: {}] [CL: {}] [RP: {}]'.format( - pid_to_cgroup_v1(pid), - pid_to_cmdline(pid), - pid_to_realpath(pid) - ) - else: - extra_table_line = '' - - log('#{} {} {} {} {} {} {} {} {} {} {} {}'.format( - pid.rjust(7), - ppid.rjust(7), - str(badness).rjust(7), - oom_score.rjust(9), - oom_score_adj.rjust(13), - uid.rjust(10), - state, - str(vm_size).rjust(6), - str(vm_rss).rjust(5), - str(vm_swap).rjust(6), - name.ljust(15), - extra_table_line - ) - ) - - pid_badness_list.append((pid, badness)) - - real_proc_num = len(pid_badness_list) - - # Make list of (pid, badness) tuples, sorted by 'badness' values - # print(pid_badness_list) - pid_tuple_list = sorted( - pid_badness_list, - key=itemgetter(1), - reverse=True - )[0] - - pid = pid_tuple_list[0] - - # Get maximum 'badness' value - victim_badness = pid_tuple_list[1] - victim_name = pid_to_name(pid) - - if _print_proc_table: - log(hr) - - log('Found {} processes with existing realpaths'.format(real_proc_num)) - - log( - 'Process with highest badness (found in {} ms):\n PID: {}, Na' - 'me: {}, badness: {}'.format( - round((time() - ft1) * 1000), - pid, - victim_name, - victim_badness - ) - ) - - return pid, victim_badness, victim_name - - -def find_victim_info(pid, victim_badness, name): - """ - """ - status0 = time() - - try: - - with open('/proc/' + pid + '/status') as f: - - for n, line in enumerate(f): - - if n is state_index: - state = line.split('\t')[1].rstrip() - continue - - if n is ppid_index: - ppid = line.split('\t')[1] - continue - - if n is uid_index: - uid = line.split('\t')[2] - continue - - if n is vm_size_index: - vm_size = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if n is vm_rss_index: - vm_rss = kib_to_mib(int(line.split('\t')[1][:-4])) - continue - - if detailed_rss: - - if n is anon_index: - anon_rss = kib_to_mib( - int(line.split('\t')[1][:-4])) - continue - - if n is file_index: - file_rss = kib_to_mib( - int(line.split('\t')[1][:-4])) - continue - - if n is shmem_index: - shmem_rss = kib_to_mib( - int(line.split('\t')[1][:-4])) - continue - - if n is vm_swap_index: - vm_swap = kib_to_mib(int(line.split('\t')[1][:-4])) - break - - cmdline = pid_to_cmdline(pid) - oom_score = rline1('/proc/' + pid + '/oom_score') - oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') - - except FileNotFoundError: - log('The victim died in the search process: FileNotFoundError') - update_stat_dict_and_print( - 'The victim died in the search process: FileNotFoundError') - return None - except ProcessLookupError: - log('The victim died in the search process: ProcessLookupError') - update_stat_dict_and_print( - 'The victim died in the search process: ProcessLookupError') - return None - except UnicodeDecodeError: - - with open('/proc/' + pid + '/status', 'rb') as f: - f_list = f.read().decode('utf-8', 'ignore').split('\n') - - for i in range(len(f_list)): - - if i is state_index: - state = f_list[i].split('\t')[1].rstrip() - - if i is ppid_index: - ppid = f_list[i].split('\t')[1] - - if i is uid_index: - uid = f_list[i].split('\t')[2] - - if i is vm_size_index: - vm_size = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is vm_rss_index: - vm_rss = kib_to_mib(int(f_list[i].split('\t')[1][:-3])) - - if detailed_rss: - - if i is anon_index: - anon_rss = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is file_index: - file_rss = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is shmem_index: - shmem_rss = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - if i is vm_swap_index: - vm_swap = kib_to_mib( - int(f_list[i].split('\t')[1][:-3])) - - cmdline = pid_to_cmdline(pid) - oom_score = rline1('/proc/' + pid + '/oom_score') - oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj') - - except IndexError: - log('The victim died in the search process: IndexError') - update_stat_dict_and_print( - 'The victim died in the search process: IndexError') - return None - except ValueError: - log('The victim died in the search process: ValueError') - update_stat_dict_and_print( - 'The victim died in the search process: ValueError') - return None - except FileNotFoundError: - log('The victim died in the search process: FileNotFoundError') - update_stat_dict_and_print( - 'The victim died in the search process: FileNotFoundError') - return None - except ProcessLookupError: - log('The victim died in the search process: ProcessLookupError') - update_stat_dict_and_print( - 'The victim died in the search process: ProcessLookupError') - return None - - # print((time() - status0) * 1000, 'status time') - - len_vm = len(str(vm_size)) - - try: - realpath = os.path.realpath('/proc/' + pid + '/exe') - victim_lifetime = format_time(uptime() - pid_to_starttime(pid)) - victim_cgroup_v1 = pid_to_cgroup_v1(pid) - victim_cgroup_v2 = pid_to_cgroup_v2(pid) - - except FileNotFoundError: - print('The victim died in the search process: FileNotFoundError') - update_stat_dict_and_print( - 'The victim died in the search process: FileNotFoundError') - return None - - # te1 = time() - ancestry = pid_to_ancestry(pid, max_ancestry_depth) - # print((time() - te1) * 1000, 'ms, ancestry') - - if detailed_rss: - detailed_rss_info = ' (' \ - 'Anon: {} MiB, ' \ - 'File: {} MiB, ' \ - 'Shmem: {} MiB)'.format( - anon_rss, - file_rss, - shmem_rss) - else: - detailed_rss_info = '' - - victim_info = 'Victim information (found in {} ms):' \ - '\n Name: {}' \ - '\n State: {}' \ - '\n PID: {}' \ - '{}' \ - '\n EUID: {}' \ - '\n badness: {}, ' \ - 'oom_score: {}, ' \ - 'oom_score_adj: {}' \ - '\n VmSize: {} MiB' \ - '\n VmRSS: {} MiB {}' \ - '\n VmSwap: {} MiB' \ - '\n CGroup_v1: {}' \ - '\n CGroup_v2: {}' \ - '\n Realpath: {}' \ - '\n Cmdline: {}' \ - '\n Lifetime: {}'.format( - round((time() - status0) * 1000), - name, - state, - pid, - ancestry, - uid, - victim_badness, - oom_score, - oom_score_adj, - vm_size, - str(vm_rss).rjust(len_vm), - detailed_rss_info, - str(vm_swap).rjust(len_vm), - victim_cgroup_v1, - victim_cgroup_v2, - realpath, - cmdline, - victim_lifetime) - - return victim_info - - -# для дедупликации уведомлений -dick = dict() -dick['v'] = [1, 2, 3, time()] - - -def implement_corrective_action(signal): - """ - Find victim with highest badness and send SIGTERM/SIGKILL - """ - - notif = True - - log(mem_info) - - pid, victim_badness, name = find_victim(print_proc_table) - - if victim_badness >= min_badness: - - if print_victim_info: - victim_info = find_victim_info(pid, victim_badness, name) - log(victim_info) - - # kill the victim if it doesn't respond to SIGTERM - if signal is SIGTERM: - victim_id = get_victim_id(pid) - if victim_id not in victim_dict: - victim_dict.update({victim_id: time()}) - else: - if time() - victim_dict[ - victim_id] > max_post_sigterm_victim_lifetime: - print( - '\nmax_post_sigterm_victim_lifetime excee' - 'ded: the victim will get SIGKILL' - ) - signal = SIGKILL - - soft_match = False - - if soft_actions and signal is SIGTERM: - # если мягкий порог И список мягких не пуст: - # итерируемся по списку, ища мэтчинги. Есть совпадения - выполн - # команду и выход из цикла. - name = pid_to_name(pid) - cgroup_v1 = pid_to_cgroup_v1(pid) - service = '' - cgroup_v1_tail = cgroup_v1.rpartition('/')[2] - # log(cgroup_v1_tail) - if cgroup_v1_tail.endswith('.service'): - service = cgroup_v1_tail - # print('$SERVICE:', [service]) - # print('ИЩЕМ СОВПАДЕНИЯ ДЛЯ МЯГКИХ ДЕЙСТВИЙ') - # итерируемся по списку кортежей - for i in soft_actions_list: - unit = i[0] - if unit == 'name': - u = name - else: - u = cgroup_v1 - regexp = i[1] - command = i[2] - # print([u, regexp, command]) - if search(regexp, u) is not None: - log("Regexp '{}' matches with {} '{}'".format(regexp, unit, u)) - # print('СОВПАДЕНИЕ НАЙДЕНО') - soft_match = True - break - - if soft_match: - - # todo: make new func - m = check_mem_and_swap() - ma = int(m[0]) / 1024.0 - sf = int(m[2]) / 1024.0 - log('Memory status before implementing a corrective act' - 'ion:\n MemAvailable' - ': {} MiB, SwapFree: {} MiB'.format( - round(ma, 1), round(sf, 1) - ) - ) - - cmd = command.replace( - '$PID', - pid).replace( - '$NAME', - pid_to_name(pid)).replace( - '$SERVICE', - service) - - exit_status = exe(cmd) - - exit_status = str(exit_status) - - response_time = time() - time0 - - etc_info = 'Implement a corrective act' \ - 'ion:\n Run the command: {}' \ - '\n Exit status: {}; total response ' \ - 'time: {} ms'.format( - cmd, - exit_status, - round(response_time * 1000)) - - log(etc_info) - - key = "Run the command '{}'".format(cmd) - update_stat_dict_and_print(key) - - if gui_notifications: - send_notify_etc( - pid, - name, - command.replace('$PID', pid).replace( - '$NAME', pid_to_name(pid))) - - else: - - try: - - mem_available, swap_total, swap_free = check_mem_and_swap() - - ma_mib = int(mem_available) / 1024.0 - sf_mib = int(swap_free) / 1024.0 - log('Memory status before implementing a corrective act' - 'ion:\n MemAvailable' - ': {} MiB, SwapFree: {} MiB'.format( - round(ma_mib, 1), round(sf_mib, 1) - ) - ) - - if (mem_available <= mem_min_sigkill_kb and - swap_free <= swap_min_sigkill_kb): - log('Hard threshold exceeded') - signal = SIGKILL - - os.kill(int(pid), signal) - response_time = time() - time0 - send_result = 'total response time: {} ms'.format( - round(response_time * 1000)) - - preventing_oom_message = 'Implement a corrective action:' \ - '\n Send {} to the victim; {}'.format( - sig_dict[signal], send_result) - - key = 'Send {} to {}'.format(sig_dict[signal], name) - - if signal is SIGKILL and post_kill_exe != '': - - cmd = post_kill_exe.replace('$PID', pid).replace( - '$NAME', pid_to_name(pid)) - - log('Execute post_kill_exe') - - exe(cmd) - - if gui_notifications: - - # min delay after same notification - # все не так. От этого вообще пол дедупликация . терминация - # один раз покажется при любом раскладе. - delay_after_same_notify = 1 - - x = dick['v'] - - dick['v'] = [signal, name, pid, time()] - - y = dick['v'] - - # print(y[3] - x[3]) - - if x[0] == y[0] and x[1] == y[1] and x[2] == y[2]: - # print('совпадение имени, пид, сигнала') - - # сохр в словаре первре совпавшее время - dt = y[3] - x[3] - # print(dt, 'dt') - if dt < delay_after_same_notify: - notif = False - - if notif: - send_notify(signal, name, pid) - - except FileNotFoundError: - response_time = time() - time0 - send_result = 'no such process; response time: {} ms'.format( - round(response_time * 1000)) - key = 'FileNotFoundError (the victim died in the se' \ - 'arch process): ' - except ProcessLookupError: - response_time = time() - time0 - send_result = 'no such process; response time: {} ms'.format( - round(response_time * 1000)) - key = 'ProcessLookupError (the victim died in the se' \ - 'arch process): ' - - try: - log(preventing_oom_message) - except UnboundLocalError: - preventing_oom_message = key - - update_stat_dict_and_print(key) - - else: - - response_time = time() - time0 - victim_badness_is_too_small = 'victim badness {} < min_b' \ - 'adness {}; nothing to do; response time: {} ms'.format( - victim_badness, - min_badness, - round(response_time * 1000)) - - log(victim_badness_is_too_small) - - # update stat_dict - key = 'victim badness < min_badness' - update_stat_dict_and_print(key) - - sleep_after_send_signal(signal) - - -def sleep_after_check_mem(): - """Specify sleep times depends on rates and avialable memory.""" - - if mem_min_sigkill_kb < mem_min_sigterm_kb: - mem_point = mem_available - mem_min_sigterm_kb - else: - mem_point = mem_available - mem_min_sigkill_kb - - if swap_min_sigkill_kb < swap_min_sigterm_kb: - swap_point = swap_free - swap_min_sigterm_kb - else: - swap_point = swap_free - swap_min_sigkill_kb - - if swap_point < 0: - swap_point = 0 - - if mem_point < 0: - mem_point = 0 - - t_mem = mem_point / rate_mem - t_swap = swap_point / rate_swap - t_zram = (mem_total * 0.8 - mem_used_zram) / rate_zram - if t_zram < 0: - t_zram = 0 - - t_mem_swap = t_mem + t_swap - t_mem_zram = t_mem + t_zram - - if t_mem_swap <= t_mem_zram: - t = t_mem_swap - else: - t = t_mem_zram - - if t > max_sleep_time: - t = max_sleep_time - elif t < min_sleep_time: - t = min_sleep_time - else: - pass - - if print_sleep_periods: - - log( - 'Sleep {} sec (t_mem={}, t_swap={}, t_zram={})'.format( - round(t, 2), - round(t_mem, 2), - round(t_swap, 2), - round(t_zram, 2) - ) - ) - - try: - stdout.flush() - except OSError: # OSError: [Errno 105] No buffer space available - pass - - sleep(t) - - -def calculate_percent(arg_key): - """ - parse conf dict - Calculate mem_min_KEY_percent. - - Try use this one) - arg_key: str key for config_dict - returns int mem_min_percent or NoneType if got some error - """ - - if arg_key in config_dict: - mem_min = config_dict[arg_key] - - if mem_min.endswith('%'): - # truncate percents, so we have a number - mem_min_percent = mem_min[:-1].strip() - # then 'float test' - mem_min_percent = string_to_float_convert_test(mem_min_percent) - if mem_min_percent is None: - errprint('Invalid {} value, not float\nExit'.format(arg_key)) - exit(1) - # Final validations... - if mem_min_percent < 0 or mem_min_percent > 100: - errprint( - '{}, as percents value, out of ran' - 'ge [0; 100]\nExit'.format(arg_key)) - exit(1) - - # mem_min_sigterm_percent is clean and valid float percentage. Can - # translate into Kb - mem_min_kb = mem_min_percent / 100 * mem_total - mem_min_mb = round(mem_min_kb / 1024) - - elif mem_min.endswith('M'): - mem_min_mb = string_to_float_convert_test(mem_min[:-1].strip()) - if mem_min_mb is None: - errprint('Invalid {} value, not float\nExit'.format(arg_key)) - exit(1) - mem_min_kb = mem_min_mb * 1024 - if mem_min_kb > mem_total: - errprint( - '{} value can not be greater then MemT' - 'otal ({} MiB)\nExit'.format( - arg_key, round( - mem_total / 1024))) - exit(1) - mem_min_percent = mem_min_kb / mem_total * 100 - - else: - log('Invalid {} units in config.\n Exit'.format(arg_key)) - exit(1) - mem_min_percent = None - - else: - log('{} not in config\nExit'.format(arg_key)) - exit(1) - mem_min_percent = None - - return mem_min_kb, mem_min_mb, mem_min_percent - - -########################################################################## - - -print_proc_table_flag = False - -# print(len(argv), argv) - -if len(argv) == 1: - if os.path.exists('./nohang.conf'): - config = os.getcwd() + '/nohang.conf' - else: - config = '/etc/nohang/nohang.conf' - -elif len(argv) == 2: - if argv[1] == '--help' or argv[1] == '-h': - print(help_mess) - exit() - elif argv[1] == '--version' or argv[1] == '-v': - print_version() - elif argv[1] == '--test' or argv[1] == '-t': - test() - elif argv[1] == '--print-proc-table' or argv[1] == '-p': - print_proc_table_flag = True - if os.path.exists('./nohang.conf'): - config = os.getcwd() + '/nohang.conf' - else: - config = '/etc/nohang/nohang.conf' - else: - errprint('Unknown option: {}'.format(argv[1])) - exit(1) - -elif len(argv) == 3: - if argv[1] == '--config' or argv[1] == '-c': - config = argv[2] - else: - errprint('Unknown option: {}'.format(argv[1])) - exit(1) - -else: - errprint('Invalid CLI input: too many options') - exit(1) - - -########################################################################## - - -# find mem_total -# find positions of SwapFree and SwapTotal in /proc/meminfo - -with open('/proc/meminfo') as f: - mem_list = f.readlines() - -mem_list_names = [] -for s in mem_list: - mem_list_names.append(s.split(':')[0]) - -if mem_list_names[2] != 'MemAvailable': - errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied') - # exit(1) - -swap_total_index = mem_list_names.index('SwapTotal') -swap_free_index = swap_total_index + 1 - -mem_total = int(mem_list[0].split(':')[1][:-4]) - -# Get names from /proc/*/status to be able to get VmRSS and VmSwap values - -with open('/proc/self/status') as file: - status_list = file.readlines() - -status_names = [] -for s in status_list: - status_names.append(s.split(':')[0]) - -ppid_index = status_names.index('PPid') -vm_size_index = status_names.index('VmSize') -vm_rss_index = status_names.index('VmRSS') -vm_swap_index = status_names.index('VmSwap') -uid_index = status_names.index('Uid') -state_index = status_names.index('State') - - -try: - anon_index = status_names.index('RssAnon') - file_index = status_names.index('RssFile') - shmem_index = status_names.index('RssShmem') - detailed_rss = True - # print(detailed_rss, 'detailed_rss') -except ValueError: - detailed_rss = False - # print('It is not Linux 4.5+') - -########################################################################## - - -log('Config: ' + config) - - -########################################################################## - -# parsing the config with obtaining the parameters dictionary - -# conf_parameters_dict -# conf_restart_dict - -# dictionary with config options -config_dict = dict() - -processname_re_list = [] -cmdline_re_list = [] -environ_re_list = [] -uid_re_list = [] -cgroup_v1_re_list = [] -cgroup_v2_re_list = [] -realpath_re_list = [] - -soft_actions_list = [] - - -# separator for optional parameters (that starts with @) -opt_separator = '///' - - -# stupid conf parsing, need refactoring -try: - with open(config) as f: - - for line in f: - - a = line.startswith('#') - b = line.startswith('\n') - c = line.startswith('\t') - d = line.startswith(' ') - - etc = line.startswith('@SOFT_ACTION_RE_NAME') - etc2 = line.startswith('@SOFT_ACTION_RE_CGROUP_V1') - - if not a and not b and not c and not d and not etc and not etc2: - a = line.partition('=') - - key = a[0].strip() - value = a[2].strip() - - if key not in config_dict: - config_dict[key] = value - else: - log('ERROR: config key duplication: {}'.format(key)) - exit(1) - - if etc: - - # это остаток строки без первого ключа. Содержит: регулярка /// - # команда - a = line.partition('@SOFT_ACTION_RE_NAME')[ - 2].partition(opt_separator) - - a1 = 'name' - - a2 = a[0].strip() - valid_re(a2) - - a3 = a[2].strip() - - zzz = (a1, a2, a3) - - # print(zzz) - - soft_actions_list.append(zzz) - - if etc2: - - # это остаток строки без первого ключа. Содержит: регулярка /// - # команда - a = line.partition('@SOFT_ACTION_RE_CGROUP_V1')[ - 2].partition(opt_separator) - - a1 = 'cgroup_v1' - - a2 = a[0].strip() - valid_re(a2) - - a3 = a[2].strip() - - zzz = (a1, a2, a3) - - # print(zzz) - - soft_actions_list.append(zzz) - - if line.startswith('@PROCESSNAME_RE'): - a = line.partition( - '@PROCESSNAME_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - processname_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@CMDLINE_RE'): - a = line.partition( - '@CMDLINE_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - cmdline_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@UID_RE'): - a = line.partition( - '@UID_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - uid_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@CGROUP_V1_RE'): - a = line.partition( - '@CGROUP_V1_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - cgroup_v1_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@CGROUP_V2_RE'): - a = line.partition( - '@CGROUP_V2_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - cgroup_v2_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@REALPATH_RE'): - a = line.partition( - '@REALPATH_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - realpath_re_list.append((badness_adj, reg_exp)) - - if line.startswith('@ENVIRON_RE'): - a = line.partition( - '@ENVIRON_RE')[2].strip(' \n').partition(opt_separator) - badness_adj = a[0].strip(' ') - reg_exp = a[2].strip(' ') - valid_re(reg_exp) - environ_re_list.append((badness_adj, reg_exp)) - - -except PermissionError: - errprint('PermissionError', conf_err_mess) - exit(1) -except UnicodeDecodeError: - errprint('UnicodeDecodeError', conf_err_mess) - exit(1) -except IsADirectoryError: - errprint('IsADirectoryError', conf_err_mess) - exit(1) -except IndexError: - errprint('IndexError', conf_err_mess) - exit(1) -except FileNotFoundError: - errprint('FileNotFoundError', conf_err_mess) - exit(1) - - -if processname_re_list == []: - regex_matching = False -else: - regex_matching = True - - -if cmdline_re_list == []: - re_match_cmdline = False -else: - re_match_cmdline = True - - -if uid_re_list == []: - re_match_uid = False -else: - re_match_uid = True - - -if environ_re_list == []: - re_match_environ = False -else: - re_match_environ = True - - -if realpath_re_list == []: - re_match_realpath = False -else: - re_match_realpath = True - - -if cgroup_v1_re_list == []: - re_match_cgroup_v1 = False -else: - re_match_cgroup_v1 = True - -if cgroup_v2_re_list == []: - re_match_cgroup_v2 = False -else: - re_match_cgroup_v2 = True - - -# print(processname_re_list) -# print(cmdline_re_list) -# print(uid_re_list) -# print(environ_re_list) -# print(realpath_re_list) -# print(cgroup_v1_re_list) -# print(cgroup_v2_re_list) - - -# print(soft_actions_list) - -if soft_actions_list == []: - soft_actions = False -else: - soft_actions = True - -# print('soft_actions:', soft_actions) - -########################################################################## - - -# extracting parameters from the dictionary -# check for all necessary parameters -# validation of all parameters -psi_debug = conf_parse_bool('psi_debug') -print_total_stat = conf_parse_bool('print_total_stat') -print_proc_table = conf_parse_bool('print_proc_table') -forbid_negative_badness = conf_parse_bool('forbid_negative_badness') -print_victim_info = conf_parse_bool('print_victim_info') -print_config = conf_parse_bool('print_config') -print_mem_check_results = conf_parse_bool('print_mem_check_results') -print_sleep_periods = conf_parse_bool('print_sleep_periods') -gui_low_memory_warnings = conf_parse_bool('gui_low_memory_warnings') -gui_notifications = conf_parse_bool('gui_notifications') -decrease_oom_score_adj = conf_parse_bool('decrease_oom_score_adj') -ignore_psi = conf_parse_bool('ignore_psi') - - -# regex_matching = conf_parse_bool('regex_matching') -# re_match_cmdline = conf_parse_bool('re_match_cmdline') -# re_match_uid = conf_parse_bool('re_match_uid') -# re_match_cgroup_v1 = conf_parse_bool('re_match_cgroup_v1') -# re_match_cgroup_v2 = conf_parse_bool('re_match_cgroup_v2') -# re_match_realpath = conf_parse_bool('re_match_realpath') -# re_match_environ = conf_parse_bool('re_match_environ') - - -# if regex_matching or re_match_cmdline or re_match_uid or re_match_cgroup -# or re_match_realpath: -# from re import search -# from sre_constants import error as invalid_re - -(mem_min_sigterm_kb, mem_min_sigterm_mb, mem_min_sigterm_percent - ) = calculate_percent('mem_min_sigterm') - -(mem_min_sigkill_kb, mem_min_sigkill_mb, mem_min_sigkill_percent - ) = calculate_percent('mem_min_sigkill') - -(zram_max_sigterm_kb, zram_max_sigterm_mb, zram_max_sigterm_percent - ) = calculate_percent('zram_max_sigterm') - -(zram_max_sigkill_kb, zram_max_sigkill_mb, zram_max_sigkill_percent - ) = calculate_percent('zram_max_sigkill') - -(mem_min_warnings_kb, mem_min_warnings_mb, mem_min_warnings_percent - ) = calculate_percent('mem_min_warnings') - -(zram_max_warnings_kb, zram_max_warnings_mb, zram_max_warnings_percent - ) = calculate_percent('zram_max_warnings') - - -if 'rate_mem' in config_dict: - rate_mem = string_to_float_convert_test(config_dict['rate_mem']) - if rate_mem is None: - errprint('Invalid rate_mem value, not float\nExit') - exit(1) - if rate_mem <= 0: - errprint('rate_mem MUST be > 0\nExit') - exit(1) -else: - errprint('rate_mem not in config\nExit') - exit(1) - - -if 'rate_swap' in config_dict: - rate_swap = string_to_float_convert_test(config_dict['rate_swap']) - if rate_swap is None: - errprint('Invalid rate_swap value, not float\nExit') - exit(1) - if rate_swap <= 0: - errprint('rate_swap MUST be > 0\nExit') - exit(1) -else: - errprint('rate_swap not in config\nExit') - exit(1) - - -if 'rate_zram' in config_dict: - rate_zram = string_to_float_convert_test(config_dict['rate_zram']) - if rate_zram is None: - errprint('Invalid rate_zram value, not float\nExit') - exit(1) - if rate_zram <= 0: - errprint('rate_zram MUST be > 0\nExit') - exit(1) -else: - errprint('rate_zram not in config\nExit') - exit(1) - - -if 'swap_min_sigterm' in config_dict: - swap_min_sigterm = config_dict['swap_min_sigterm'] -else: - errprint('swap_min_sigterm not in config\nExit') - exit(1) - - -if 'swap_min_sigkill' in config_dict: - swap_min_sigkill = config_dict['swap_min_sigkill'] -else: - errprint('swap_min_sigkill not in config\nExit') - exit(1) - - -if 'min_delay_after_sigterm' in config_dict: - min_delay_after_sigterm = string_to_float_convert_test( - config_dict['min_delay_after_sigterm']) - if min_delay_after_sigterm is None: - errprint('Invalid min_delay_after_sigterm value, not float\nExit') - exit(1) - if min_delay_after_sigterm < 0: - errprint('min_delay_after_sigterm must be positiv\nExit') - exit(1) -else: - errprint('min_delay_after_sigterm not in config\nExit') - exit(1) - - -if 'min_delay_after_sigkill' in config_dict: - min_delay_after_sigkill = string_to_float_convert_test( - config_dict['min_delay_after_sigkill']) - if min_delay_after_sigkill is None: - errprint('Invalid min_delay_after_sigkill value, not float\nExit') - exit(1) - if min_delay_after_sigkill < 0: - errprint('min_delay_after_sigkill must be positive\nExit') - exit(1) -else: - errprint('min_delay_after_sigkill not in config\nExit') - exit(1) - - -if 'psi_post_action_delay' in config_dict: - psi_post_action_delay = string_to_float_convert_test( - config_dict['psi_post_action_delay']) - if psi_post_action_delay is None: - errprint('Invalid psi_post_action_delay value, not float\nExit') - exit(1) - if psi_post_action_delay < 0: - errprint('psi_post_action_delay must be positive\nExit') - exit(1) -else: - errprint('psi_post_action_delay not in config\nExit') - exit(1) - - -if 'sigkill_psi_threshold' in config_dict: - sigkill_psi_threshold = string_to_float_convert_test( - config_dict['sigkill_psi_threshold']) - if sigkill_psi_threshold is None: - errprint('Invalid sigkill_psi_threshold value, not float\nExit') - exit(1) - if sigkill_psi_threshold < 0 or sigkill_psi_threshold > 100: - errprint('sigkill_psi_threshold must be in the range [0; 100]\nExit') - exit(1) -else: - errprint('sigkill_psi_threshold not in config\nExit') - exit(1) - - -if 'sigterm_psi_threshold' in config_dict: - sigterm_psi_threshold = string_to_float_convert_test( - config_dict['sigterm_psi_threshold']) - if sigterm_psi_threshold is None: - errprint('Invalid sigterm_psi_threshold value, not float\nExit') - exit(1) - if sigterm_psi_threshold < 0 or sigterm_psi_threshold > 100: - errprint('sigterm_psi_threshold must be in the range [0; 100]\nExit') - exit(1) -else: - errprint('sigterm_psi_threshold not in config\nExit') - exit(1) - - -if 'min_badness' in config_dict: - min_badness = string_to_int_convert_test( - config_dict['min_badness']) - if min_badness is None: - errprint('Invalid min_badness value, not integer\nExit') - exit(1) - if min_badness < 0 or min_badness > 1000: - errprint('Invalud min_badness value\nExit') - exit(1) -else: - errprint('min_badness not in config\nExit') - exit(1) - - -if 'oom_score_adj_max' in config_dict: - oom_score_adj_max = string_to_int_convert_test( - config_dict['oom_score_adj_max']) - if oom_score_adj_max is None: - errprint('Invalid oom_score_adj_max value, not integer\nExit') - exit(1) - if oom_score_adj_max < 0 or oom_score_adj_max > 1000: - errprint('Invalid oom_score_adj_max value\nExit') - exit(1) -else: - errprint('oom_score_adj_max not in config\nExit') - exit(1) - - -if 'min_time_between_warnings' in config_dict: - min_time_between_warnings = string_to_float_convert_test( - config_dict['min_time_between_warnings']) - if min_time_between_warnings is None: - errprint('Invalid min_time_between_warnings value, not float\nExit') - exit(1) - if min_time_between_warnings < 1 or min_time_between_warnings > 300: - errprint('min_time_between_warnings value out of range [1; 300]\nExit') - exit(1) -else: - errprint('min_time_between_warnings not in config\nExit') - exit(1) - - -if 'swap_min_warnings' in config_dict: - swap_min_warnings = config_dict['swap_min_warnings'] -else: - errprint('swap_min_warnings not in config\nExit') - exit(1) - - -if 'max_ancestry_depth' in config_dict: - max_ancestry_depth = string_to_int_convert_test( - config_dict['max_ancestry_depth']) - if min_badness is None: - errprint('Invalid max_ancestry_depth value, not integer\nExit') - exit(1) - if max_ancestry_depth < 1: - errprint('Invalud max_ancestry_depth value\nExit') - exit(1) -else: - errprint('max_ancestry_depth is not in config\nExit') - exit(1) - - -if 'max_post_sigterm_victim_lifetime' in config_dict: - max_post_sigterm_victim_lifetime = string_to_float_convert_test( - config_dict['max_post_sigterm_victim_lifetime']) - if max_post_sigterm_victim_lifetime is None: - errprint('Invalid max_post_sigterm_victim_lifetime val' - 'ue, not float\nExit') - exit(1) - if max_post_sigterm_victim_lifetime < 0: - errprint('max_post_sigterm_victim_lifetime must be non-n' - 'egative number\nExit') - exit(1) -else: - errprint('max_post_sigterm_victim_lifetime is not in config\nExit') - exit(1) - - -if 'post_kill_exe' in config_dict: - post_kill_exe = config_dict['post_kill_exe'] -else: - errprint('post_kill_exe is not in config\nExit') - exit(1) - - -if 'psi_path' in config_dict: - psi_path = config_dict['psi_path'] -else: - errprint('psi_path is not in config\nExit') - exit(1) - - -if 'psi_metrics' in config_dict: - psi_metrics = config_dict['psi_metrics'] -else: - errprint('psi_metrics is not in config\nExit') - exit(1) - - -if 'warning_exe' in config_dict: - warning_exe = config_dict['warning_exe'] - if warning_exe != '': - check_warning_exe = True - else: - check_warning_exe = False -else: - errprint('warning_exe is not in config\nExit') - exit(1) - - -if 'extra_table_info' in config_dict: - extra_table_info = config_dict['extra_table_info'] - if (extra_table_info != 'None' and - extra_table_info != 'cgroup_v1' and - extra_table_info != 'cgroup_v2' and - extra_table_info != 'cmdline' and - extra_table_info != 'environ' and - extra_table_info != 'realpath' and - extra_table_info != 'All'): - - errprint('Invalid config: invalid extra_table_info value\nExit') - exit(1) -else: - errprint('Invalid config: extra_table_info is not in config\nExit') - exit(1) - - -separate_log = conf_parse_bool('separate_log') - -if separate_log: - - import logging - from logging import basicConfig - from logging import info - - log_dir = '/var/log/nohang' - - try: - os.mkdir(log_dir) - except PermissionError: - print('ERROR: can not create log dir') - except FileExistsError: - pass - - logfile = log_dir + '/nohang.log' - - try: - with open(logfile, 'a') as f: - pass - except FileNotFoundError: - print('ERROR: log FileNotFoundError') - except PermissionError: - print('ERROR: log PermissionError') - - try: - basicConfig( - filename=logfile, - level=logging.INFO, - format="%(asctime)s: %(message)s") - except PermissionError: - errprint('ERROR: Permission denied: {}'.format(logfile)) - except FileNotFoundError: - errprint('ERROR: FileNotFoundError: {}'.format(logfile)) - - -if 'min_mem_report_interval' in config_dict: - min_mem_report_interval = string_to_float_convert_test( - config_dict['min_mem_report_interval']) - if min_mem_report_interval is None: - errprint('Invalid min_mem_report_interval value, not float\nExit') - exit(1) - if min_mem_report_interval < 0: - errprint('min_mem_report_interval must be non-negative number\nExit') - exit(1) -else: - errprint('min_mem_report_interval is not in config\nExit') - exit(1) - - -if 'max_sleep_time' in config_dict: - max_sleep_time = string_to_float_convert_test( - config_dict['max_sleep_time']) - if max_sleep_time is None: - errprint('Invalid max_sleep_time value, not float\nExit') - exit(1) - if max_sleep_time <= 0: - errprint('max_sleep_time must be positive number\nExit') - exit(1) -else: - errprint('max_sleep_time is not in config\nExit') - exit(1) - - -if 'min_sleep_time' in config_dict: - min_sleep_time = string_to_float_convert_test( - config_dict['min_sleep_time']) - if min_sleep_time is None: - errprint('Invalid min_sleep_time value, not float\nExit') - exit(1) - if min_sleep_time <= 0: - errprint('min_sleep_time must be positive number\nExit') - exit(1) -else: - errprint('min_sleep_time is not in config\nExit') - exit(1) - - -if max_sleep_time < min_sleep_time: - errprint( - 'max_sleep_time value must not exceed min_sleep_time value.\nExit' - ) - exit(1) - - -if print_proc_table_flag: - - if not root: - log('WARNING: effective UID != 0; euid={}; processes with other e' - 'uids will be invisible for nohang'.format(self_uid)) - - func_print_proc_table() - - -########################################################################## - - -psi_support = os.path.exists(psi_path) - - -########################################################################## - - -# Get KiB levels if it's possible. - -# получ кб. если не кб - то процент. Если процент - находим кб ниже на -# основе полученного своптотал и процентов. - - -def get_swap_threshold_tuple(string): - # re (Num %, True) or (Num KiB, False) - """Returns KiB value if abs val was set in config, or tuple with %""" - # return tuple with abs and bool: (abs %, True) or (abs MiB, False) - - if string.endswith('%'): - valid = string_to_float_convert_test(string[:-1]) - if valid is None: - errprint('somewhere swap unit is not float_%') - exit(1) - - value = float(string[:-1].strip()) - if value < 0 or value > 100: - errprint('invalid value, must be from the range[0; 100] %') - exit(1) - - return value, True - - elif string.endswith('M'): - valid = string_to_float_convert_test(string[:-1]) - if valid is None: - errprint('somewhere swap unit is not float_M') - exit(1) - - value = float(string[:-1].strip()) * 1024 - if value < 0: - errprint('invalid unit in config (negative value)') - exit(1) - - return value, False - - else: - errprint( - 'Invalid config file. There are invalid units somewhere\nExit') - exit(1) - - -swap_min_sigterm_tuple = get_swap_threshold_tuple(swap_min_sigterm) -swap_min_sigkill_tuple = get_swap_threshold_tuple(swap_min_sigkill) -swap_min_warnings_tuple = get_swap_threshold_tuple(swap_min_warnings) - - -swap_term_is_percent = swap_min_sigterm_tuple[1] -if swap_term_is_percent: - swap_min_sigterm_percent = swap_min_sigterm_tuple[0] -else: - swap_min_sigterm_kb = swap_min_sigterm_tuple[0] - - -swap_kill_is_percent = swap_min_sigkill_tuple[1] -if swap_kill_is_percent: - swap_min_sigkill_percent = swap_min_sigkill_tuple[0] -else: - swap_min_sigkill_kb = swap_min_sigkill_tuple[0] - - -swap_warn_is_percent = swap_min_warnings_tuple[1] -if swap_warn_is_percent: - swap_min_warnings_percent = swap_min_warnings_tuple[0] -else: - swap_min_warnings_kb = swap_min_warnings_tuple[0] - - -########################################################################## - -# outdated section, need fixes - -if print_config: - - print( - '\n1. Memory levels to respond to as an OOM threat\n[display' - 'ing these options need fix]\n') - - print('mem_min_sigterm: {} MiB, {} %'.format( - round(mem_min_sigterm_mb), round(mem_min_sigterm_percent, 1))) - print('mem_min_sigkill: {} MiB, {} %'.format( - round(mem_min_sigkill_mb), round(mem_min_sigkill_percent, 1))) - - print('swap_min_sigterm: {}'.format(swap_min_sigterm)) - print('swap_min_sigkill: {}'.format(swap_min_sigkill)) - - print('zram_max_sigterm: {} MiB, {} %'.format( - round(zram_max_sigterm_mb), round(zram_max_sigterm_percent, 1))) - print('zram_max_sigkill: {} MiB, {} %'.format( - round(zram_max_sigkill_mb), round(zram_max_sigkill_percent, 1))) - - print('\n2. The frequency of checking the level of available m' - 'emory (and CPU usage)\n') - print('rate_mem: {}'.format(rate_mem)) - print('rate_swap: {}'.format(rate_swap)) - print('rate_zram: {}'.format(rate_zram)) - - print('\n3. The prevention of killing innocent victims\n') - print('min_delay_after_sigterm: {}'.format(min_delay_after_sigterm)) - print('min_delay_after_sigkill: {}'.format(min_delay_after_sigkill)) - print('min_badness: {}'.format(min_badness)) - - # False (OK) - OK не нужен когда фолс - print('decrease_oom_score_adj: {}'.format( - decrease_oom_score_adj - )) - if decrease_oom_score_adj: - print('oom_score_adj_max: {}'.format(oom_score_adj_max)) - - print('\n4. Impact on the badness of processes via matching their' - ' names, cmdlines ir UIDs with regular expressions\n') - - print('(todo)') - - print('\n5. The execution of a specific command instead of sen' - 'ding the\nSIGTERM signal\n') - - print('\n6. GUI notifications:\n- OOM prevention results and\n- low m' - 'emory warnings\n') - print('gui_notifications: {}'.format(gui_notifications)) - - print('gui_low_memory_warnings: {}'.format(gui_low_memory_warnings)) - if gui_low_memory_warnings: - print('min_time_between_warnings: {}'.format( - min_time_between_warnings)) - - print('mem_min_warnings: {} MiB, {} %'.format( - round(mem_min_warnings_mb), round(mem_min_warnings_percent, 1))) - - print('swap_min_warnings: {}'.format(swap_min_warnings)) - - print('zram_max_warnings: {} MiB, {} %'.format( - round(zram_max_warnings_mb), round(zram_max_warnings_percent, 1))) - - print('\n7. Output verbosity\n') - print('print_config: {}'.format(print_config)) - print('print_mem_check_results: {}'.format(print_mem_check_results)) - print('print_sleep_periods: {}\n'.format(print_sleep_periods)) - - -########################################################################## - - -# for calculating the column width when printing mem and zram -mem_len = len(str(round(mem_total / 1024.0))) - -if gui_notifications: - notify_sig_dict = {SIGKILL: 'Killing', - SIGTERM: 'Terminating'} - - -# convert rates from MiB/s to KiB/s -rate_mem = rate_mem * 1024 -rate_swap = rate_swap * 1024 -rate_zram = rate_zram * 1024 - - -warn_time_now = 0 -warn_time_delta = 1000 -warn_timer = 0 - - -########################################################################## - - - - -if not root: - log('WARNING: effective UID != 0; euid={}; processes with other e' - 'uids will be invisible for nohang'.format(self_uid)) - - - -# Try to lock all memory - -mlockall() - -########################################################################## - - - -print_self_rss() - - -# if print_proc_table: -# find_victim(print_proc_table) - -log('Monitoring has started!') - -stdout.flush() - -########################################################################## - -psi_avg_string = '' # will be overwritten if PSI monitoring enabled - - -if psi_support and not ignore_psi: - psi_t0 = time() - - -if print_mem_check_results: - - # to find delta mem - wt2 = 0 - new_mem = 0 - - # init mem report interval - report0 = 0 - - -# handle signals -sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP, SIGABRT, SIGSEGV, SIGBUS] -for signum in sig_list: - signal(signum, signal_handler) - - -while True: - - if psi_support and not ignore_psi: - - psi_avg_value = find_psi_metrics_value(psi_path, psi_metrics) - - if print_mem_check_results: - psi_avg_string = 'PSI avg value: {} | '.format( - str(psi_avg_value).rjust(6)) - - if psi_avg_value >= sigkill_psi_threshold: - sigkill_psi_exceeded = True - else: - sigkill_psi_exceeded = False - - if psi_avg_value >= sigterm_psi_threshold: - sigterm_psi_exceeded = True - else: - sigterm_psi_exceeded = False - - if time() - psi_t0 >= psi_post_action_delay: - psi_post_action_delay_exceeded = True - else: - psi_post_action_delay_exceeded = False - - if psi_debug: - log('sigterm_psi_exceeded: {}, sigkill_psi_exceeded: {}, ps' - 'i_post_action_delay_exceeded: {}'.format( - sigterm_psi_exceeded, - sigkill_psi_exceeded, - psi_post_action_delay_exceeded)) - - if sigkill_psi_exceeded and psi_post_action_delay_exceeded: - time0 = time() - mem_info = 'PSI avg value ({}) > sigkill_psi_thresh' \ - 'old ({})'.format( - psi_avg_value, sigkill_psi_threshold) - - implement_corrective_action(SIGKILL) - - psi_t0 = time() - continue - - if sigterm_psi_exceeded and psi_post_action_delay_exceeded: - time0 = time() - mem_info = 'PSI avg value ({}) > sigterm_psi_thre' \ - 'shold ({})'.format(psi_avg_value, sigterm_psi_threshold) - - implement_corrective_action(SIGTERM) - - psi_t0 = time() - continue - - mem_available, swap_total, swap_free = check_mem_and_swap() - - # print(mem_available, swap_total, swap_free) - - # если метры - получаем киб выше и сразу. см. - - # if swap_min_sigkill is set in percent - if swap_kill_is_percent: - swap_min_sigkill_kb = swap_total * swap_min_sigkill_percent / 100.0 - - if swap_term_is_percent: - swap_min_sigterm_kb = swap_total * swap_min_sigterm_percent / 100.0 - - if swap_warn_is_percent: - swap_min_warnings_kb = swap_total * swap_min_warnings_percent / 100.0 - - # в общем случае для работы нужны килобайты. Если в процентах задано - - # находим КБ тут, после получения своптотал. - - mem_used_zram = check_zram() - - if print_mem_check_results: - - wt1 = time() - - delta = (mem_available + swap_free) - new_mem - - t_cycle = wt1 - wt2 - - report_delta = wt1 - report0 - - if report_delta >= min_mem_report_interval: - - mem_report = True - new_mem = mem_available + swap_free - - report0 = wt1 - - else: - mem_report = False - - wt2 = time() - - if mem_report: - - speed = delta / 1024.0 / report_delta - speed_info = ' | dMem: {} M/s'.format( - str(round(speed)).rjust(5) - ) - - # Calculate 'swap-column' width - swap_len = len(str(round(swap_total / 1024.0))) - - # Output available mem sizes - if swap_total == 0 and mem_used_zram == 0: - log('{}MemAvail: {} M, {} %{}'.format( - psi_avg_string, - human(mem_available, mem_len), - just_percent_mem(mem_available / mem_total), - speed_info - ) - ) - - elif swap_total > 0 and mem_used_zram == 0: - log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} %{}'.format( - psi_avg_string, - human(mem_available, mem_len), - just_percent_mem(mem_available / mem_total), - human(swap_free, swap_len), - just_percent_swap(swap_free / (swap_total + 0.1)), - speed_info - ) - ) - - else: - log('{}MemAvail: {} M, {} % | SwapFree: {} M, {} % | Mem' - 'UsedZram: {} M, {} %{}'.format( - psi_avg_string, - human(mem_available, mem_len), - just_percent_mem(mem_available / mem_total), - human(swap_free, swap_len), - just_percent_swap(swap_free / (swap_total + 0.1)), - human(mem_used_zram, mem_len), - just_percent_mem(mem_used_zram / mem_total), - speed_info - ) - ) - - # если swap_min_sigkill задан в абсолютной величине и Swap_total = 0 - if swap_total > swap_min_sigkill_kb: # If swap_min_sigkill is absolute - swap_sigkill_pc = percent(swap_min_sigkill_kb / (swap_total + 0.1)) - else: - swap_sigkill_pc = '-' - - if swap_total > swap_min_sigterm_kb: - swap_sigterm_pc = percent(swap_min_sigterm_kb / (swap_total + 0.1)) - else: - - # печатать так: SwapTotal = 0, ignore swapspace - swap_sigterm_pc = '-' - - # это для печати меминфо. Все переработать нахрен. - - # далее пошла проверка превышения порогов - - # MEM SWAP KILL - if (mem_available <= mem_min_sigkill_kb and - swap_free <= swap_min_sigkill_kb): - time0 = time() - - mem_info = 'Hard threshold exceeded\nMemory status that requ' \ - 'ires corrective actions:' \ - '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ - 'kill [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ - 'p_min_sigkill [{} MiB, {} %]'.format( - kib_to_mib(mem_available), - percent(mem_available / mem_total), - kib_to_mib(mem_min_sigkill_kb), - percent(mem_min_sigkill_kb / mem_total), - kib_to_mib(swap_free), - percent(swap_free / (swap_total + 0.1)), - kib_to_mib(swap_min_sigkill_kb), - swap_sigkill_pc) - - implement_corrective_action(SIGKILL) - - psi_t0 = time() - continue - - # ZRAM KILL - if mem_used_zram >= zram_max_sigkill_kb: - time0 = time() - - mem_info = 'Hard threshold exceeded\nMemory status that requir' \ - 'es corrective actions:' \ - '\n MemUsedZram [{} MiB, {} %] >= zram_max_sig' \ - 'kill [{} MiB, {} %]'.format( - kib_to_mib(mem_used_zram), - percent(mem_used_zram / mem_total), - kib_to_mib(zram_max_sigkill_kb), - percent(zram_max_sigkill_kb / mem_total)) - - implement_corrective_action(SIGKILL) - - psi_t0 = time() - continue - - # MEM SWAP TERM - if mem_available <= mem_min_sigterm_kb and \ - swap_free <= swap_min_sigterm_kb: - - time0 = time() - - mem_info = 'Soft threshold exceeded\nMemory status that requi' \ - 'res corrective actions:' \ - '\n MemAvailable [{} MiB, {} %] <= mem_min_sig' \ - 'term [{} MiB, {} %]\n SwapFree [{} MiB, {} %] <= swa' \ - 'p_min_sigterm [{} MiB, {} %]'.format( - kib_to_mib(mem_available), - percent(mem_available / mem_total), - kib_to_mib(mem_min_sigterm_kb), - # percent(mem_min_sigterm_kb / mem_total), - # ОКРУГЛЯТЬ НА МЕСТЕ ВЫШЕ (или не выше, хз) - round(mem_min_sigterm_percent, 1), - kib_to_mib(swap_free), - percent(swap_free / (swap_total + 0.1)), - kib_to_mib(swap_min_sigterm_kb), - swap_sigterm_pc) - - implement_corrective_action(SIGTERM) - - psi_t0 = time() - continue - - # ZRAM TERM - if mem_used_zram >= zram_max_sigterm_kb: - time0 = time() - - mem_info = 'Soft threshold exceeded\nMemory status that requ' \ - 'ires corrective actions:' \ - '\n MemUsedZram [{} MiB, {} %] >= ' \ - 'zram_max_sigterm [{} M, {} %]'.format( - kib_to_mib(mem_used_zram), - percent(mem_used_zram / mem_total), - kib_to_mib(zram_max_sigterm_kb), - percent(zram_max_sigterm_kb / mem_total)) - - implement_corrective_action(SIGTERM) - - psi_t0 = time() - continue - - # LOW MEMORY WARNINGS - if gui_low_memory_warnings: - - if mem_available <= mem_min_warnings_kb and \ - swap_free <= swap_min_warnings_kb + 0.1 or \ - mem_used_zram >= zram_max_warnings_kb: - warn_time_delta = time() - warn_time_now - warn_time_now = time() - warn_timer += warn_time_delta - if warn_timer > min_time_between_warnings: - send_notify_warn() - warn_timer = 0 - - # SLEEP BETWEEN MEM CHECKS - sleep_after_check_mem() diff --git a/trash/nonascii-nohang.conf b/trash/nonascii-nohang.conf deleted file mode 100644 index e969359..0000000 --- a/trash/nonascii-nohang.conf +++ /dev/null @@ -1,427 +0,0 @@ - - This is nohang config file. - - Redesign of this config in progress. - - Lines starting with #, tabs and spaces are comments. - - Lines starting with $ contain obligatory parameters. - - Lines starting with @ contain optional parameters. - - The configuration includes the following sections: - - 1. Memory levels to respond to as an OOM threat - 2. Response on PSI memory metrics - 3. The frequency of checking the level of available memory - (and CPU usage) - 4. The prevention of killing innocent victims - 5. Impact on the badness of processes via matching their - - names, - - cmdlines and - - UIDs - with regular expressions - 6. The execution of a specific command instead of sending the - SIGTERM signal - 7. GUI notifications: - - OOM prevention results and - - low memory warnings - 8. Output verbosity - 9. Misc - - Just read the description of the parameters and edit the values. - Please restart the program after editing the config. - -##################################################################### - - 1. Thresholds below which a signal should be sent to the victim - - Sets the available memory levels at or below which SIGTERM or SIGKILL - signals are sent. The signal will be sent if MemAvailable and - SwapFree (in /proc/meminfo) at the same time will drop below the - corresponding values. Can be specified in % (percent) and M (MiB). - Valid values are floating-point numbers from the range [0; 100] %. - - MemAvailable levels. - -mem_min_sigterm = 10 % -mem_min_sigkill = 5 % - - SwapFree levels. - -swap_min_sigterm = 10 % -swap_min_sigkill = 5 % - - Specifying the total share of zram in memory, if exceeded the - corresponding signals are sent. As the share of zram in memory - increases, it may fall responsiveness of the system. 90 % is a - usual hang level, not recommended to set very high. - - Can be specified in % and M. Valid values are floating-point - numbers from the range [0; 90] %. - -zram_max_sigterm = 50 % -zram_max_sigkill = 55 % - -##################################################################### - - 2. Response on PSI memory metrics (it needs Linux 4.20 and up) - - About PSI: - https://facebookmicrosites.github.io/psi/ - - Disabled by default (ignore_psi = True). - -ignore_psi = True - - Choose a path to PSI file. - By default it monitors system-wide file: /proc/pressure/memory - You also can set file to monitor one cgroup slice. - For example: - psi_path = /sys/fs/cgroup/unified/user.slice/memory.pressure - psi_path = /sys/fs/cgroup/unified/system.slice/memory.pressure - psi_path = /sys/fs/cgroup/unified/system.slice/foo.service/memory.pressure - - Execute the command - find /sys/fs/cgroup | grep -P "memory\.pressure$" - to find available memory.pressue files (except /proc/pressure/memory). - -psi_path = /proc/pressure/memory - - Valid psi_metrics are: - some_avg10 - some_avg60 - some_avg300 - full_avg10 - full_avg60 - full_avg300 - - some_avg10 is most sensitive. - -psi_metrics = some_avg10 - -sigterm_psi_threshold = 80 -sigkill_psi_threshold = 90 - -psi_post_action_delay = 60 - -##################################################################### - - 3. The frequency of checking the amount of available memory - (and CPU usage) - - Coefficients that affect the intensity of monitoring. Reducing - the coefficients can reduce CPU usage and increase the periods - between memory checks. - - Why three coefficients instead of one? Because the swap fill rate - is usually lower than the RAM fill rate. - - It is possible to set a lower intensity of monitoring for swap - without compromising to prevent OOM and thus reduce the CPU load. - - Default values are well for desktop. On servers without rapid - fluctuations in memory levels the values can be reduced. - - Valid values are positive floating-point numbers. - -rate_mem = 4000 -rate_swap = 1500 -rate_zram = 500 - - See also https://github.com/rfjakob/earlyoom/issues/61 - - - Максимальное время сна между проверками памяти. - Положительное число. - -max_sleep_time = 3 - - Минимальное время сна между проверками памяти. - Положительное число, не превышающее max_sleep_time. - -min_sleep_time = 0.1 - -##################################################################### - - 4. The prevention of killing innocent victims - - Минимальное значение bandess (по умолчанию равно oom_score), - которым должен обладать - процесс для того, чтобы ему был отправлен сигнал. - Позволяет предотвратить убийство невиновных если что-то - пойдет не так. - - Valid values are integers from the range [0; 1000]. - -min_badness = 20 - - Минимальная задержка после отправки соответствующих сигналов - для предотвращения риска убийства сразу множества процессов. - - Valid values are non-negative floating-point numbers. - -min_delay_after_sigterm = 0.2 -min_delay_after_sigkill = 1 - - Процессы браузера chromium обычно имеют oom_score_adj - 200 или 300. Это приводит к тому, что процессы хрома умирают - первыми вместо действительно тяжелых процессов. - Если параметр decrease_oom_score_adj установлен - в значение True, то у процессов, имеющих oom_score_adj выше - oom_score_adj_max значение oom_score_adj будет опущено - до oom_score_adj_max перед поиском жертвы. - - Enabling the option requires root privileges. - Valid values are True and False. - Values are case sensitive. - -decrease_oom_score_adj = False - - Valid values are integers from the range [0; 1000]. - -oom_score_adj_max = 20 - -##################################################################### - - 5. Impact on the badness of processes via matching their names, - cmdlines or UIDs with regular expressions using re.search(). - - See https://en.wikipedia.org/wiki/Regular_expression and - https://en.wikipedia.org/wiki/Perl_Compatible_Regular_Expressions - - Enabling this options slows down the search for the victim - because the names, cmdlines or UIDs of all processes - (except init and kthreads) are compared with the - specified regex patterns (in fact slowing down is caused by - reading all /proc/*/cmdline and /proc/*/status files). - - Use script `oom-sort` from nohang package to view - names, cmdlines and UIDs of processes. - - - 5.1 Matching process names with RE patterns - - Valid values are True and False. - -regex_matching = False - - Syntax: - - @PROCESSNAME_RE badness_adj /// RE_pattern - - New badness value will be += badness_adj - - It is possible to compare multiple patterns - with different badness_adj values. - - Example: - -@PROCESSNAME_RE -100 /// ^Xorg$ - -@PROCESSNAME_RE -500 /// ^sshd$ - - 5.2 Matching cmdlines with RE patterns - - A good option that allows fine adjustment. - -re_match_cmdline = False - -@CMDLINE_RE 300 /// -childID|--type=renderer - -@CMDLINE_RE -200 /// ^/usr/lib/virtualbox - - - 5.3 Matching UIDs with RE patterns - - The most slow option - -re_match_uid = False - -@UID_RE -100 /// ^0$ - - 5.4 Matching CGroup-line with RE patterns - -re_match_cgroup = True - - @CGROUP_RE -50 /// system.slice - - @CGROUP_RE 50 /// foo.service -@CGROUP_RE 2000 /// user.slice - - 5.5 Matching realpath with RE patterns - -re_match_realpath = False - -@REALPATH_RE 20 /// ^/usr/bin/foo - - Note that you can control badness also via systemd units via OOMScoreAdjust, see - https://www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust= - -##################################################################### - - 6. The execution of a specific command instead of sending the - SIGTERM signal. - - For processes with a specific name you can specify a command to - run instead of sending the SIGTERM signal. - - For example, if the process is running as a daemon, you can run - the restart command instead of sending SIGTERM. - - Valid values are True and False. - -execute_the_command = False - - The length of the process name can't exceed 15 characters. - The syntax is as follows: lines starting with keyword $ETC are - considered as the lines containing names of processes and - corresponding commands. After a name of process the triple slash - (///) follows. And then follows the command that will be - executed if the specified process is selected as a victim. The - ampersand (&) at the end of the command will allow nohang to - continue runing without waiting for the end of the command - execution. - - For example: - $ETC mysqld /// systemctl restart mariadb.service & - $ETC php-fpm7.0 /// systemctl restart php7.0-fpm.service - - If command will contain $PID pattern, this template ($PID) will - be replaced by PID of process which name match with RE pattern. - - Exmple: - - $ETC bash /// kill -KILL $PID - - It is way to send any signal instead of SIGTERM. - (run `kill -L` to see list of all signals) - - Also $NAME will be replaced by process name. - - $ETC bash /// kill -9 $PID - -$ETC firefox-esr /// kill -SEGV $PID - -$ETC tail /// kill -9 $PID - -$ETC apache2 /// systemctl restart apache2 - - -##################################################################### - - 7. GUI notifications: - - OOM prevention results and - - low memory warnings - - Включение этой опции требует наличия notify-send в системе. - В Debian/Ubuntu это обеспечивается установкой пакета - libnotify-bin. В Fedora и Arch Linux - пакет libnotify. - Также требуется наличие сервера уведомлений. - При запуске nohang от рута уведомления рассылаются всем - залогиненным пользователям. - See also wiki.archlinux.org/index.php/Desktop_notifications - Valid values are True and False. - -gui_notifications = False - - Enable GUI notifications about the low level of available memory. - Valid values are True and False. - -gui_low_memory_warnings = False - - Execute the command instead of sending GUI notifications if the value is - not empty line. For example: - warning_exe = cat /proc/meminfo & - -warning_exe = - - Если значения MemAvailable и SwapFree одновременно будут ниже - соотвестствующих значений, то будут отправлены уведомления. - - Can be specified in % (percent) and M (MiB). - Valid values are floating-point numbers from the range [0; 100] %. - -mem_min_warnings = 25 % - -swap_min_warnings = 25 % - - Если доля zram в памяти превысит значение zram_max_warnings, - то будут отправляться уведомления с минимальным периодом равным - min_time_between_warnings. - -zram_max_warnings = 40 % - - Минимальное время между отправками уведомлений в секундах. - Valid values are floating-point numbers from the range [1; 300]. - -min_time_between_warnings = 15 - - Ampersands (&) will be replaced with asterisks (*) in process - names and in commands. - -##################################################################### - - 8. Verbosity - - Display the configuration when the program starts. - Valid values are True and False. - -print_config = False - - Print memory check results. - Valid values are True and False. - -print_mem_check_results = False - - Минимальная периодичность печати состояния памяти. - 0 - печатать все проверки памяти. - Неотрицательное число. - -min_mem_report_interval = 60 - - Print sleep periods between memory checks. - Valid values are True and False. - -print_sleep_periods = False - - Печатать общую статистику по корректирующим действиям с момента - запуска nohang после каждого корректирующего действия. - -print_total_stat = True - - Печатать таблицу процессов перед каждым корректирующим действием. - -print_proc_table = False - -print_victim_info = True - - Максимальная глубина показа родословной жертвы. - По умолчанию (1) показывается только родитель - PPID. - Целое положительное число. - -max_ancestry_depth = 1 - -separate_log = False - -psi_debug = False - -##################################################################### - - 9. Misc - - Жертва может не реагировать на SIGTERM. - max_post_sigterm_victim_lifetime - это время, при превышении - которого жертва получит SIGKILL. - Неотрицательные числа. - -max_post_sigterm_victim_lifetime = 10 - - Execute the command after sending SIGKILL to the victim if the value is - not empty line. For example: - post_kill_exe = cat /proc/meminfo & - -post_kill_exe = - -forbid_negative_badness = True - diff --git a/trash/oom-trigger b/trash/oom-trigger deleted file mode 100755 index f0dcdf5..0000000 --- a/trash/oom-trigger +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env python3 - -# интерактивный oom-trigger - -from memco import * - -from signal import signal, SIGTERM -from time import sleep -from sys import exit - - -def signal_handler(signum, frame): - print('Got signal {}'.format(signum)) - # sleep(1) - # exit() - - -signal(SIGTERM, signal_handler) - - - -# печать показателей на этапах работы -def print_mem(): - - mem_tup = mem_check_main() - - mem_available = mem_tup[0] - swap_total = mem_tup[1] - swap_free = mem_tup[2] - - print( - 'MemAvailable: ', round(mem_available / 1024 / 1024, 3), 'GiB,', round(mem_available / 1024), 'MiB,', round(mem_available / mem_total * 100, 1), '%' - ) - - if swap_total != 0: - print( - 'SwapFree: ', round(swap_free / 1024 / 1024, 3), 'GiB,', round(swap_free / 1024), 'MiB,', round(swap_free / swap_total * 100, 1), '%' - ) - print( - 'Total Free: ', round((mem_available + swap_free) / 1024 / 1024, 3), 'GiB,', round((mem_available + swap_free) / 1024), 'MiB,', round((mem_available + swap_free) / (mem_total + swap_total) * 100, 1), '%' - ) - else: - print( - 'Swap disabled' - ) - - - -# бесконечный жор -def inf(): - - print( - 'Вводите целые неотрицательные числа. Чем больше, тем быстрее потребление памяти.\n1000 same обеспечивает потребление на уровне полтора гиг в секунду,\nurandom работает на скорости максимум 170 M/s' - ) - same = input("same: ") - urandom = input("urandom: ") - - expanding_list = [] - - print( - 'Процесс неограниченного потребления пошёл... Press Ctrl + C for exit' - ) - - while True: - try: - expanding_list.append(os.urandom(int(urandom))) - expanding_list.append('#' * int(same)) - except MemoryError: - print('MemoryError, start побайтовая добивалка!') - terminal() - - - - -# жор числп гиг -def lim(): - - expanding_list = [] - - n = input('На сколько гигабайт уменьшить доступную память?\n: ') - - print('Погнали тратить ' + n + ' гиг...') - - i = 0 - - while True: - - i += 1 - - try: - expanding_list.append(os.urandom(int(100))) - expanding_list.append('#' * int(300)) - except MemoryError: - print('MemoryError!') - break - if i > 2020202 * int(n): - print('DONE') - break - - return expanding_list - - - - -# жор до остатка мегабайт -def lim2avail(): - - expanding_list = [] - - n = input( - 'Сколько мегабайт общей доступной памяти (MemAvailable + SwapFree) оставить?\nВведите целое положительное число: ' - ) - - # проверка на целое положительное - if n.isdigit() == True: - n = int(n) - else: - print( - 'Вы ввели не целое положительное число' - ) - return 0 - - if n == 0: - print( - 'Вы ввели не целое положительное число' - ) - return 0 - - print( - 'Погнали уменьшать доступную память до уровня ниже ' + str(n) + ' MiB...' - ) - - while True: - try: - expanding_list.append(os.urandom(5000)) - expanding_list.append('#' * 5000) - except MemoryError: - print('MemoryError!') - break - if total_mem_available() <= n: - print('DONE') - break - - return expanding_list - - - - - - -print('WARNING: эта прога способна потратить память и повесить систему, будьте осторожны.') -print('При ее работе следите за показателями памяти.') - -ex_list = [] - -try: - while True: - - print() - print_mem() - print() - print('Выберите вариант из списка ниже') - print('8 или i или I - запустить бесконечное потребление, предложив выбрать скорость потребления и энтропию') - print('7 или l или L - запустить ограниченное потребление заданного числа гигов') - print('6 или a или A - жрать память пока количество доступной памяти не опустится ниже заданного') - print('0 или с или С - очистить накопления при их наличии') - print('q или любой другой символ - выход (можно просто нажать Enter)') - - li = input(': ') - - if li is 'l' or li is 'L' or li is '7': - x = lim() - ex_list.append(x) - elif li is 'i' or li is 'I' or li is '8': - inf() - elif li is 'c' or li is 'C' or li is '0': - ex_list = [] - x = 0 - y = 0 - elif li is '6' or li is 'a' or li is 'A': - y = lim2avail() - ex_list.append(y) - else: - exit() - -except KeyboardInterrupt: - print() - print_mem() - selfterm() - - - - - diff --git a/trash/proc2log b/trash/proc2log deleted file mode 100755 index 14127f5..0000000 --- a/trash/proc2log +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python3 - -# proc2log: new processes monitor - -import os -import argparse -from time import sleep - -parser = argparse.ArgumentParser() -parser.add_argument( - '-p', - '--period', - help='peroid in seconds between proc checks, default value: 0.1', - default=0.1, - type=float - ) -args = parser.parse_args() -period = args.period - -unknown_name = '' - - -# получение чистого пид сета -def foo(): - proc_list = os.listdir('/proc') - proc_set = set() - for i in proc_list: - if i.isdigit() != True: - continue - proc_set.add(i) - return proc_set - - -# имя через пид -def pid_to_name(pid): - try: - with open('/proc/' + pid + '/status') as f: - for line in f: - return line[:-1].split('\t')[1] - except FileNotFoundError: - return unknown_name - - -# печать одного пида и имени -def print_pid(pid): - - try: - with open('/proc/' + pid + '/status') as f: - for lineno, line in enumerate(f): - name = line[:-1].split('\t')[1] - print('+ {}, {}'.format(pid, name)) - pid_dict[pid] = name - if lineno >= 0: - break - - except FileNotFoundError: - name = pid_dict.pop(pid, unknown_name) - print(' - {}, {}'.format(pid, name)) - - except ProcessLookupError: - name = pid_dict.pop(pid, unknown_name) - print(' - {}, {}'.format(pid, name)) - - -# нахождение и печать дельт сетов -def delta(old_set): - - new_set = set(os.listdir('/proc')) - plus = new_set - old_set - minus = old_set - new_set - - if len(plus) > 0: - for pid in plus: - print_pid(pid) - - if len(minus) > 0: - for pid in minus: - print_pid(pid) - - return new_set - - -print('proc2log started with period {} seconds'.format(period)) -print('+ PID, NAME - PID, NAME') - -# первичное наполнение словаря значениями pid:name для печати имён исчезнувших процессов -pid_dict = dict() -for pid in foo(): - pid_dict[pid] = pid_to_name(pid) - -pid_set = set(os.listdir('/proc')) -try: - while True: - pid_set = delta(pid_set) - sleep(period) -except KeyboardInterrupt: - exit() - diff --git a/trash/psi-monitor b/trash/psi-monitor deleted file mode 100755 index 70b4c42..0000000 --- a/trash/psi-monitor +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 - -from ctypes import CDLL -from time import sleep -from sys import argv - -""" - Execute the command - find /sys/fs/cgroup -name memory.pressure - to find available memory.pressue files (except /proc/pressure/memory). - (actual for cgroup2) -""" - -if len(argv) > 1: - psi_path = argv[1] -else: - psi_path = '/proc/pressure/memory' - - -def mlockall(): - - MCL_CURRENT = 1 - MCL_FUTURE = 2 - MCL_ONFAULT = 4 - - libc = CDLL('libc.so.6', use_errno=True) - - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT - ) - if result != 0: - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE - ) - if result != 0: - print('WARNING: cannot lock all memory') - else: - pass - else: - pass - - -mlockall() - - -def psi_path_to_metrics(psi_path): - - with open(psi_path) as f: - psi_list = f.readlines() - # print(psi_list) - some_list, full_list = psi_list[0].split(' '), psi_list[1].split(' ') - #print(some_list, full_list) - some_avg10 = some_list[1].split('=')[1] - some_avg60 = some_list[2].split('=')[1] - some_avg300 = some_list[3].split('=')[1] - - full_avg10 = full_list[1].split('=')[1] - full_avg60 = full_list[2].split('=')[1] - full_avg300 = full_list[3].split('=')[1] - - return (some_avg10, some_avg60, some_avg300, - full_avg10, full_avg60, full_avg300) - - -print('Path to PSI file: {}\n'.format(psi_path)) - - -print(' avg10 avg60 avg300 avg10 avg60 avg300') - -while True: - (some_avg10, some_avg60, some_avg300, - full_avg10, full_avg60, full_avg300) = psi_path_to_metrics(psi_path) - - print('some {} {} {} | full {} {} {}'.format( - some_avg10.rjust(6), - some_avg60.rjust(6), - some_avg300.rjust(6), - full_avg10.rjust(6), - full_avg60.rjust(6), - full_avg300.rjust(6))) - - sleep(2) diff --git a/trash/psi-monitor-old b/trash/psi-monitor-old deleted file mode 100755 index 80694f9..0000000 --- a/trash/psi-monitor-old +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python3 - -from time import sleep, time -import os -from sys import stdout - -mlockall = True - -if mlockall: - from ctypes import CDLL - CDLL('libc.so.6').mlockall(3) - -psi_path = '/proc/pressure/memory' - -psi_support = os.path.exists(psi_path) - -def rline1(path): - """read 1st line from path.""" - with open(path) as f: - for line in f: - return line[:-1] - -def psi_mem_some_avg_total(): - return float(rline1(psi_path).rpartition('=')[2]) - -avg_min_time = 1 - -if psi_support: - ta0 = time() - a0 = psi_mem_some_avg_total() - -while True: - - if psi_support: - - ta1= time() - dt = ta1 - ta0 - - if dt >= avg_min_time: - - a1 = psi_mem_some_avg_total() - avg = (a1 - a0) / (ta1 - ta0) / 10000 - - print('avg time:', round(dt, 1)) - print('PSI mem avg:', round(avg, 2)) - print(rline1(psi_path), '\n') - ta0 = ta1 - a0 = a1 - - stdout.flush() - sleep(0.1) diff --git a/trash/psi-top b/trash/psi-top deleted file mode 100755 index 73e695b..0000000 --- a/trash/psi-top +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python3 - -from ctypes import CDLL -from time import sleep, time -import os - -""" - Execute the command - find /sys/fs/cgroup -name memory.pressure - to find available memory.pressue files (except /proc/pressure/memory). - (actual for cgroup2) -""" - -psi_path = '/proc/pressure/memory' - -def mlockall(): - - MCL_CURRENT = 1 - MCL_FUTURE = 2 - MCL_ONFAULT = 4 - - libc = CDLL('libc.so.6', use_errno=True) - - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT - ) - if result != 0: - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE - ) - if result != 0: - print('WARNING: cannot lock all memory') - else: - pass - else: - pass - - -mlockall() - -t0 = time() - -def psi_path_to_metrics(psi_path): - - with open(psi_path) as f: - psi_list = f.readlines() - # print(psi_list) - some_list, full_list = psi_list[0].split(' '), psi_list[1].split(' ') - #print(some_list, full_list) - some_avg10 = some_list[1].split('=')[1] - some_avg60 = some_list[2].split('=')[1] - some_avg300 = some_list[3].split('=')[1] - - full_avg10 = full_list[1].split('=')[1] - full_avg60 = full_list[2].split('=')[1] - full_avg300 = full_list[3].split('=')[1] - - return (some_avg10, some_avg60, some_avg300, - full_avg10, full_avg60, full_avg300) - - - -def cgroup2_root(): - """ - """ - with open('/proc/mounts') as f: - for line in f: - if ' cgroup2 ' in line: - # if line.startswith('cgroup2 '): - return line[7:].rpartition(' cgroup2 ')[0] - - -def get_psi_mem_files(cgroup2_path): - """ - """ - - path_list = [] - - for root, dirs, files in os.walk(cgroup2_path): - for file in files: - path = os.path.join(root, file) - if path.endswith('/memory.pressure'): ############# - path_list.append(path) - - return path_list - - -def psi_path_to_cgroup2(path): - """ - """ - return path.partition(i)[2][:-16] - - -i = cgroup2_root() - -print('cgroup2 root dir:', i) -if i is not None: - y = get_psi_mem_files(i) - for path in y: - pass # print(psi_path_to_cgroup2(path)) - -path_list = get_psi_mem_files(i) - -print(' avg10 avg60 avg300 avg10 avg60 avg300 cgroup2') - -print(' ----- ----- ------ ----- ----- ------ ---------') - -(some_avg10, some_avg60, some_avg300, full_avg10, full_avg60, full_avg300) = psi_path_to_metrics('/proc/pressure/memory') -print('some {} {} {} | full {} {} {} {}'.format( - some_avg10.rjust(6), - some_avg60.rjust(6), - some_avg300.rjust(6), - full_avg10.rjust(6), - full_avg60.rjust(6), - full_avg300.rjust(6), '[SYSTEM]')) - - -for psi_path in path_list: - (some_avg10, some_avg60, some_avg300, - full_avg10, full_avg60, full_avg300) = psi_path_to_metrics(psi_path) - - print('some {} {} {} | full {} {} {} {}'.format( - some_avg10.rjust(6), - some_avg60.rjust(6), - some_avg300.rjust(6), - full_avg10.rjust(6), - full_avg60.rjust(6), - full_avg300.rjust(6), psi_path_to_cgroup2(psi_path))) - - -print(time() - t0) - - diff --git a/trash/psi-trigger b/trash/psi-trigger deleted file mode 100755 index 54c5be0..0000000 --- a/trash/psi-trigger +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python3 - -from time import sleep, time - -t0 = time() - -hog_list = [] - -duration = 60 - -sff_max = 0.55 -sff_min = 0.45 - -mb = 1024 * 1024 -path = '/dev/zero' - -def sff(): - """ - SwapFree fraction - """ - with open('/proc/meminfo') as f: - for i in f: - if i.startswith('SwapTotal'): - st = i.split(':')[1].strip(' kB\n') - if i.startswith('SwapFree'): - sf = i.split(':')[1].strip(' kB\n') - st = float(st) + 1 - sf = float(sf) - return sf / st - - -def hog(hog_list): - """ - """ - with open(path, 'rb') as f: - raw = f.read(mb) - hog_list.append(raw) - - return hog_list - - -while True: - - while sff() > sff_min: - hog_list.reverse() - if time() - t0 > duration: - exit() - hog_list = hog(hog_list) - print('MiB:', len(hog_list), 'SwapFree:', sff(), 'time:', time() - t0) - - while sff() < sff_max: - hog_list.reverse() - if time() - t0 > duration: - exit() - try: - hog_list.pop() - except IndexError: - break - print('MiB:', len(hog_list), 'SwapFree:', sff(), 'time:', time() - t0) - diff --git a/trash/psi_dummy b/trash/psi_dummy deleted file mode 100644 index f490e2e..0000000 --- a/trash/psi_dummy +++ /dev/null @@ -1,2 +0,0 @@ -some avg10=56.70 avg60=51.59 avg300=22.92 total=195239452 -full avg10=28.82 avg60=49.77 avg300=21.83 total=182504463 diff --git a/trash/random-trigger b/trash/random-trigger deleted file mode 100755 index 2c49a46..0000000 --- a/trash/random-trigger +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python3 - -import random - -# число элементов в списке, влияет на потребляемую память -n = 400 * 1000 * 1000 - -# число итераций замены элементов списка -c = 10 - -print('Наполняем список (n = {}) случайными числами...'.format(n)) - -try: - # добавляем в пустой список n случайных чисел - x = [] - for _ in range(n): - x.append(random.random()) - - for i in range(c): - print('Читение и запись новых значений, итерация {} из {}'.format(i + 1, c)) - - # заменяем элементы списка на новые - for i in range(n): - x[i] = x[i] * 0.999 - - del x - -except KeyboardInterrupt: - del x - diff --git a/trash/rawcat b/trash/rawcat deleted file mode 100755 index 1cff205..0000000 --- a/trash/rawcat +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 - -# rawcat 1 999 /path/to/file - -from sys import argv - -print('argv:') -print(argv) - -mode = int(argv[1]) -num = int(argv[2]) -path = argv[3] - -if mode == 0: - with open(path, 'rb') as f: - raw = f.read(num) -if mode == 1: - with open(path, 'rb') as f: - raw = f.read(num).decode('utf-8', 'ignore') -if mode == 2: - with open(path) as f: - raw = f.read(num) - -print('raw:') -print([raw]) diff --git a/trash/t01 b/trash/t01 deleted file mode 100755 index bf69058..0000000 --- a/trash/t01 +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python3 - -from signal import signal, SIGTERM - -def signal_handler(signum, frame): - print('Got signal {}'.format(signum)) - -signal(SIGTERM, signal_handler) - -rate = 99999 - -x = [] - -while True: - x.append('#' * rate) - -# http://okturing.com/src/6140/body diff --git a/trash/thanatolog b/trash/thanatolog deleted file mode 100755 index fcf6dba..0000000 --- a/trash/thanatolog +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/env python3 - -import os -from time import sleep, time -from signal import (signal, - SIGKILL, SIGTERM, SIGINT, SIGQUIT, - SIGCONT, SIGUSR1, SIGUSR2, - SIGHUP, SIGABRT, SIGSEGV, SIGBUS) -from sys import argv, exit - - -def mlockall(): - """Lock all memory to prevent swapping the process.""" - - from ctypes import CDLL - - MCL_CURRENT = 1 - MCL_FUTURE = 2 - MCL_ONFAULT = 4 - - libc = CDLL('libc.so.6', use_errno=True) - - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT - ) - if result != 0: - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE - ) - if result != 0: - print('WARNING: cannot lock all memory') - else: - log('All memory locked with MCL_CURRENT | MCL_FUTURE') - else: - print('All memory locked with MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT') - - -def check_mem(): - """find mem_available""" - with open('/proc/meminfo') as f: - for n, line in enumerate(f): - if n is 2: - mem_available = int(line.split(':')[1][:-4]) - return mem_available - - -def pid_to_name(pid): - """ - """ - try: - with open('/proc/' + pid + '/comm', 'rb') as f: - return f.read().decode('utf-8', 'ignore')[:-1] - except FileNotFoundError: - return '' - except ProcessLookupError: - return '' - - -def pid_to_state(pid): - - x = rline1('/proc/' + pid + '/stat') - - if ')' in x: - return x.rpartition(')')[2][1] - else: - return ' ' - - -def pid_to_rss(pid): - try: - rss = rline1('/proc/{}/statm'.format(pid)).split(' ')[1] - except IndexError: - rss = '-0' - return rss - - -def pid_to_realpath(pid): - try: - return os.path.realpath('/proc/' + pid + '/exe') - except FileNotFoundError: - return '' - - -def rline1(path): - """read 1st line from path.""" - try: - with open(path) as f: - for line in f: - return line[:-1] - except UnicodeDecodeError: - with open(path, 'rb') as f: - return f.read(999).decode( - 'utf-8', 'ignore').split('\n')[0] # use partition()! - except FileNotFoundError: - return 'FileNotFoundError' - except ProcessLookupError: - return 'ProcessLookupError' - - -############################################################################### - - -if len(argv) != 2: - print("""Usage: -thanatolog PID""") - exit() - - -mlockall() - - -pid = argv[1] -name = pid_to_name(pid) -rss0 = float(pid_to_rss(pid)) -ma = check_mem() - - -print('PID:', pid) -print('Name:', name) -print('RSS at startup: {} (100.0 %)'.format(int(rss0))) -print('MemAvail:', ma) - -send_signal = SIGKILL - -# os.kill(int(pid), SIGCONT) - - -t0 = time() - - -for i in range(10): - rpe = os.path.exists('/proc/{}/exe'.format(pid)) - rss = pid_to_rss(pid) - pe = os.path.exists('/proc/{}'.format(pid)) - t1 = time() - d = t1 - t0 - state = pid_to_state(pid) - ma = check_mem() - vv = pid_to_cmdline(pid) - print(vv) - print('RP: {} | RSS: {} ({} %) | State: {} | time: {} | MemAv' - 'ail: {}'.format(rpe, rss, round(float(rss) / ( - rss0 + 0.0001) * 100, 1), state, round(d, 3), ma)) - -print('Send SIGKILL') - -os.kill(int(pid), send_signal) - -t0 = time() - -while True: - rpe = os.path.exists('/proc/{}/exe'.format(pid)) - rss = pid_to_rss(pid) - pe = os.path.exists('/proc/{}'.format(pid)) - t1 = time() - d = t1 - t0 - state = pid_to_state(pid) - ma = check_mem() - - vv = pid_to_cmdline(pid) - print(vv) - - print('RP: {} | RSS: {} ({} %) | State: {} | time: {} | MemAv' - 'ail: {}'.format(rpe, rss, round(float(rss) / ( - rss0 + 0.0001) * 100, 1), state, round(d, 3), ma)) - - if pe is False: - print('Process {} ({}) died in {} sec'.format(pid, name, round(d, 3))) - exit() diff --git a/trash/thanatolog2 b/trash/thanatolog2 deleted file mode 100755 index 7d058b8..0000000 --- a/trash/thanatolog2 +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env python3 - -import os -from time import sleep, time -from signal import (signal, - SIGKILL, SIGTERM, SIGINT, SIGQUIT, - SIGCONT, SIGUSR1, SIGUSR2, - SIGHUP, SIGABRT, SIGSEGV, SIGBUS) -from sys import argv, exit - - -def mlockall(): - """Lock all memory to prevent swapping the process.""" - - from ctypes import CDLL - - MCL_CURRENT = 1 - MCL_FUTURE = 2 - MCL_ONFAULT = 4 - - libc = CDLL('libc.so.6', use_errno=True) - - result = libc.mlockall( - MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT - ) - - -def check_mem(): - """find mem_available""" - with open('/proc/meminfo') as f: - for n, line in enumerate(f): - if n is 2: - mem_available = int(line.split(':')[1][:-4]) - return round(mem_available / 1024.0) - - -def pid_to_name(pid): - """ - """ - try: - with open('/proc/' + pid + '/comm', 'rb') as f: - return f.read().decode('utf-8', 'ignore')[:-1] - except FileNotFoundError: - return '' - except ProcessLookupError: - return '' - - -def pid_to_state(pid): - - x = rline1('/proc/' + pid + '/stat') - - if ')' in x: - return x.rpartition(')')[2][1] - else: - return ' ' - - -def pid_to_rss(pid, SC_PAGESIZE): - try: - rss = rline1('/proc/{}/statm'.format(pid)).split(' ')[1] - except IndexError: - rss = '-0' - return round(int(rss) * SC_PAGESIZE / (1024.0 ** 2)) - - -def pid_to_realpath(pid): - try: - return os.path.realpath('/proc/' + pid + '/exe') - except FileNotFoundError: - return '' - - -def rline1(path): - """read 1st line from path.""" - try: - with open(path) as f: - for line in f: - return line[:-1] - except UnicodeDecodeError: - with open(path, 'rb') as f: - return f.read(999).decode( - 'utf-8', 'ignore').split('\n')[0] # use partition()! - except FileNotFoundError: - return 'FileNotFoundError' - except ProcessLookupError: - return 'ProcessLookupError' - - -############################################################################### - - -if len(argv) != 2: - print("""Usage: - thanatolog PID""") - exit() - - -mlockall() - -SC_PAGESIZE = os.sysconf(os.sysconf_names['SC_PAGESIZE']) -pid = argv[1] -name = pid_to_name(pid) -rss0 = float(pid_to_rss(pid, SC_PAGESIZE)) -ma = check_mem() - - -print('PID:', pid) -print('Name:', name) -print('RSS at startup: {} (100.0 %)'.format(int(rss0))) -print('MemAvail:', ma) - -send_signal = SIGKILL - -# os.kill(int(pid), SIGCONT) - - -t0 = time() - - -for i in range(10): - sleep(0.001) - rpe = os.path.exists('/proc/{}/exe'.format(pid)) - rss = pid_to_rss(pid, SC_PAGESIZE) - pe = os.path.exists('/proc/{}'.format(pid)) - t1 = time() - d = t1 - t0 - state = pid_to_state(pid) - ma = check_mem() - print('RP: {} | RSS: {} ({} %) | {} | t: {:0<6} | MemAv' - 'ail: {}'.format(rpe, rss, round(float(rss) / ( - rss0 + 0.0001) * 100, 1), state, str(round(d, 4)), ma)) - - -print() - -print('Send SIGKILL') - -os.kill(int(pid), send_signal) - -t0 = time() - -ma0 = ma - - -while True: - sleep(0.001) - rpe = os.path.exists('/proc/{}/exe'.format(pid)) - rss = pid_to_rss(pid, SC_PAGESIZE) - pe = os.path.exists('/proc/{}'.format(pid)) - t1 = time() - d = t1 - t0 - state = pid_to_state(pid) - ma = check_mem() - - print('RP: {} | RSS: {} ({} %) | State: {} | time: {} | MemAvail: {} | dMA {}'.format( - rpe, rss, round(float(rss) / (rss0 + 0.0001) * 100, 1), state, round(d, 3), ma, ma0 - ma)) - - if pe is False: - break - -print('Process {} ({}) died in {} sec'.format(pid, name, round(d, 3))) - - -print() -for i in range(10): - sleep(0.001) - rpe = os.path.exists('/proc/{}/exe'.format(pid)) - rss = pid_to_rss(pid, SC_PAGESIZE) - pe = os.path.exists('/proc/{}'.format(pid)) - t1 = time() - d = t1 - t0 - state = pid_to_state(pid) - ma = check_mem() - - print('RP: {} | RSS: {} ({} %) | State: {} | time: {} | MemAvail: {} | dMA {}'.format( - rpe, rss, round(float(rss) / (rss0 + 0.0001) * 100, 1), state, round(d, 3), ma, ma0 - ma)) diff --git a/trash/x01 b/trash/x01 deleted file mode 100755 index 312bc14..0000000 --- a/trash/x01 +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/env python3 - - -from os import getpid - -# find mem_total -# find positions of SwapFree and SwapTotal in /proc/meminfo - -with open('/proc/meminfo') as f: - mem_list = f.readlines() - -mem_list_names = [] -for s in mem_list: - mem_list_names.append(s.split(':')[0]) - -if mem_list_names[2] != 'MemAvailable': - errprint('WARNING: Your Linux kernel is too old, Linux 3.14+ requied') - # exit(1) - -swap_total_index = mem_list_names.index('SwapTotal') -swap_free_index = swap_total_index + 1 - -mem_total = int(mem_list[0].split(':')[1][:-4]) - -# Get names from /proc/*/status to be able to get VmRSS and VmSwap values - -with open('/proc/self/status') as file: - status_list = file.readlines() - -status_names = [] -for s in status_list: - status_names.append(s.split(':')[0]) - -ppid_index = status_names.index('PPid') -vm_size_index = status_names.index('VmSize') -vm_rss_index = status_names.index('VmRSS') -vm_swap_index = status_names.index('VmSwap') -uid_index = status_names.index('Uid') -state_index = status_names.index('State') - - -try: - anon_index = status_names.index('RssAnon') - file_index = status_names.index('RssFile') - shmem_index = status_names.index('RssShmem') - detailed_rss = True - # print(detailed_rss, 'detailed_rss') -except ValueError: - detailed_rss = False - # print('It is not Linux 4.5+') - - - -self_pid = str(getpid()) - - -def self_rss(): - r = pid_to_status(self_pid)[5] - print(r) - - - -def pid_to_status(pid): - """ - """ - - try: - - with open('/proc/' + pid + '/status') as f: - - for n, line in enumerate(f): - - if n is 0: - name = line.split('\t')[1][:-1] - - if n is state_index: - state = line.split('\t')[1][0] - continue - - if n is ppid_index: - ppid = line.split('\t')[1][:-1] - continue - - if n is uid_index: - uid = line.split('\t')[2] - continue - - if n is vm_size_index: - vm_size = int(line.split('\t')[1][:-4]) - continue - - if n is vm_rss_index: - vm_rss = int(line.split('\t')[1][:-4]) - continue - - if n is vm_swap_index: - vm_swap = int(line.split('\t')[1][:-4]) - break - - return name, state, ppid, uid, vm_size, vm_rss, vm_swap - - except UnicodeDecodeError: - return pid_to_status_unicode(pid) - - except FileNotFoundError: - return None - - except ProcessLookupError: - return None - - except ValueError: - return None - - -self_rss() - - -import logging -import subprocess -import argparse - - -self_rss() - diff --git a/trash/zram-off b/trash/zram-off deleted file mode 100755 index f8aea9b..0000000 --- a/trash/zram-off +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -# Deactivate swap -swapoff /dev/zram0 - -# Reset zram -echo 1 > /sys/block/zram0/reset - -# Remove zram module -modprobe -r zram diff --git a/trash/zram-on b/trash/zram-on deleted file mode 100755 index 7507b2f..0000000 --- a/trash/zram-on +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh - -# загружаем в ядро модуль zram -modprobe -v zram num_devices=4 - -# задаем число потоков сжатия, равное числу ядер процессора -CPUS="`nproc`" -echo "$CPUS" > /sys/block/zram0/max_comp_streams - -# выбираем алгоритм сжатия, lz4 наиболее быстр, lzo сильнее сжимает -ALG=lzo -echo "$ALG" > /sys/block/zram0/comp_algorithm - -# задаем размер zram (FRACTION - размер устройства zram0 в процентах от MemTotal) -FRACTION=100 -MEMORY=`perl -ne'/^MemTotal:\s+(\d+)/ && print $1*1024;' < /proc/meminfo` -SIZE=$(( MEMORY * FRACTION / 100 )) - -echo $SIZE > /sys/block/zram0/disksize -#echo 10G > /sys/block/zram0/disksize # можно задать размер zram в гигабайтах, вместо процентов от MemTotal - -# форматируем устройство zram0 как swap -mkswap -L zram0 /dev/zram0 - -# включаем подкачку -swapon -d -p 10 /dev/zram0 -