diff --git a/nohang b/nohang
index 607582f..c068be6 100755
--- a/nohang
+++ b/nohang
@@ -4,7 +4,7 @@ import os
 from time import sleep, time
 from operator import itemgetter
 from sys import stdout
-from signal import SIGKILL, SIGTERM
+from signal import SIGKILL, SIGTERM, SIGSTOP, SIGCONT
 
 start_time = time()
 
@@ -40,11 +40,116 @@ HR = '~' * 79
 
 print_total_stat = True
 
+# Freeze running processes with SIGSTOP while a victim is being selected
+# and signalled, then resume them with SIGCONT.
+stop_cont = False
+# Print processes that are not in state S before stop() and around cont().
+print_states_debug = False
+
+
 ##########################################################################
 
 # define functions
 
 
+def _is_foreign_pid(name):
+    """
+    Return True if a /proc entry name is a PID that may be inspected.
+
+    Only all-digit names are PIDs; '1' and self_pid are always skipped.
+    """
+    return name.isdecimal() and name != '1' and name != self_pid
+
+
+def pid_to_state(pid):
+    """
+    Return the one-letter process state read from /proc/[pid]/stat.
+
+    pid: PID as a string.
+    The text after the last ')' is used because the comm field, which
+    precedes the state, may itself contain spaces and parentheses.
+    """
+    return rline1('/proc/' + pid + '/stat').rpartition(')')[2][1]
+
+
+def stop():
+    """
+    Send SIGSTOP to running processes and return the PIDs that got it.
+
+    A process is stopped when its state is R, pid_to_uid() does not
+    report '0' for it and its oom_score is greater than 10.
+    Returns a list of PIDs (as strings) to be passed to cont().
+    """
+    print()
+    print('Stop running processes...')
+    t1 = time()
+    t2 = time()
+    stopped_list = []
+    # NOTE(review): /proc is walked in reverse listing order; the reason
+    # is not visible here, so the order is kept as is.
+    for pid in os.listdir('/proc')[::-1]:
+        if not _is_foreign_pid(pid):
+            continue
+        try:
+            if pid_to_state(pid) != 'R':
+                continue
+            uid_r = pid_to_uid(pid)
+            oom_score_r = int(rline1('/proc/' + pid + '/oom_score'))
+            if uid_r != '0' and oom_score_r > 10:
+                print('Send SIGSTOP to {}, {}, {}...'.format(
+                    pid, pid_to_name(pid), pid_to_cmdline(pid)[:40]))
+                os.kill(int(pid), SIGSTOP)
+                # remember the PID only once the signal has been delivered
+                stopped_list.append(pid)
+                t2 = time()
+        except (FileNotFoundError, ProcessLookupError):
+            # the process exited while it was being inspected
+            continue
+    print('Stop time:', t2 - t1)
+    return stopped_list
+
+
+def cont(stopped_list):
+    """
+    Send SIGCONT to every PID in stopped_list.
+
+    stopped_list: list of PIDs (as strings), as returned by stop().
+    PIDs that no longer exist are skipped.
+    """
+    print()
+    for pid in stopped_list:
+        try:
+            os.kill(int(pid), SIGCONT)
+        except ProcessLookupError:
+            continue
+
+
+def print_states():
+    """
+    Print every inspected process whose state is not S.
+
+    Debug helper: does nothing unless print_states_debug is true.
+    """
+    if not print_states_debug:
+        return
+    print()
+    t1 = time()
+    print('non-S states:')
+    for pid in os.listdir('/proc'):
+        if not _is_foreign_pid(pid):
+            continue
+        try:
+            s = pid_to_state(pid)
+            if s != 'S':
+                print('State: {}, [{}], {}, {}...'.format(
+                    s, pid, pid_to_name(pid), pid_to_cmdline(pid)[:40]))
+        except (FileNotFoundError, ProcessLookupError):
+            continue
+    t2 = time()
+    print('print state time:', t2 - t1)
+    print()
+
+
 def update_stat_dict_and_print(key):
 
     if key not in stat_dict:
@@ -488,6 +593,13 @@ def find_victim_and_send_signal(signal):
     Find victim with highest badness and send SIGTERM/SIGKILL
     """
 
+
+    if stop_cont:
+        print_states()
+        stopped_list = stop()
+
+
+
     pid, victim_badness = fattest()
     name = pid_to_name(pid)
 
@@ -695,12 +807,21 @@ def find_victim_and_send_signal(signal):
 
             try:
 
+
+
+
                 m = check_mem_and_swap()
                 ma = round(int(m[0]) / 1024.0)
                 sf = round(int(m[2]) / 1024.0)
                 print('\nMemory status before sending a signal:\nMemA'
                       'v: {} MiB, SwFree: {} MiB'.format(ma, sf))
 
+
+
+                if stop_cont:
+                    os.kill(int(pid), SIGCONT)
+
+
                 os.kill(int(pid), signal)
                 response_time = time() - time0
                 send_result = '\033[32mOK\033[0m; response time: {} ms'.format(
@@ -749,6 +870,16 @@ def find_victim_and_send_signal(signal):
 
         key = 'victim badness < min_badness'
         update_stat_dict_and_print(key)
+
+
+    if stop_cont:
+        print_states()
+        cont(stopped_list)
+        print_states()
+
+
+
+
 
     sleep_after_send_signal(signal)
 