speeding up the search for the victim

2020-03-29 05:21:19 +09:00 · 2020-03-29 05:21:19 +09:00 · 0b9be5a41c
commit 0b9be5a41c
parent 72b1197aff
1 changed files with 157 additions and 177 deletions
--- a/nohang/nohang
+++ b/nohang/nohang
@ -290,23 +290,27 @@ def pop(cmd, username):
    ))
    if swap_total == 0:
-        wait_time = 5
+        wait_time = 10
    else:
-        wait_time = 25
+        wait_time = 30
    t3 = monotonic()
-    with Popen(cmd) as proc:
+    try:
-        try:
+        with Popen(cmd) as proc:
-            proc.wait(timeout=wait_time)
+            try:
-            err = proc.poll()
+                proc.wait(timeout=wait_time)
-            t4 = monotonic()
+                err = proc.poll()
-        except TimeoutExpired:
+                t4 = monotonic()
-            proc.kill()
+            except TimeoutExpired:
-            t4 = monotonic()
+                proc.kill()
                t4 = monotonic()
-            if debug_gui_notifications:
+                if debug_gui_notifications:
-                log('TimeoutExpired: notify user: {}'.format(username))
+                    log('TimeoutExpired: notify user: {}'.format(username))
    except Exception as e:
        th_name = threading.current_thread().getName()
        log('Exception in {}: {}'.format(th_name, e))
    if debug_gui_notifications:
        log('Popen time: {} sec; exit status: {}; cmd: {}'.format(
@ -472,7 +476,6 @@ def check_config():
    log('    fill_rate_zram:  {}'.format(fill_rate_zram))
    log('    max_sleep:       {} sec'.format(max_sleep))
    log('    min_sleep:       {} sec'.format(min_sleep))
    log('    over_sleep:      {} sec'.format(over_sleep))
    log('\n4. Warnings and notifications')
@ -732,7 +735,7 @@ def signal_handler(signum, frame):
 def signal_handler_inner(signum, frame):
    """
    """
-    log('Signal handler called with the {} signal (ignored) '.format(
+    log('Got the {} signal (ignored) '.format(
        sig_dict[signum]))
@ -887,7 +890,7 @@ def pid_to_name(pid):
    """
    """
    try:
-        with open('/proc/' + pid + '/comm', 'rb') as f:
+        with open('/proc/{}/comm'.format(pid), 'rb', buffering=0) as f:
            return f.read().decode('utf-8', 'ignore')[:-1]
    except FileNotFoundError:
        return ''
@ -1001,23 +1004,25 @@ def pid_to_cwd(pid):
 def pid_to_uid(pid):
    """return euid"""
    try:
-        with open('/proc/' + pid + '/status') as f:
+        with open('/proc/{}/status'.format(pid), 'rb', buffering=0) as f:
            for n, line in enumerate(f):
                if n is uid_index:
                    return line.split('\t')[2]
    except UnicodeDecodeError:
        with open('/proc/' + pid + '/status', 'rb') as f:
            f_list = f.read().decode('utf-8', 'ignore').split('\n')
            return f_list[uid_index].split('\t')[2]
    except FileNotFoundError:
        return ''
    except ProcessLookupError:
        return ''
-def pid_to_badness(pid):
+def pid_to_badness(pid, oom_score):
    """Find and modify badness (if it needs)."""
    oom_score_adj = None
    try:
-        oom_score = int(rline1('/proc/' + pid + '/oom_score'))
+
        if oom_score is None:
            oom_score = pid_to_oom_score(pid)
        if oom_score == 0:
            return oom_score, oom_score
@ -1025,7 +1030,7 @@ def pid_to_badness(pid):
        badness = oom_score
        if ignore_positive_oom_score_adj:
-            oom_score_adj = int(rline1('/proc/' + pid + '/oom_score_adj'))
+            oom_score_adj = pid_to_oom_score_adj(pid)
            if oom_score_adj > 0:
                badness = badness - oom_score_adj
@ -1038,8 +1043,7 @@ def pid_to_badness(pid):
                        badness += badness_adj
                    else:
                        if oom_score_adj is None:
-                            oom_score_adj = int(rline1(
+                            oom_score_adj = pid_to_oom_score_adj(pid)
                                '/proc/' + pid + '/oom_score_adj'))
                        if oom_score_adj >= 0:
                            badness += badness_adj
@ -1052,8 +1056,7 @@ def pid_to_badness(pid):
                        badness += badness_adj
                    else:
                        if oom_score_adj is None:
-                            oom_score_adj = int(rline1(
+                            oom_score_adj = pid_to_oom_score_adj(pid)
                                '/proc/' + pid + '/oom_score_adj'))
                        if oom_score_adj >= 0:
                            badness += badness_adj
@ -1066,8 +1069,7 @@ def pid_to_badness(pid):
                        badness += badness_adj
                    else:
                        if oom_score_adj is None:
-                            oom_score_adj = int(rline1(
+                            oom_score_adj = pid_to_oom_score_adj(pid)
                                '/proc/' + pid + '/oom_score_adj'))
                        if oom_score_adj >= 0:
                            badness += badness_adj
@ -1080,8 +1082,7 @@ def pid_to_badness(pid):
                        badness += badness_adj
                    else:
                        if oom_score_adj is None:
-                            oom_score_adj = int(rline1(
+                            oom_score_adj = pid_to_oom_score_adj(pid)
                                '/proc/' + pid + '/oom_score_adj'))
                        if oom_score_adj >= 0:
                            badness += badness_adj
@ -1094,8 +1095,7 @@ def pid_to_badness(pid):
                        badness += badness_adj
                    else:
                        if oom_score_adj is None:
-                            oom_score_adj = int(rline1(
+                            oom_score_adj = pid_to_oom_score_adj(pid)
                                '/proc/' + pid + '/oom_score_adj'))
                        if oom_score_adj >= 0:
                            badness += badness_adj
@ -1108,8 +1108,7 @@ def pid_to_badness(pid):
                        badness += badness_adj
                    else:
                        if oom_score_adj is None:
-                            oom_score_adj = int(rline1(
+                            oom_score_adj = pid_to_oom_score_adj(pid)
                                '/proc/' + pid + '/oom_score_adj'))
                        if oom_score_adj >= 0:
                            badness += badness_adj
@ -1122,8 +1121,7 @@ def pid_to_badness(pid):
                        badness += badness_adj
                    else:
                        if oom_score_adj is None:
-                            oom_score_adj = int(rline1(
+                            oom_score_adj = pid_to_oom_score_adj(pid)
                                '/proc/' + pid + '/oom_score_adj'))
                        if oom_score_adj >= 0:
                            badness += badness_adj
@ -1136,8 +1134,7 @@ def pid_to_badness(pid):
                        badness += badness_adj
                    else:
                        if oom_score_adj is None:
-                            oom_score_adj = int(rline1(
+                            oom_score_adj = pid_to_oom_score_adj(pid)
                                '/proc/' + pid + '/oom_score_adj'))
                        if oom_score_adj >= 0:
                            badness += badness_adj
@ -1157,58 +1154,7 @@ def pid_to_status(pid):
    """
    try:
-        with open('/proc/' + pid + '/status') as f:
+        with open('/proc/{}/status'.format(pid), 'rb', buffering=0) as f:
            for n, line in enumerate(f):
                if n == 0:
                    name = line.split('\t')[1][:-1]
                if n is state_index:
                    state = line.split('\t')[1][0]
                    continue
                if n is ppid_index:
                    ppid = line.split('\t')[1][:-1]
                    continue
                if n is uid_index:
                    uid = line.split('\t')[2]
                    continue
                if n is vm_size_index:
                    vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
                    continue
                if n is vm_rss_index:
                    vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
                    continue
                if n is vm_swap_index:
                    vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
                    break
        return name, state, ppid, uid, vm_size, vm_rss, vm_swap
    except UnicodeDecodeError:
        return pid_to_status_unicode(pid)
    except FileNotFoundError:
        return None
    except ProcessLookupError:
        return None
    except ValueError:
        return None
 def pid_to_status_unicode(pid):
    """
    """
    try:
        with open('/proc/' + pid + '/status', 'rb') as f:
            f_list = f.read().decode('utf-8', 'ignore').split('\n')
            for i in range(len(f_list)):
@ -1512,12 +1458,101 @@ def alive_pid_list():
    return pid_list
 def pid_to_oom_score(pid):
    """Return the integer read from /proc/<pid>/oom_score.

    0 is returned when opening or reading the file raises
    FileNotFoundError, ProcessLookupError or NotADirectoryError.
    Any other error (for example a ValueError from int()) propagates.
    """
    score_path = '/proc/{}/oom_score'.format(pid)
    try:
        # Binary, unbuffered: a single raw read of the whole value.
        with open(score_path, 'rb', buffering=0) as score_file:
            raw_score = score_file.read()
    except (FileNotFoundError, ProcessLookupError, NotADirectoryError):
        return 0
    return int(raw_score)
 def pid_to_oom_score_adj(pid):
    """Return the integer read from /proc/<pid>/oom_score_adj.

    Falls back to 0 if the file cannot be opened or read because of
    FileNotFoundError, ProcessLookupError or NotADirectoryError.
    Other exceptions are not handled here.
    """
    ignorable = (FileNotFoundError, ProcessLookupError, NotADirectoryError)
    adj_path = '/proc/{}/oom_score_adj'.format(pid)
    try:
        with open(adj_path, 'rb', buffering=0) as adj_file:
            content = adj_file.read()
    except ignorable:
        return 0
    return int(content)
 def badness_pid_list():
    """Build a list of (pid, badness) tuples for candidate processes.

    Walks the entries of /proc and keeps those whose name starts with a
    decimal digit, excluding self_pid and PID '1'. Only entries whose
    oom_score is at least 1 are passed to pid_to_badness().

    Returns:
        list of (pid, badness) tuples; pid is the /proc entry name (str).
    """
    pid_b_list = []
    for pid in os.listdir('/proc'):
        # Do the cheap name checks first so that no oom_score file is
        # opened for non-process entries, for this process or for init.
        if not pid[0].isdecimal():
            continue
        if pid == self_pid or pid == '1':
            continue
        o = pid_to_oom_score(pid)
        if o < 1:
            continue
        b = pid_to_badness(pid, o)[0]
        # find_victim() skips a badness of None; do the same here so the
        # caller never has to compare None with an int.
        # NOTE(review): presumably None means the process went away while
        # being inspected - confirm against pid_to_badness().
        if b is None:
            continue
        pid_b_list.append((pid, b))
    return pid_b_list
 def fast_find_victim():
    """Find the process with the highest badness without a process table.

    Returns:
        None if badness_pid_list() yields no candidates, otherwise the
        tuple (pid, victim_badness, victim_name, victim_id).
    """
    ft1 = monotonic()
    pid_badness_list = badness_pid_list()
    real_proc_num = len(pid_badness_list)
    found_msg = (
        'Found {} tasks with non-zero oom_score (except init and '
        'self)'.format(real_proc_num))
    if real_proc_num == 0:
        log(found_msg)
        return None
    # Only the top entry is needed, so take the maximum instead of sorting
    # the whole list. Like a stable descending sort, max() picks the first
    # of several entries that share the highest badness.
    pid, victim_badness = max(pid_badness_list, key=itemgetter(1))
    victim_id = get_victim_id(pid)
    victim_name = pid_to_name(pid)
    log(found_msg)
    log(
        'Process with highest badness (found in {} ms):\n  PID: {}, Na'
        'me: {}, badness: {}'.format(
            round((monotonic() - ft1) * 1000),
            pid,
            victim_name,
            victim_badness
        )
    )
    return pid, victim_badness, victim_name, victim_id
 def find_victim(_print_proc_table):
    """
    Find the process with highest badness and its badness adjustment
    Return pid and badness
    """
    if not _print_proc_table:
        return fast_find_victim()
    ft1 = monotonic()
    pid_list = alive_pid_list()
@ -1562,7 +1597,7 @@ def find_victim(_print_proc_table):
    for pid in pid_list:
-        badness = pid_to_badness(pid)[0]
+        badness = pid_to_badness(pid, None)[0]
        if badness is None:
            continue
@ -1570,8 +1605,8 @@ def find_victim(_print_proc_table):
        if _print_proc_table:
            try:
-                oom_score = rline1('/proc/' + pid + '/oom_score')
+                oom_score = pid_to_oom_score(pid)
-                oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
+                oom_score_adj = pid_to_oom_score_adj(pid)
            except FileNotFoundError:
                continue
@ -1609,8 +1644,8 @@ def find_victim(_print_proc_table):
                pid.rjust(7),
                ppid.rjust(7),
                str(badness).rjust(7),
-                oom_score.rjust(9),
+                str(oom_score).rjust(9),
-                oom_score_adj.rjust(13),
+                str(oom_score_adj).rjust(13),
                uid.rjust(10),
                state,
                str(vm_size).rjust(6),
@ -1666,67 +1701,7 @@ def find_victim_info(pid, victim_badness, name):
    try:
-        with open('/proc/' + pid + '/status') as f:
+        with open('/proc/{}/status'.format(pid), 'rb', buffering=0) as f:
            for n, line in enumerate(f):
                if n is state_index:
                    state = line.split('\t')[1].rstrip()
                    continue
                if n is uid_index:
                    uid = line.split('\t')[2]
                    continue
                if n is vm_size_index:
                    vm_size = kib_to_mib(int(line.split('\t')[1][:-4]))
                    continue
                if n is vm_rss_index:
                    vm_rss = kib_to_mib(int(line.split('\t')[1][:-4]))
                    continue
                if detailed_rss:
                    if n is anon_index:
                        anon_rss = kib_to_mib(
                            int(line.split('\t')[1][:-4]))
                        continue
                    if n is file_index:
                        file_rss = kib_to_mib(
                            int(line.split('\t')[1][:-4]))
                        continue
                    if n is shmem_index:
                        shmem_rss = kib_to_mib(
                            int(line.split('\t')[1][:-4]))
                        continue
                if n is vm_swap_index:
                    vm_swap = kib_to_mib(int(line.split('\t')[1][:-4]))
                    break
        if print_victim_cmdline:
            cmdline = pid_to_cmdline(pid)
        oom_score = rline1('/proc/' + pid + '/oom_score')
        oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
    except FileNotFoundError:
        x = 'The victim died in the search process: FileNotFoundError'
        log(x)
        update_stat_dict(x)
        print_stat_dict()
        return None
    except ProcessLookupError:
        x = 'The victim died in the search process: ProcessLookupError'
        log(x)
        update_stat_dict(x)
        print_stat_dict()
        return None
    except UnicodeDecodeError:
        with open('/proc/' + pid + '/status', 'rb') as f:
            f_list = f.read().decode('utf-8', 'ignore').split('\n')
            for i in range(len(f_list)):
@ -1764,8 +1739,8 @@ def find_victim_info(pid, victim_badness, name):
        if print_victim_cmdline:
            cmdline = pid_to_cmdline(pid)
-        oom_score = rline1('/proc/' + pid + '/oom_score')
+        oom_score = pid_to_oom_score(pid)
-        oom_score_adj = rline1('/proc/' + pid + '/oom_score_adj')
+        oom_score_adj = pid_to_oom_score_adj(pid)
    except IndexError:
        x = 'The victim died in the search process: IndexError'
@ -1780,19 +1755,6 @@ def find_victim_info(pid, victim_badness, name):
        print_stat_dict()
        return None
    except FileNotFoundError:
        x = 'The victim died in the search process: FileNotFoundError'
        log(x)
        update_stat_dict(x)
        print_stat_dict()
        return None
    except ProcessLookupError:
        x = 'The victim died in the search process: ProcessLookupError'
        log(x)
        update_stat_dict(x)
        print_stat_dict()
        return None
    len_vm = len(str(vm_size))
    try:
@ -2217,11 +2179,25 @@ def implement_corrective_action(
    if x:
        victim_id = cached_victim_id
        pid = victim_id.partition('_pid')[2]
-        victim_badness = pid_to_badness(pid)[0]
+        victim_badness = pid_to_badness(pid, None)[0]
        name = v_dict[victim_id]['name']
        log('New victim is cached victim {} ({})'.format(pid, name))
    else:
-        pid, victim_badness, name, victim_id = find_victim(print_proc_table)
+
        fff = find_victim(print_proc_table)
        if fff is None:
            if debug_sleep:
                log('Sleep {} sec'.format(over_sleep))
            sleep(over_sleep)
            log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
                '<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
            return psi_t0
        pid, victim_badness, name, victim_id = fff
    log('Recheck memory levels...')
@ -2269,6 +2245,8 @@ def implement_corrective_action(
    else:
        log('Thresholds is not exceeded now')
        log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
            '<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
        return psi_t0
    for i in mem_info_list:
@ -2276,6 +2254,8 @@ def implement_corrective_action(
    if new_threshold is None or new_threshold == 'WARN':
        log('Thresholds is not exceeded now')
        log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
            '<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
        return psi_t0
    threshold = new_threshold
@ -2295,7 +2275,7 @@ def implement_corrective_action(
                        dt, 1), max_soft_exit_time))
                if debug_sleep:
-                    log('Sleep {} sec (over_sleep)'.format(over_sleep))
+                    log('Sleep {} sec'.format(over_sleep))
                sleep(over_sleep)
                log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'
@ -2510,7 +2490,7 @@ def implement_corrective_action(
    if vwd is None:
        if debug_sleep:
-            log('Sleep {} sec (over_sleep)'.format(over_sleep))
+            log('Sleep {} sec'.format(over_sleep))
        sleep(over_sleep)
    log('<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'