From 403dc83a291684a7d5b7b0e4dfcd60b82a7518ae Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Thu, 16 Jul 2020 14:57:53 +0900 Subject: [PATCH] mount: retry executing the helper binary on ECHILD `exec.CombinedOutput()` intermittently returns `ECHILD` due to our signal handling. `wait(2)`: https://man7.org/linux/man-pages/man2/wait.2.html > ECHILD (for waitpid() or waitid()) The process specified by pid > (waitpid()) or idtype and id (waitid()) does not exist or is > not a child of the calling process. (This can happen for > one's own child if the action for SIGCHLD is set to SIG_IGN. > See also the Linux Notes section about threads.) Fix #4387 Signed-off-by: Akihiro Suda --- mount/mount_linux.go | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/mount/mount_linux.go b/mount/mount_linux.go index a7edd4552..d12a9c5a2 100644 --- a/mount/mount_linux.go +++ b/mount/mount_linux.go @@ -363,10 +363,34 @@ func (m *Mount) mountWithHelper(helperBinary, typePrefix, target string) error { args = append(args, "-o", o) } args = append(args, "-t", strings.TrimPrefix(m.Type, typePrefix)) - cmd := exec.Command(helperBinary, args...) - out, err := cmd.CombinedOutput() + + infoBeforeMount, err := Lookup(target) if err != nil { - return errors.Wrapf(err, "mount helper [%s %v] failed: %q", helperBinary, args, string(out)) + return err } - return nil + + // cmd.CombinedOutput() may intermittently return ECHILD because of our signal handling in shim. + // See #4387 and wait(2). + const retriesOnECHILD = 10 + for i := 0; i < retriesOnECHILD; i++ { + cmd := exec.Command(helperBinary, args...) + out, err := cmd.CombinedOutput() + if err == nil { + return nil + } + if !errors.Is(err, unix.ECHILD) { + return errors.Wrapf(err, "mount helper [%s %v] failed: %q", helperBinary, args, string(out)) + } + // We got ECHILD, we are not sure whether the mount was successful. 
+ // If the mount ID has changed, we are sure we got some new mount, but still not sure it is fully completed. + // So we attempt to unmount the new mount before retrying. + infoAfterMount, err := Lookup(target) + if err != nil { + return err + } + if infoAfterMount.ID != infoBeforeMount.ID { + _ = unmount(target, 0) + } + } + return errors.Errorf("mount helper [%s %v] failed with ECHILD (retried %d times)", helperBinary, args, retriesOnECHILD) }