mount: retry executing the helper binary on ECHILD

`exec.CombinedOutput()` intermittently returns `ECHILD` due to our
signal handling.

`wait(2)`: https://man7.org/linux/man-pages/man2/wait.2.html

> ECHILD (for waitpid() or waitid()) The process specified by pid
>   (waitpid()) or idtype and id (waitid()) does not exist or is
>   not a child of the calling process.  (This can happen for
>   one's own child if the action for SIGCHLD is set to SIG_IGN.
>   See also the Linux Notes section about threads.)

Fix #4387

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda 2020-07-16 14:57:53 +09:00
parent 1a571fcf1b
commit 403dc83a29
No known key found for this signature in database
GPG Key ID: 49524C6F9F638F1A

View File

@ -363,10 +363,34 @@ func (m *Mount) mountWithHelper(helperBinary, typePrefix, target string) error {
args = append(args, "-o", o)
}
args = append(args, "-t", strings.TrimPrefix(m.Type, typePrefix))
cmd := exec.Command(helperBinary, args...)
out, err := cmd.CombinedOutput()
infoBeforeMount, err := Lookup(target)
if err != nil {
return errors.Wrapf(err, "mount helper [%s %v] failed: %q", helperBinary, args, string(out))
return err
}
return nil
// cmd.CombinedOutput() may intermittently return ECHILD because of our signal handling in shim.
// See #4387 and wait(2).
const retriesOnECHILD = 10
for i := 0; i < retriesOnECHILD; i++ {
cmd := exec.Command(helperBinary, args...)
out, err := cmd.CombinedOutput()
if err == nil {
return nil
}
if !errors.Is(err, unix.ECHILD) {
return errors.Wrapf(err, "mount helper [%s %v] failed: %q", helperBinary, args, string(out))
}
// We got ECHILD, we are not sure whether the mount was successful.
// If the mount ID has changed, we are sure we got some new mount, but still not sure it is fully completed.
// So we attempt to unmount the new mount before retrying.
infoAfterMount, err := Lookup(target)
if err != nil {
return err
}
if infoAfterMount.ID != infoBeforeMount.ID {
_ = unmount(target, 0)
}
}
return errors.Errorf("mount helper [%s %v] failed with ECHILD (retired %d times)", helperBinary, args, retriesOnECHILD)
}