Add start of shim code to new interfaces
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
253
shim/process.go
Normal file
253
shim/process.go
Normal file
@@ -0,0 +1,253 @@
|
||||
package process
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var errInvalidPidInt = errors.New("containerd: process pid is invalid")
|
||||
|
||||
type process struct {
|
||||
name string
|
||||
root string
|
||||
cmd *exec.Cmd
|
||||
done chan struct{}
|
||||
success bool
|
||||
startTime string
|
||||
mu sync.Mutex
|
||||
containerPid int
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
// same checks if the process is the same process originally launched
|
||||
func (p *process) same() (bool, error) {
|
||||
/// for backwards compat assume true if it is not set
|
||||
if p.startTime == "" {
|
||||
return true, nil
|
||||
}
|
||||
pid, err := p.readContainerPid()
|
||||
if err != nil {
|
||||
return false, nil
|
||||
}
|
||||
started, err := readProcessStartTime(pid)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return p.startTime == started, nil
|
||||
}
|
||||
|
||||
func (p *process) checkExited() {
|
||||
err := p.cmd.Wait()
|
||||
if err == nil {
|
||||
p.success = true
|
||||
}
|
||||
if same, _ := p.same(); same && p.hasPid() {
|
||||
// The process changed its PR_SET_PDEATHSIG, so force kill it
|
||||
logrus.Infof("containerd: %s:%s (pid %v) has become an orphan, killing it", p.container.id, p.namae, p.containerPid)
|
||||
if err := unix.Kill(p.containerPid, syscall.SIGKILL); err != nil && err != syscall.ESRCH {
|
||||
logrus.Errorf("containerd: unable to SIGKILL %s:%s (pid %v): %v", p.container.id, p.name, p.containerPid, err)
|
||||
close(p.done)
|
||||
return
|
||||
}
|
||||
// wait for the container process to exit
|
||||
for {
|
||||
if err := unix.Kill(p.pid, 0); err != nil {
|
||||
break
|
||||
}
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
close(p.done)
|
||||
}
|
||||
|
||||
func (p *process) hasPid() bool {
|
||||
p.mu.Lock()
|
||||
r := p.containerPid > 0
|
||||
p.mu.Unlock()
|
||||
return r
|
||||
}
|
||||
|
||||
func (p *process) setPid(pid int) {
|
||||
p.mu.Lock()
|
||||
p.containerPid = pid
|
||||
p.mu.Unlock()
|
||||
}
|
||||
|
||||
type pidResponse struct {
|
||||
pid int
|
||||
err error
|
||||
}
|
||||
|
||||
func (p *process) waitForCreate() error {
|
||||
r := make(chan pidResponse, 1)
|
||||
go readContainerPid(wc)
|
||||
|
||||
select {
|
||||
case resp := <-r:
|
||||
if resp.err != nil {
|
||||
return resp.err
|
||||
}
|
||||
p.setPid(resp.pid)
|
||||
started, err := readProcessStartTime(resp.pid)
|
||||
if err != nil {
|
||||
logrus.Warnf("containerd: unable to save %s:%s starttime: %v", p.container.id, p.id, err)
|
||||
}
|
||||
// TODO: save start time to disk or process state file
|
||||
p.startTime = started
|
||||
return nil
|
||||
case <-time.After(c.timeout):
|
||||
p.cmd.Process.Kill()
|
||||
p.cmd.Wait()
|
||||
return ErrContainerStartTimeout
|
||||
}
|
||||
}
|
||||
|
||||
func readContainerPid(r chan pidResponse, pidFile string) {
|
||||
for {
|
||||
pid, err := readContainerPid(pidFile)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) || err == errInvalidPidInt {
|
||||
if serr := checkErrorLogs(); serr != nil {
|
||||
r <- pidResponse{
|
||||
err: err,
|
||||
}
|
||||
break
|
||||
}
|
||||
time.Sleep(15 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
r <- pidResponse{
|
||||
err: err,
|
||||
}
|
||||
break
|
||||
}
|
||||
r <- pidResponse{
|
||||
pid: pid,
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
func checkErrorLogs(cmd *exec.Cmd, shimLogPath, runtimeLogPath string) error {
|
||||
alive, err := isAlive(cmd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !alive {
|
||||
// runc could have failed to run the container so lets get the error
|
||||
// out of the logs or the shim could have encountered an error
|
||||
messages, err := readLogMessages(shimLogPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, m := range messages {
|
||||
if m.Level == "error" {
|
||||
return fmt.Errorf("shim error: %v", m.Msg)
|
||||
}
|
||||
}
|
||||
// no errors reported back from shim, check for runc/runtime errors
|
||||
messages, err = readLogMessages(runtimeLogPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
err = ErrContainerNotStarted
|
||||
}
|
||||
return err
|
||||
}
|
||||
for _, m := range messages {
|
||||
if m.Level == "error" {
|
||||
return fmt.Errorf("oci runtime error: %v", m.Msg)
|
||||
}
|
||||
}
|
||||
return ErrContainerNotStarted
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func readProcessStartTime(pid int) (string, error) {
|
||||
return readProcStatField(pid, 22)
|
||||
}
|
||||
|
||||
func readProcStatField(pid int, field int) (string, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join(string(filepath.Separator), "proc", strconv.Itoa(pid), "stat"))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if field > 2 {
|
||||
// First, split out the name since he could contains spaces.
|
||||
parts := strings.Split(string(data), ") ")
|
||||
// Now split out the rest, we end up with 2 fields less
|
||||
parts = strings.Split(parts[1], " ")
|
||||
return parts[field-2-1], nil // field count start at 1 in manual
|
||||
}
|
||||
parts := strings.Split(string(data), " (")
|
||||
if field == 1 {
|
||||
return parts[0], nil
|
||||
}
|
||||
return strings.Split(parts[1], ") ")[0], nil
|
||||
}
|
||||
|
||||
func readContainerPid(pidFile string) (int, error) {
|
||||
data, err := ioutil.ReadFile(pidFile)
|
||||
if err != nil {
|
||||
return -1, nil
|
||||
}
|
||||
i, err := strconv.Atoi(string(data))
|
||||
if err != nil {
|
||||
return -1, errInvalidPidInt
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// isAlive checks if the shim that launched the container is still alive
|
||||
func isAlive(cmd *exec.Cmd) (bool, error) {
|
||||
if _, err := syscall.Wait4(cmd.Process.Pid, nil, syscall.WNOHANG, nil); err == nil {
|
||||
return true, nil
|
||||
}
|
||||
if err := syscall.Kill(cmd.Process.Pid, 0); err != nil {
|
||||
if err == syscall.ESRCH {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
type message struct {
|
||||
Level string `json:"level"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
func readLogMessages(path string) ([]message, error) {
|
||||
var out []message
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
dec := json.NewDecoder(f)
|
||||
for {
|
||||
var m message
|
||||
if err := dec.Decode(&m); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, m)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
Reference in New Issue
Block a user