Merge pull request #1338 from mlaventure/client-reconnect-fix
Client reconnect fix
This commit is contained in:
commit
a6be9f544d
@ -64,9 +64,10 @@ func New(address string, opts ...ClientOpt) (*Client, error) {
|
||||
gopts := []grpc.DialOption{
|
||||
grpc.WithBlock(),
|
||||
grpc.WithInsecure(),
|
||||
grpc.WithTimeout(100 * time.Second),
|
||||
grpc.WithTimeout(60 * time.Second),
|
||||
grpc.FailOnNonTempDialError(true),
|
||||
grpc.WithDialer(dialer),
|
||||
grpc.WithBackoffMaxDelay(3 * time.Second),
|
||||
grpc.WithDialer(Dialer),
|
||||
}
|
||||
if len(copts.dialOptions) > 0 {
|
||||
gopts = copts.dialOptions
|
||||
@ -78,7 +79,7 @@ func New(address string, opts ...ClientOpt) (*Client, error) {
|
||||
grpc.WithStreamInterceptor(stream),
|
||||
)
|
||||
}
|
||||
conn, err := grpc.Dial(dialAddress(address), gopts...)
|
||||
conn, err := grpc.Dial(DialAddress(address), gopts...)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to dial %q", address)
|
||||
}
|
||||
|
@ -10,9 +10,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc/grpclog"
|
||||
|
||||
@ -27,6 +25,8 @@ var (
|
||||
noDaemon bool
|
||||
noCriu bool
|
||||
supportsCriu bool
|
||||
|
||||
ctrd = &daemon{}
|
||||
)
|
||||
|
||||
func init() {
|
||||
@ -55,7 +55,6 @@ func TestMain(m *testing.M) {
|
||||
supportsCriu = err == nil && !noCriu
|
||||
|
||||
var (
|
||||
cmd *exec.Cmd
|
||||
buf = bytes.NewBuffer(nil)
|
||||
ctx, cancel = testContext()
|
||||
)
|
||||
@ -64,27 +63,20 @@ func TestMain(m *testing.M) {
|
||||
if !noDaemon {
|
||||
os.RemoveAll(defaultRoot)
|
||||
|
||||
// setup a new containerd daemon if !testing.Short
|
||||
cmd = exec.Command("containerd",
|
||||
err := ctrd.start("containerd", address, []string{
|
||||
"--root", defaultRoot,
|
||||
"--address", address,
|
||||
"--log-level", "debug",
|
||||
)
|
||||
cmd.Stdout = buf
|
||||
cmd.Stderr = buf
|
||||
if err := cmd.Start(); err != nil {
|
||||
cmd.Wait()
|
||||
}, buf, buf)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%s: %s", err, buf.String())
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
client, err := waitForDaemonStart(ctx, address)
|
||||
client, err := ctrd.waitForStart(ctx)
|
||||
if err != nil {
|
||||
if cmd.Process != nil {
|
||||
cmd.Process.Kill()
|
||||
}
|
||||
cmd.Wait()
|
||||
ctrd.Kill()
|
||||
ctrd.Wait()
|
||||
fmt.Fprintf(os.Stderr, "%s: %s", err, buf.String())
|
||||
os.Exit(1)
|
||||
}
|
||||
@ -105,8 +97,8 @@ func TestMain(m *testing.M) {
|
||||
// pull a seed image
|
||||
if runtime.GOOS != "windows" { // TODO: remove once pull is supported on windows
|
||||
if _, err = client.Pull(ctx, testImage, WithPullUnpack); err != nil {
|
||||
cmd.Process.Signal(syscall.SIGTERM)
|
||||
cmd.Wait()
|
||||
ctrd.Stop()
|
||||
ctrd.Wait()
|
||||
fmt.Fprintf(os.Stderr, "%s: %s", err, buf.String())
|
||||
os.Exit(1)
|
||||
}
|
||||
@ -126,12 +118,12 @@ func TestMain(m *testing.M) {
|
||||
|
||||
if !noDaemon {
|
||||
// tear down the daemon and resources created
|
||||
if err := cmd.Process.Signal(syscall.SIGTERM); err != nil {
|
||||
if err := cmd.Process.Kill(); err != nil {
|
||||
if err := ctrd.Stop(); err != nil {
|
||||
if err := ctrd.Kill(); err != nil {
|
||||
fmt.Fprintln(os.Stderr, "failed to signal containerd", err)
|
||||
}
|
||||
}
|
||||
if err := cmd.Wait(); err != nil {
|
||||
if err := ctrd.Wait(); err != nil {
|
||||
if _, ok := err.(*exec.ExitError); !ok {
|
||||
fmt.Fprintln(os.Stderr, "failed to wait for containerd", err)
|
||||
}
|
||||
@ -148,28 +140,6 @@ func TestMain(m *testing.M) {
|
||||
os.Exit(status)
|
||||
}
|
||||
|
||||
func waitForDaemonStart(ctx context.Context, address string) (*Client, error) {
|
||||
var (
|
||||
client *Client
|
||||
serving bool
|
||||
err error
|
||||
)
|
||||
|
||||
for i := 0; i < 20; i++ {
|
||||
if client == nil {
|
||||
client, err = New(address)
|
||||
}
|
||||
if err == nil {
|
||||
serving, err = client.IsServing(ctx)
|
||||
if serving {
|
||||
return client, nil
|
||||
}
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
return nil, fmt.Errorf("containerd did not start within 2s: %v", err)
|
||||
}
|
||||
|
||||
func newClient(t testing.TB, address string, opts ...ClientOpt) (*Client, error) {
|
||||
if testing.Short() {
|
||||
t.Skip()
|
||||
|
@ -5,15 +5,70 @@ package containerd
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func dialer(address string, timeout time.Duration) (net.Conn, error) {
|
||||
address = strings.TrimPrefix(address, "unix://")
|
||||
return net.DialTimeout("unix", address, timeout)
|
||||
// isNoent reports whether err is a *net.OpError wrapping an *os.SyscallError
// whose underlying error is syscall.ENOENT. Any other error shape, including
// a nil err, yields false.
func isNoent(err error) bool {
	opErr, ok := err.(*net.OpError)
	if !ok {
		return false
	}
	sysErr, ok := opErr.Err.(*os.SyscallError)
	if !ok {
		return false
	}
	return sysErr.Err == syscall.ENOENT
}
|
||||
|
||||
func dialAddress(address string) string {
|
||||
type dialResult struct {
|
||||
c net.Conn
|
||||
err error
|
||||
}
|
||||
|
||||
func Dialer(address string, timeout time.Duration) (net.Conn, error) {
|
||||
var (
|
||||
stopC = make(chan struct{})
|
||||
synC = make(chan *dialResult)
|
||||
)
|
||||
address = strings.TrimPrefix(address, "unix://")
|
||||
go func() {
|
||||
defer close(synC)
|
||||
for {
|
||||
select {
|
||||
case <-stopC:
|
||||
return
|
||||
default:
|
||||
c, err := net.DialTimeout("unix", address, timeout)
|
||||
if isNoent(err) {
|
||||
<-time.After(10 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
synC <- &dialResult{c, err}
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
select {
|
||||
case dr := <-synC:
|
||||
return dr.c, dr.err
|
||||
case <-time.After(timeout):
|
||||
close(stopC)
|
||||
go func() {
|
||||
dr := <-synC
|
||||
if dr != nil {
|
||||
dr.c.Close()
|
||||
}
|
||||
}()
|
||||
return nil, errors.Errorf("dial %s: no such file or directory", address)
|
||||
}
|
||||
}
|
||||
|
||||
// DialAddress returns address prefixed with the "unix://" scheme.
func DialAddress(address string) string {
	const format = "unix://%s"
	return fmt.Sprintf(format, address)
}
|
||||
|
@ -7,10 +7,10 @@ import (
|
||||
winio "github.com/Microsoft/go-winio"
|
||||
)
|
||||
|
||||
func dialer(address string, timeout time.Duration) (net.Conn, error) {
|
||||
func Dialer(address string, timeout time.Duration) (net.Conn, error) {
|
||||
return winio.DialPipe(address, &timeout)
|
||||
}
|
||||
|
||||
func dialAddress(address string) string {
|
||||
func DialAddress(address string) string {
|
||||
return address
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd"
|
||||
eventsapi "github.com/containerd/containerd/api/services/events/v1"
|
||||
"github.com/containerd/containerd/errdefs"
|
||||
"github.com/containerd/containerd/events"
|
||||
@ -80,7 +81,7 @@ func main() {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
server := grpc.NewServer()
|
||||
server := newServer()
|
||||
e, err := connectEvents(context.GlobalString("address"))
|
||||
if err != nil {
|
||||
return err
|
||||
@ -171,7 +172,7 @@ func dumpStacks() {
|
||||
}
|
||||
|
||||
func connectEvents(address string) (eventsapi.EventsClient, error) {
|
||||
conn, err := connect(address, dialer)
|
||||
conn, err := connect(address, containerd.Dialer)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to dial %q", address)
|
||||
}
|
||||
@ -182,26 +183,18 @@ func connect(address string, d func(string, time.Duration) (net.Conn, error)) (*
|
||||
gopts := []grpc.DialOption{
|
||||
grpc.WithBlock(),
|
||||
grpc.WithInsecure(),
|
||||
grpc.WithTimeout(100 * time.Second),
|
||||
grpc.WithTimeout(60 * time.Second),
|
||||
grpc.WithDialer(d),
|
||||
grpc.FailOnNonTempDialError(true),
|
||||
grpc.WithBackoffMaxDelay(3 * time.Second),
|
||||
}
|
||||
conn, err := grpc.Dial(dialAddress(address), gopts...)
|
||||
conn, err := grpc.Dial(containerd.DialAddress(address), gopts...)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "failed to dial %q", address)
|
||||
}
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func dialer(address string, timeout time.Duration) (net.Conn, error) {
|
||||
address = strings.TrimPrefix(address, "unix://")
|
||||
return net.DialTimeout("unix", address, timeout)
|
||||
}
|
||||
|
||||
func dialAddress(address string) string {
|
||||
return fmt.Sprintf("unix://%s", address)
|
||||
}
|
||||
|
||||
type remoteEventsPublisher struct {
|
||||
client eventsapi.EventsClient
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ func setupSignals() (chan os.Signal, error) {
|
||||
}
|
||||
|
||||
func newServer() *grpc.Server {
|
||||
return grpc.NewServer(grpc.Creds(NewUnixSocketCredentils(0, 0)))
|
||||
return grpc.NewServer(grpc.Creds(NewUnixSocketCredentials(0, 0)))
|
||||
}
|
||||
|
||||
type unixSocketCredentials struct {
|
||||
@ -42,7 +42,7 @@ type unixSocketCredentials struct {
|
||||
serverName string
|
||||
}
|
||||
|
||||
func NewUnixSocketCredentils(uid, gid int) credentials.TransportCredentials {
|
||||
func NewUnixSocketCredentials(uid, gid int) credentials.TransportCredentials {
|
||||
return &unixSocketCredentials{uid, gid, "locahost"}
|
||||
}
|
||||
|
||||
|
@ -4,7 +4,9 @@ package containerd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/cgroups"
|
||||
"github.com/containerd/containerd/linux/runcopts"
|
||||
@ -175,3 +177,94 @@ func TestShimInCgroup(t *testing.T) {
|
||||
|
||||
<-statusC
|
||||
}
|
||||
|
||||
func TestDaemonRestart(t *testing.T) {
|
||||
client, err := newClient(t, address)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
var (
|
||||
image Image
|
||||
ctx, cancel = testContext()
|
||||
id = t.Name()
|
||||
)
|
||||
defer cancel()
|
||||
|
||||
image, err = client.GetImage(ctx, testImage)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
spec, err := generateSpec(withImageConfig(ctx, image), withProcessArgs("sleep", "30"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
container, err := client.NewContainer(ctx, id, WithSpec(spec), withNewSnapshot(id, image))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
defer container.Delete(ctx, WithSnapshotCleanup)
|
||||
|
||||
task, err := container.NewTask(ctx, Stdio)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
defer task.Delete(ctx)
|
||||
|
||||
synC := make(chan struct{})
|
||||
statusC := make(chan uint32, 1)
|
||||
go func() {
|
||||
synC <- struct{}{}
|
||||
status, err := task.Wait(ctx)
|
||||
if err == nil {
|
||||
t.Errorf(`first task.Wait() should have failed with "transport is closing"`)
|
||||
}
|
||||
statusC <- status
|
||||
}()
|
||||
<-synC
|
||||
|
||||
if err := task.Start(ctx); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := ctrd.Restart(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
<-statusC
|
||||
|
||||
serving := false
|
||||
for i := 0; i < 20; i++ {
|
||||
serving, err = client.IsServing(ctx)
|
||||
if serving {
|
||||
break
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
if !serving {
|
||||
t.Fatalf("containerd did not start within 2s: %v", err)
|
||||
}
|
||||
|
||||
go func() {
|
||||
synC <- struct{}{}
|
||||
status, err := task.Wait(ctx)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
statusC <- status
|
||||
}()
|
||||
<-synC
|
||||
|
||||
if err := task.Kill(ctx, syscall.SIGKILL); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
<-statusC
|
||||
}
|
||||
|
115
daemon_test.go
Normal file
115
daemon_test.go
Normal file
@ -0,0 +1,115 @@
|
||||
package containerd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os/exec"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// daemon keeps track of a daemon process started by the tests.
type daemon struct {
	// Embedded so the methods on daemon can call d.Lock and d.Unlock directly.
	sync.Mutex
	// addr is the address passed to start; waitForStart dials it.
	addr string
	// cmd is the most recently started process. It is nil until start
	// succeeds, and the other methods treat nil as "daemon is not running".
	cmd *exec.Cmd
}
|
||||
|
||||
func (d *daemon) start(name, address string, args []string, stdout, stderr io.Writer) error {
|
||||
d.Lock()
|
||||
defer d.Unlock()
|
||||
if d.cmd != nil {
|
||||
return errors.New("daemon is already running")
|
||||
}
|
||||
args = append(args, []string{"--address", address}...)
|
||||
cmd := exec.Command(name, args...)
|
||||
cmd.Stdout = stdout
|
||||
cmd.Stderr = stderr
|
||||
if err := cmd.Start(); err != nil {
|
||||
cmd.Wait()
|
||||
return errors.Wrap(err, "failed to start daemon")
|
||||
}
|
||||
d.addr = address
|
||||
d.cmd = cmd
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *daemon) waitForStart(ctx context.Context) (*Client, error) {
|
||||
var (
|
||||
client *Client
|
||||
serving bool
|
||||
err error
|
||||
)
|
||||
|
||||
for i := 0; i < 20; i++ {
|
||||
if client == nil {
|
||||
client, err = New(d.addr)
|
||||
}
|
||||
if err == nil {
|
||||
serving, err = client.IsServing(ctx)
|
||||
if serving {
|
||||
return client, nil
|
||||
}
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
return nil, fmt.Errorf("containerd did not start within 2s: %v", err)
|
||||
}
|
||||
|
||||
func (d *daemon) Stop() error {
|
||||
d.Lock()
|
||||
d.Unlock()
|
||||
if d.cmd == nil {
|
||||
return errors.New("daemon is not running")
|
||||
}
|
||||
return d.cmd.Process.Signal(syscall.SIGTERM)
|
||||
}
|
||||
|
||||
func (d *daemon) Kill() error {
|
||||
d.Lock()
|
||||
d.Unlock()
|
||||
if d.cmd == nil {
|
||||
return errors.New("daemon is not running")
|
||||
}
|
||||
return d.cmd.Process.Kill()
|
||||
}
|
||||
|
||||
func (d *daemon) Wait() error {
|
||||
d.Lock()
|
||||
d.Unlock()
|
||||
if d.cmd == nil {
|
||||
return errors.New("daemon is not running")
|
||||
}
|
||||
return d.cmd.Wait()
|
||||
}
|
||||
|
||||
func (d *daemon) Restart() error {
|
||||
d.Lock()
|
||||
d.Unlock()
|
||||
if d.cmd == nil {
|
||||
return errors.New("daemon is not running")
|
||||
}
|
||||
|
||||
var err error
|
||||
if err = d.cmd.Process.Signal(syscall.SIGTERM); err != nil {
|
||||
return errors.Wrap(err, "failed to signal daemon")
|
||||
}
|
||||
|
||||
d.cmd.Wait()
|
||||
|
||||
<-time.After(1 * time.Second)
|
||||
|
||||
cmd := exec.Command(d.cmd.Path, d.cmd.Args[1:]...)
|
||||
cmd.Stdout = d.cmd.Stdout
|
||||
cmd.Stderr = d.cmd.Stderr
|
||||
if err := cmd.Start(); err != nil {
|
||||
cmd.Wait()
|
||||
return errors.Wrap(err, "failed to start new daemon instance")
|
||||
}
|
||||
d.cmd = cmd
|
||||
|
||||
return nil
|
||||
}
|
@ -17,10 +17,11 @@ import (
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func loadBundle(path, workdir, namespace string, events *events.Exchange) *bundle {
|
||||
func loadBundle(path, workdir, namespace, id string, events *events.Exchange) *bundle {
|
||||
return &bundle{
|
||||
path: path,
|
||||
namespace: namespace,
|
||||
id: id,
|
||||
events: events,
|
||||
workDir: workdir,
|
||||
}
|
||||
|
@ -215,6 +215,7 @@ func (r *Runtime) Delete(ctx context.Context, c runtime.Task) (*runtime.Exit, er
|
||||
filepath.Join(r.state, namespace, lc.id),
|
||||
filepath.Join(r.root, namespace, lc.id),
|
||||
namespace,
|
||||
lc.id,
|
||||
r.events,
|
||||
)
|
||||
if err := bundle.Delete(); err != nil {
|
||||
@ -267,7 +268,8 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) {
|
||||
continue
|
||||
}
|
||||
id := path.Name()
|
||||
bundle := loadBundle(filepath.Join(r.state, ns, id), filepath.Join(r.root, ns, id), ns, r.events)
|
||||
bundle := loadBundle(filepath.Join(r.state, ns, id),
|
||||
filepath.Join(r.root, ns, id), ns, id, r.events)
|
||||
|
||||
s, err := bundle.Connect(ctx, r.remote)
|
||||
if err != nil {
|
||||
|
2
task.go
2
task.go
@ -220,7 +220,7 @@ func (t *task) Wait(ctx context.Context) (uint32, error) {
|
||||
for {
|
||||
evt, err := eventstream.Recv()
|
||||
if err != nil {
|
||||
return UnknownExitStatus, err
|
||||
return UnknownExitStatus, errdefs.FromGRPC(err)
|
||||
}
|
||||
if typeurl.Is(evt.Event, &eventsapi.TaskExit{}) {
|
||||
v, err := typeurl.UnmarshalAny(evt.Event)
|
||||
|
Loading…
Reference in New Issue
Block a user