Merge pull request #6031 from fuweid/carry-5648
runtime: should fail fast if dial error on shim
This commit is contained in:
commit
2d48b6a864
177
integration/shim_dial_unix_test.go
Normal file
177
integration/shim_dial_unix_test.go
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
//go:build !windows
|
||||||
|
// +build !windows
|
||||||
|
|
||||||
|
/*
|
||||||
|
Copyright The containerd Authors.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package integration
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io/ioutil"
|
||||||
|
"net"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"syscall"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
v1shimcli "github.com/containerd/containerd/runtime/v1/shim/client"
|
||||||
|
v2shimcli "github.com/containerd/containerd/runtime/v2/shim"
|
||||||
|
"github.com/containerd/ttrpc"
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
)
|
||||||
|
|
||||||
|
// abstractSocketPrefix is the leading NUL byte that newTestListener prepends to
// an address to request an abstract unix socket instead of a socket file.
const abstractSocketPrefix = "\x00"
|
||||||
|
|
||||||
|
// TestFailFastWhenConnectShim is to test that the containerd task manager
|
||||||
|
// should not tolerate ENOENT during restarting. In linux, the containerd shim
|
||||||
|
// always listens on socket before task manager dial. If there is ENOENT or
|
||||||
|
// ECONNREFUSED error, the task manager should clean up because that socket file
|
||||||
|
// is gone or shim doesn't listen on the socket anymore.
|
||||||
|
func TestFailFastWhenConnectShim(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
t.Run("abstract-unix-socket-v1", testFailFastWhenConnectShim(true, v1shimcli.AnonDialer))
|
||||||
|
t.Run("abstract-unix-socket-v2", testFailFastWhenConnectShim(true, v2shimcli.AnonDialer))
|
||||||
|
t.Run("normal-unix-socket-v1", testFailFastWhenConnectShim(false, v1shimcli.AnonDialer))
|
||||||
|
t.Run("normal-unix-socket-v2", testFailFastWhenConnectShim(false, v2shimcli.AnonDialer))
|
||||||
|
}
|
||||||
|
|
||||||
|
// dialFunc is the dial signature under test: it connects to address, giving up
// after timeout. TestFailFastWhenConnectShim passes the v1 and v2 shim
// AnonDialer functions as values of this type.
type dialFunc func(address string, timeout time.Duration) (net.Conn, error)
|
||||||
|
|
||||||
|
func testFailFastWhenConnectShim(abstract bool, dialFn dialFunc) func(*testing.T) {
|
||||||
|
return func(t *testing.T) {
|
||||||
|
var (
|
||||||
|
ctx = context.Background()
|
||||||
|
addr, listener, cleanup = newTestListener(t, abstract)
|
||||||
|
errCh = make(chan error, 1)
|
||||||
|
|
||||||
|
checkDialErr = func(addr string, errCh chan error, expected error) {
|
||||||
|
go func() {
|
||||||
|
_, err := dialFn(addr, 1*time.Hour)
|
||||||
|
errCh <- err
|
||||||
|
}()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-time.After(10 * time.Second):
|
||||||
|
t.Fatalf("expected fail fast, but got timeout")
|
||||||
|
case err := <-errCh:
|
||||||
|
t.Helper()
|
||||||
|
if !errors.Is(err, expected) {
|
||||||
|
t.Fatalf("expected error %v, but got %v", expected, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
defer cleanup()
|
||||||
|
defer listener.Close()
|
||||||
|
|
||||||
|
ttrpcSrv, err := ttrpc.NewServer()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to new ttrpc server: %v", err)
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
ttrpcSrv.Serve(ctx, listener)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// ttrpcSrv starts in other goroutine so that we need to retry AnonDialer
|
||||||
|
// here until ttrpcSrv receives the request.
|
||||||
|
go func() {
|
||||||
|
to := time.After(10 * time.Second)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-to:
|
||||||
|
errCh <- errors.New("timeout")
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
conn, err := dialFn(addr, 1*time.Hour)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, syscall.ECONNREFUSED) {
|
||||||
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
errCh <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
conn.Close()
|
||||||
|
errCh <- nil
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// it should be successful
|
||||||
|
if err := <-errCh; err != nil {
|
||||||
|
t.Fatalf("failed to dial: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE(fuweid):
|
||||||
|
//
|
||||||
|
// UnixListener will unlink that the socket file when call Close.
|
||||||
|
// Disable unlink when close to keep the socket file.
|
||||||
|
listener.(*net.UnixListener).SetUnlinkOnClose(false)
|
||||||
|
|
||||||
|
listener.Close()
|
||||||
|
ttrpcSrv.Shutdown(ctx)
|
||||||
|
|
||||||
|
checkDialErr(addr, errCh, syscall.ECONNREFUSED)
|
||||||
|
|
||||||
|
// remove the socket file
|
||||||
|
cleanup()
|
||||||
|
|
||||||
|
if abstract {
|
||||||
|
checkDialErr(addr, errCh, syscall.ECONNREFUSED)
|
||||||
|
} else {
|
||||||
|
// should not wait for the socket file show up again.
|
||||||
|
checkDialErr(addr, errCh, syscall.ENOENT)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTestListener(t testing.TB, abstract bool) (string, net.Listener, func()) {
|
||||||
|
tmpDir, err := ioutil.TempDir("", "shim-ut-XX")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to create tmp directory: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE(fuweid):
|
||||||
|
//
|
||||||
|
// Before patch https://github.com/containerd/containerd/commit/bd908acabd1a31c8329570b5283e8fdca0b39906,
|
||||||
|
// The shim stores the abstract socket file without abstract socket
|
||||||
|
// prefix and `unix://`. For the existing shim, if the socket file
|
||||||
|
// only contains the path, it will indicate that it is abstract socket.
|
||||||
|
// Otherwise, it will be normal socket file formated in `unix:///xyz'.
|
||||||
|
addr := filepath.Join(tmpDir, "uds.socket")
|
||||||
|
if abstract {
|
||||||
|
addr = abstractSocketPrefix + addr
|
||||||
|
} else {
|
||||||
|
addr = "unix://" + addr
|
||||||
|
}
|
||||||
|
|
||||||
|
listener, err := net.Listen("unix", strings.TrimPrefix(addr, "unix://"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("failed to listen on %s: %v", addr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.TrimPrefix(addr, abstractSocketPrefix), listener, func() {
|
||||||
|
os.RemoveAll(tmpDir)
|
||||||
|
}
|
||||||
|
}
|
@ -34,7 +34,6 @@ import (
|
|||||||
|
|
||||||
"github.com/containerd/containerd/events"
|
"github.com/containerd/containerd/events"
|
||||||
"github.com/containerd/containerd/log"
|
"github.com/containerd/containerd/log"
|
||||||
"github.com/containerd/containerd/pkg/dialer"
|
|
||||||
v1 "github.com/containerd/containerd/runtime/v1"
|
v1 "github.com/containerd/containerd/runtime/v1"
|
||||||
"github.com/containerd/containerd/runtime/v1/shim"
|
"github.com/containerd/containerd/runtime/v1/shim"
|
||||||
shimapi "github.com/containerd/containerd/runtime/v1/shim/v1"
|
shimapi "github.com/containerd/containerd/runtime/v1/shim/v1"
|
||||||
@ -298,12 +297,19 @@ func RemoveSocket(address string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AnonDialer dials the shim socket at address, giving up after timeout.
|
||||||
|
//
|
||||||
|
// NOTE: It is only used for testing.
|
||||||
|
func AnonDialer(address string, timeout time.Duration) (net.Conn, error) {
|
||||||
|
return anonDialer(address, timeout)
|
||||||
|
}
|
||||||
|
|
||||||
func connect(address string, d func(string, time.Duration) (net.Conn, error)) (net.Conn, error) {
|
func connect(address string, d func(string, time.Duration) (net.Conn, error)) (net.Conn, error) {
|
||||||
return d(address, 100*time.Second)
|
return d(address, 100*time.Second)
|
||||||
}
|
}
|
||||||
|
|
||||||
func anonDialer(address string, timeout time.Duration) (net.Conn, error) {
|
func anonDialer(address string, timeout time.Duration) (net.Conn, error) {
|
||||||
return dialer.Dialer(socket(address).path(), timeout)
|
return net.DialTimeout("unix", socket(address).path(), timeout)
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithConnect connects to an existing shim
|
// WithConnect connects to an existing shim
|
||||||
|
@ -32,7 +32,6 @@ import (
|
|||||||
|
|
||||||
"github.com/containerd/containerd/defaults"
|
"github.com/containerd/containerd/defaults"
|
||||||
"github.com/containerd/containerd/namespaces"
|
"github.com/containerd/containerd/namespaces"
|
||||||
"github.com/containerd/containerd/pkg/dialer"
|
|
||||||
"github.com/containerd/containerd/sys"
|
"github.com/containerd/containerd/sys"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
)
|
)
|
||||||
@ -78,9 +77,7 @@ func SocketAddress(ctx context.Context, socketPath, id string) (string, error) {
|
|||||||
|
|
||||||
// AnonDialer returns a dialer for a socket
|
// AnonDialer returns a dialer for a socket
|
||||||
func AnonDialer(address string, timeout time.Duration) (net.Conn, error) {
|
func AnonDialer(address string, timeout time.Duration) (net.Conn, error) {
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
return net.DialTimeout("unix", socket(address).path(), timeout)
|
||||||
defer cancel()
|
|
||||||
return dialer.ContextDialer(ctx, socket(address).path())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// AnonReconnectDialer returns a dialer for an existing socket on reconnection
|
// AnonReconnectDialer returns a dialer for an existing socket on reconnection
|
||||||
|
Loading…
Reference in New Issue
Block a user