|
|
|
|
@@ -70,14 +70,22 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|
|
|
|
}
|
|
|
|
|
name := makeSandboxName(metadata)
|
|
|
|
|
log.G(ctx).WithField("podsandboxid", id).Debugf("generated id for sandbox name %q", name)
|
|
|
|
|
|
|
|
|
|
// cleanupErr records the last error returned by the critical cleanup operations in deferred functions,
|
|
|
|
|
// like CNI teardown and stopping the running sandbox task.
|
|
|
|
|
// If cleanup is not completed for some reason, the CRI-plugin will leave the sandbox
|
|
|
|
|
// in a not-ready state, which can later be cleaned up by the next execution of the kubelet's syncPod workflow.
|
|
|
|
|
var cleanupErr error
|
|
|
|
|
|
|
|
|
|
// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
|
|
|
|
|
// same sandbox.
|
|
|
|
|
if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to reserve sandbox name %q: %w", name, err)
|
|
|
|
|
}
|
|
|
|
|
defer func() {
|
|
|
|
|
// Release the name if the function returns with an error.
|
|
|
|
|
if retErr != nil {
|
|
|
|
|
// Release the name if the function returns with an error and all the resource cleanup is done.
|
|
|
|
|
// When cleanupErr != nil, the name will be cleaned in sandbox_remove.
|
|
|
|
|
if retErr != nil && cleanupErr == nil {
|
|
|
|
|
c.sandboxNameIndex.ReleaseByName(name)
|
|
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
@@ -111,70 +119,13 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|
|
|
|
}
|
|
|
|
|
log.G(ctx).WithField("podsandboxid", id).Debugf("use OCI runtime %+v", ociRuntime)
|
|
|
|
|
|
|
|
|
|
podNetwork := true
|
|
|
|
|
|
|
|
|
|
if goruntime.GOOS != "windows" &&
|
|
|
|
|
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
|
|
|
|
|
// Pod network is not needed on linux with host network.
|
|
|
|
|
podNetwork = false
|
|
|
|
|
}
|
|
|
|
|
if goruntime.GOOS == "windows" &&
|
|
|
|
|
config.GetWindows().GetSecurityContext().GetHostProcess() {
|
|
|
|
|
//Windows HostProcess pods can only run on the host network
|
|
|
|
|
podNetwork = false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if podNetwork {
|
|
|
|
|
netStart := time.Now()
|
|
|
|
|
// If it is not in host network namespace then create a namespace and set the sandbox
|
|
|
|
|
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
|
|
|
|
|
// namespaces. If the pod is in host network namespace then both are empty and should not
|
|
|
|
|
// be used.
|
|
|
|
|
var netnsMountDir = "/var/run/netns"
|
|
|
|
|
if c.config.NetNSMountsUnderStateDir {
|
|
|
|
|
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
|
|
|
|
|
}
|
|
|
|
|
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
|
|
|
|
|
}
|
|
|
|
|
sandbox.NetNSPath = sandbox.NetNS.GetPath()
|
|
|
|
|
defer func() {
|
|
|
|
|
if retErr != nil {
|
|
|
|
|
deferCtx, deferCancel := ctrdutil.DeferContext()
|
|
|
|
|
defer deferCancel()
|
|
|
|
|
// Teardown network if an error is returned.
|
|
|
|
|
if err := c.teardownPodNetwork(deferCtx, sandbox); err != nil {
|
|
|
|
|
log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if err := sandbox.NetNS.Remove(); err != nil {
|
|
|
|
|
log.G(ctx).WithError(err).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
|
|
|
|
|
}
|
|
|
|
|
sandbox.NetNSPath = ""
|
|
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
// Setup network for sandbox.
|
|
|
|
|
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
|
|
|
|
|
// rely on the assumption that CRI shim will not be querying the network namespace to check the
|
|
|
|
|
// network states such as IP.
|
|
|
|
|
// In future runtime implementation should avoid relying on CRI shim implementation details.
|
|
|
|
|
// In this case however caching the IP will add a subtle performance enhancement by avoiding
|
|
|
|
|
// calls to network namespace of the pod to query the IP of the veth interface on every
|
|
|
|
|
// SandboxStatus request.
|
|
|
|
|
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
|
|
|
|
|
}
|
|
|
|
|
sandboxCreateNetworkTimer.UpdateSince(netStart)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
runtimeStart := time.Now()
|
|
|
|
|
// Create sandbox container.
|
|
|
|
|
// NOTE: sandboxContainerSpec SHOULD NOT have side
|
|
|
|
|
// effect, e.g. accessing/creating files, so that we can test
|
|
|
|
|
// it safely.
|
|
|
|
|
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations)
|
|
|
|
|
// NOTE: the network namespace path will be created later and update through updateNetNamespacePath function
|
|
|
|
|
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, "", ociRuntime.PodAnnotations)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to generate sandbox container spec: %w", err)
|
|
|
|
|
}
|
|
|
|
|
@@ -222,12 +173,27 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to create containerd container: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add container into sandbox store in INIT state.
|
|
|
|
|
sandbox.Container = container
|
|
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
|
if retErr != nil {
|
|
|
|
|
// Put the sandbox into sandbox store when the some resource fails to be cleaned.
|
|
|
|
|
if retErr != nil && cleanupErr != nil {
|
|
|
|
|
log.G(ctx).WithError(cleanupErr).Errorf("encountered an error cleaning up failed sandbox %q, marking sandbox state as SANDBOX_UNKNOWN", id)
|
|
|
|
|
if err := c.sandboxStore.Add(sandbox); err != nil {
|
|
|
|
|
log.G(ctx).WithError(err).Errorf("failed to add sandbox %+v into store", sandbox)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
|
// Delete container only if all the resource cleanup is done.
|
|
|
|
|
if retErr != nil && cleanupErr == nil {
|
|
|
|
|
deferCtx, deferCancel := ctrdutil.DeferContext()
|
|
|
|
|
defer deferCancel()
|
|
|
|
|
if err := container.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
|
|
|
|
|
log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
|
|
|
|
|
if cleanupErr = container.Delete(deferCtx, containerd.WithSnapshotCleanup); cleanupErr != nil {
|
|
|
|
|
log.G(ctx).WithError(cleanupErr).Errorf("Failed to delete containerd container %q", id)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
@@ -281,6 +247,82 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|
|
|
|
return nil, fmt.Errorf("failed to get sandbox container info: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
podNetwork := true
|
|
|
|
|
|
|
|
|
|
if goruntime.GOOS != "windows" &&
|
|
|
|
|
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
|
|
|
|
|
// Pod network is not needed on linux with host network.
|
|
|
|
|
podNetwork = false
|
|
|
|
|
}
|
|
|
|
|
if goruntime.GOOS == "windows" &&
|
|
|
|
|
config.GetWindows().GetSecurityContext().GetHostProcess() {
|
|
|
|
|
// Windows HostProcess pods can only run on the host network
|
|
|
|
|
podNetwork = false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if podNetwork {
|
|
|
|
|
netStart := time.Now()
|
|
|
|
|
|
|
|
|
|
// If it is not in host network namespace then create a namespace and set the sandbox
|
|
|
|
|
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
|
|
|
|
|
// namespaces. If the pod is in host network namespace then both are empty and should not
|
|
|
|
|
// be used.
|
|
|
|
|
var netnsMountDir = "/var/run/netns"
|
|
|
|
|
if c.config.NetNSMountsUnderStateDir {
|
|
|
|
|
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
|
|
|
|
|
}
|
|
|
|
|
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
|
|
|
|
|
}
|
|
|
|
|
sandbox.NetNSPath = sandbox.NetNS.GetPath()
|
|
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
|
// Remove the network namespace only if all the resource cleanup is done.
|
|
|
|
|
if retErr != nil && cleanupErr == nil {
|
|
|
|
|
if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil {
|
|
|
|
|
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
sandbox.NetNSPath = ""
|
|
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
// Update network namespace in the container's spec
|
|
|
|
|
c.updateNetNamespacePath(spec, sandbox.NetNSPath)
|
|
|
|
|
if err := updateContainerSpec(ctx, container, spec); err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Define this defer to teardownPodNetwork prior to the setupPodNetwork function call.
|
|
|
|
|
// This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions.
|
|
|
|
|
defer func() {
|
|
|
|
|
// Teardown the network only if all the resource cleanup is done.
|
|
|
|
|
if retErr != nil && cleanupErr == nil {
|
|
|
|
|
deferCtx, deferCancel := ctrdutil.DeferContext()
|
|
|
|
|
defer deferCancel()
|
|
|
|
|
// Teardown network if an error is returned.
|
|
|
|
|
if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil {
|
|
|
|
|
log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
// Setup network for sandbox.
|
|
|
|
|
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
|
|
|
|
|
// rely on the assumption that CRI shim will not be querying the network namespace to check the
|
|
|
|
|
// network states such as IP.
|
|
|
|
|
// In future runtime implementation should avoid relying on CRI shim implementation details.
|
|
|
|
|
// In this case however caching the IP will add a subtle performance enhancement by avoiding
|
|
|
|
|
// calls to network namespace of the pod to query the IP of the veth interface on every
|
|
|
|
|
// SandboxStatus request.
|
|
|
|
|
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sandboxCreateNetworkTimer.UpdateSince(netStart)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Create sandbox task in containerd.
|
|
|
|
|
log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).",
|
|
|
|
|
id, name)
|
|
|
|
|
@@ -301,6 +343,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|
|
|
|
// Cleanup the sandbox container if an error is returned.
|
|
|
|
|
if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
|
|
|
|
|
log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id)
|
|
|
|
|
cleanupErr = err
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}()
|
|
|
|
|
@@ -339,9 +382,6 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
|
|
|
|
|
return nil, fmt.Errorf("failed to update sandbox status: %w", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Add sandbox into sandbox store in INIT state.
|
|
|
|
|
sandbox.Container = container
|
|
|
|
|
|
|
|
|
|
if err := c.sandboxStore.Add(sandbox); err != nil {
|
|
|
|
|
return nil, fmt.Errorf("failed to add sandbox %+v into store: %w", sandbox, err)
|
|
|
|
|
}
|
|
|
|
|
|