cri,nri: block NRI plugin sync during event processing.

Block the synchronization of registering NRI plugins during
CRI events to avoid the plugin ending up in an inconsistent
starting state after initial sync (missing pods, containers
or missed events for some pods or containers).

Signed-off-by: Krisztian Litkey <krisztian.litkey@intel.com>
This commit is contained in:
Krisztian Litkey 2025-01-31 20:15:12 +02:00
parent e465b45f9c
commit 79cdbf61b6
No known key found for this signature in database
GPG Key ID: 637F2939D50AF85D
11 changed files with 49 additions and 0 deletions

View File

@ -358,6 +358,15 @@ func (a *API) WithContainerExit(criCtr *cstore.Container) containerd.ProcessDele
}
}
// PluginSyncBlock is an alias for nri.PluginSyncBlock, the handle type
// returned by (*API).BlockPluginSync.
type PluginSyncBlock = nri.PluginSyncBlock
// BlockPluginSync asks the underlying NRI adaptation to block plugin
// synchronization and hands back the resulting block handle. When NRI
// is disabled there is nothing to block, so nil is returned instead.
//
// NOTE(review): callers use `defer c.nri.BlockPluginSync().Unblock()`,
// so Unblock is presumably safe on a nil *PluginSyncBlock; verify
// against the nri package.
func (a *API) BlockPluginSync() *PluginSyncBlock {
	if !a.IsDisabled() {
		return a.nri.BlockPluginSync()
	}
	return nil
}
//
// NRI-CRI 'domain' interface
//

View File

@ -108,6 +108,14 @@ func (*API) WithContainerExit(*cstore.Container) containerd.ProcessDeleteOpts {
}
}
// PluginSyncBlock is an empty placeholder for the handle type returned
// by BlockPluginSync in this no-op NRI implementation.
type PluginSyncBlock struct{}
// BlockPluginSync is the no-op variant: it blocks nothing and always
// returns nil.
func (*API) BlockPluginSync() *PluginSyncBlock {
return nil
}
// Unblock does nothing. It never touches its receiver, so it can be
// called on the nil pointer returned by BlockPluginSync.
func (*PluginSyncBlock) Unblock() {}
//
// NRI-CRI no-op 'domain' interface
//

View File

@ -313,6 +313,8 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
}
}()
defer c.nri.BlockPluginSync().Unblock()
var cntr containerd.Container
if cntr, err = c.client.NewContainer(ctx, id, opts...); err != nil {
return nil, fmt.Errorf("failed to create containerd container: %w", err)

View File

@ -44,6 +44,9 @@ func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveConta
log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", ctrID)
return &runtime.RemoveContainerResponse{}, nil
}
defer c.nri.BlockPluginSync().Unblock()
id := container.ID
span.SetAttributes(tracing.Attribute("container.id", id))
i, err := container.Container.Info(ctx)

View File

@ -156,6 +156,8 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
return nil, fmt.Errorf("failed to wait for containerd task: %w", err)
}
defer c.nri.BlockPluginSync().Unblock()
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()

View File

@ -51,6 +51,9 @@ func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainer
// https://github.com/kubernetes/cri-api/blob/c20fa40/pkg/apis/runtime/v1/api.proto#L67-L68
return &runtime.StopContainerResponse{}, nil
}
defer c.nri.BlockPluginSync().Unblock()
span.SetAttributes(tracing.Attribute("container.id", container.ID))
if err := c.stopContainer(ctx, container, time.Duration(r.GetTimeout())*time.Second); err != nil {
return nil, err

View File

@ -47,6 +47,8 @@ func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.Up
return nil, err
}
defer c.nri.BlockPluginSync().Unblock()
resources := r.GetLinux()
updated, err := c.nri.UpdateContainerResources(ctx, &sandbox, &container, resources)
if err != nil {

View File

@ -44,6 +44,9 @@ func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodS
r.GetPodSandboxId())
return &runtime.RemovePodSandboxResponse{}, nil
}
defer c.nri.BlockPluginSync().Unblock()
// Use the full sandbox id.
id := sandbox.ID
span.SetAttributes(tracing.Attribute("sandbox.id", id))

View File

@ -300,6 +300,8 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
sandbox.ProcessLabel = labels["selinux_label"]
defer c.nri.BlockPluginSync().Unblock()
err = c.nri.RunPodSandbox(ctx, &sandbox)
if err != nil {
return nil, fmt.Errorf("NRI RunPodSandbox failed: %w", err)

View File

@ -46,6 +46,9 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb
// https://github.com/kubernetes/cri-api/blob/c20fa40/pkg/apis/runtime/v1/api.proto#L45-L46
return &runtime.StopPodSandboxResponse{}, nil
}
defer c.nri.BlockPluginSync().Unblock()
span.SetAttributes(tracing.Attribute("sandbox.id", sandbox.ID))
if err := c.stopPodSandbox(ctx, sandbox); err != nil {
return nil, err

View File

@ -81,6 +81,9 @@ type API interface {
// RemoveContainer relays container removal events to NRI.
RemoveContainer(context.Context, PodSandbox, Container) error
// BlockPluginSync blocks plugin synchronization until it is Unblock()ed.
BlockPluginSync() *PluginSyncBlock
}
type State int
@ -435,6 +438,15 @@ func (l *local) RemoveContainer(ctx context.Context, pod PodSandbox, ctr Contain
return err
}
// PluginSyncBlock is an alias for nri.PluginSyncBlock, the handle type
// returned by BlockPluginSync.
type PluginSyncBlock = nri.PluginSyncBlock
// BlockPluginSync forwards to the wrapped NRI adaptation's
// BlockPluginSync and returns its block handle. If NRI is not enabled,
// no block is taken and nil is returned.
//
// NOTE(review): callers invoke Unblock directly on the result, so
// Unblock is presumably nil-safe; verify against nri.PluginSyncBlock.
func (l *local) BlockPluginSync() *PluginSyncBlock {
	if l.IsEnabled() {
		return l.nri.BlockPluginSync()
	}
	return nil
}
func (l *local) syncPlugin(ctx context.Context, syncFn nri.SyncCB) error {
l.Lock()
defer l.Unlock()