 10aec359a0
			
		
	
	10aec359a0
	
	
	
		
			
			A nil CRIImplementation field can cause a nil pointer dereference and panic during startup recovery. Prior to this change, the nri.API struct would have a nil cri (CRIImplementation) field after nri.NewAPI until nri.Register was called. Register is called mid-way through initialization of the CRI plugin, but recovery for containers occurs prior to that. Container recovery includes establishing new exit monitors for existing containers that were discovered. When a container exits, NRI plugins are given the opportunity to be notified about the lifecycle event, and this is done by accessing that CRIImplementation field inside the nri.API. If a container exits prior to nri.Register being called, access to the CRIImplementation field can cause a panic. Here's the call-path: * The CRI plugin starts running [here](ae71819c4f/pkg/cri/server/service.go (L222)) * It then [calls into](ae71819c4f/pkg/cri/server/service.go (L227)) `recover()` to recover state from previous runs of containerd * `recover()` then attempts to recover all containers through [`loadContainer()`](ae7d74b9e2/internal/cri/server/restart.go (L175)) * When `loadContainer()` finds a container that is still running, it waits for the task (internal containerd object) to exit and sets up [exit monitoring](ae7d74b9e2/internal/cri/server/restart.go (L391)) * Any exit that then happens must be [handled](ae7d74b9e2/internal/cri/server/events.go (L145)) * Handling an exit includes [deleting the Task](ae7d74b9e2/internal/cri/server/events.go (L188)) and specifying [`nri.WithContainerExit`](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L348)) to [notify](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L356)) any subscribed NRI plugins * NRI plugins need to know information about the pod (not just the sandbox), so before a plugin is notified the NRI API package [queries the Sandbox Store](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L232)) through the CRI implementation * The `cri` implementation member field in the `nri.API` struct is set as part of the [`Register()`](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L66)) method * The `nri.Register()` method is only called [much further down in the CRI `Run()` method](ae71819c4f/pkg/cri/server/service.go (L279)) Signed-off-by: Samuel Karp <samuelkarp@google.com>
		
			
				
	
	
		
			527 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			527 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| /*
 | |
|    Copyright The containerd Authors.
 | |
| 
 | |
|    Licensed under the Apache License, Version 2.0 (the "License");
 | |
|    you may not use this file except in compliance with the License.
 | |
|    You may obtain a copy of the License at
 | |
| 
 | |
|        http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
|    Unless required by applicable law or agreed to in writing, software
 | |
|    distributed under the License is distributed on an "AS IS" BASIS,
 | |
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|    See the License for the specific language governing permissions and
 | |
|    limitations under the License.
 | |
| */
 | |
| 
 | |
| package nri
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"fmt"
 | |
| 	"path"
 | |
| 	"sync"
 | |
| 
 | |
| 	"github.com/containerd/log"
 | |
| 
 | |
| 	"github.com/containerd/containerd/v2/version"
 | |
| 	nri "github.com/containerd/nri/pkg/adaptation"
 | |
| )
 | |
| 
 | |
| // API implements a common API for interfacing NRI from containerd. It is
 | |
| // agnostic to any internal containerd implementation details of pods and
 | |
| // containers. It needs corresponding Domain interfaces for each containerd
 | |
| // namespace it needs to handle. These domains take care of the namespace-
 | |
| // specific details of providing pod and container metadata to NRI and of
 | |
| // applying NRI-requested adjustments to the state of containers.
 | |
| type API interface {
 | |
| 	// IsEnabled returns true if the NRI interface is enabled and initialized.
 | |
| 	IsEnabled() bool
 | |
| 
 | |
| 	// Start starts the NRI interface, allowing external NRI plugins to
 | |
| 	// connect, register, and hook themselves into the lifecycle events
 | |
| 	// of pods and containers.
 | |
| 	Start() error
 | |
| 
 | |
| 	// Stop stops the NRI interface.
 | |
| 	Stop()
 | |
| 
 | |
| 	// RunPodSandbox relays pod creation events to NRI.
 | |
| 	RunPodSandbox(context.Context, PodSandbox) error
 | |
| 
 | |
| 	// StopPodSandbox relays pod shutdown events to NRI.
 | |
| 	StopPodSandbox(context.Context, PodSandbox) error
 | |
| 
 | |
| 	// RemovePodSandbox relays pod removal events to NRI.
 | |
| 	RemovePodSandbox(context.Context, PodSandbox) error
 | |
| 
 | |
| 	// CreateContainer relays container creation requests to NRI.
 | |
| 	CreateContainer(context.Context, PodSandbox, Container) (*nri.ContainerAdjustment, error)
 | |
| 
 | |
| 	// PostCreateContainer relays successful container creation events to NRI.
 | |
| 	PostCreateContainer(context.Context, PodSandbox, Container) error
 | |
| 
 | |
| 	// StartContainer relays container start request notifications to NRI.
 | |
| 	StartContainer(context.Context, PodSandbox, Container) error
 | |
| 
 | |
| 	// PostStartContainer relays successful container startup events to NRI.
 | |
| 	PostStartContainer(context.Context, PodSandbox, Container) error
 | |
| 
 | |
| 	// UpdateContainer relays container update requests to NRI.
 | |
| 	UpdateContainer(context.Context, PodSandbox, Container, *nri.LinuxResources) (*nri.LinuxResources, error)
 | |
| 
 | |
| 	// PostUpdateContainer relays successful container update events to NRI.
 | |
| 	PostUpdateContainer(context.Context, PodSandbox, Container) error
 | |
| 
 | |
| 	// StopContainer relays container stop requests to NRI.
 | |
| 	StopContainer(context.Context, PodSandbox, Container) error
 | |
| 
 | |
| 	// NotifyContainerExit handles the exit event of a container.
 | |
| 	NotifyContainerExit(context.Context, PodSandbox, Container)
 | |
| 
 | |
| 	// RemoveContainer relays container removal events to NRI.
 | |
| 	RemoveContainer(context.Context, PodSandbox, Container) error
 | |
| }
 | |
| 
 | |
| type State int
 | |
| 
 | |
| const (
 | |
| 	Created State = iota + 1
 | |
| 	Running
 | |
| 	Stopped
 | |
| 	Removed
 | |
| )
 | |
| 
 | |
| type local struct {
 | |
| 	sync.Mutex
 | |
| 	cfg *Config
 | |
| 	nri *nri.Adaptation
 | |
| 
 | |
| 	state map[string]State
 | |
| }
 | |
| 
 | |
| var _ API = &local{}
 | |
| 
 | |
| // New creates an instance of the NRI interface with the given configuration.
 | |
| func New(cfg *Config) (API, error) {
 | |
| 	l := &local{
 | |
| 		cfg: cfg,
 | |
| 	}
 | |
| 
 | |
| 	if cfg.Disable {
 | |
| 		log.L.Info("NRI interface is disabled by configuration.")
 | |
| 		return l, nil
 | |
| 	}
 | |
| 
 | |
| 	var (
 | |
| 		name     = path.Base(version.Package)
 | |
| 		version  = version.Version
 | |
| 		opts     = cfg.toOptions()
 | |
| 		syncFn   = l.syncPlugin
 | |
| 		updateFn = l.updateFromPlugin
 | |
| 		err      error
 | |
| 	)
 | |
| 
 | |
| 	cfg.ConfigureTimeouts()
 | |
| 
 | |
| 	l.nri, err = nri.New(name, version, syncFn, updateFn, opts...)
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("failed to initialize NRI interface: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	l.state = make(map[string]State)
 | |
| 
 | |
| 	log.L.Info("created NRI interface")
 | |
| 
 | |
| 	return l, nil
 | |
| }
 | |
| 
 | |
| func (l *local) IsEnabled() bool {
 | |
| 	return l != nil && !l.cfg.Disable
 | |
| }
 | |
| 
 | |
| func (l *local) Start() error {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	err := l.nri.Start()
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("failed to start NRI interface: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (l *local) Stop() {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	l.nri.Stop()
 | |
| 	l.nri = nil
 | |
| }
 | |
| 
 | |
| func (l *local) RunPodSandbox(ctx context.Context, pod PodSandbox) error {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	request := &nri.RunPodSandboxRequest{
 | |
| 		Pod: podSandboxToNRI(pod),
 | |
| 	}
 | |
| 
 | |
| 	err := l.nri.RunPodSandbox(ctx, request)
 | |
| 	l.setState(pod.GetID(), Running)
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| func (l *local) StopPodSandbox(ctx context.Context, pod PodSandbox) error {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	if !l.needsStopping(pod.GetID()) {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	request := &nri.StopPodSandboxRequest{
 | |
| 		Pod: podSandboxToNRI(pod),
 | |
| 	}
 | |
| 
 | |
| 	err := l.nri.StopPodSandbox(ctx, request)
 | |
| 	l.setState(pod.GetID(), Stopped)
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| func (l *local) RemovePodSandbox(ctx context.Context, pod PodSandbox) error {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	if !l.needsRemoval(pod.GetID()) {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	request := &nri.RemovePodSandboxRequest{
 | |
| 		Pod: podSandboxToNRI(pod),
 | |
| 	}
 | |
| 
 | |
| 	err := l.nri.RemovePodSandbox(ctx, request)
 | |
| 	l.setState(pod.GetID(), Removed)
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| func (l *local) CreateContainer(ctx context.Context, pod PodSandbox, ctr Container) (*nri.ContainerAdjustment, error) {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil, nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	request := &nri.CreateContainerRequest{
 | |
| 		Pod:       podSandboxToNRI(pod),
 | |
| 		Container: containerToNRI(ctr),
 | |
| 	}
 | |
| 
 | |
| 	response, err := l.nri.CreateContainer(ctx, request)
 | |
| 	l.setState(request.Container.Id, Created)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	_, err = l.evictContainers(ctx, response.Evict)
 | |
| 	if err != nil {
 | |
| 		// TODO(klihub): we ignore pre-create eviction failures for now
 | |
| 		log.G(ctx).WithError(err).Warnf("pre-create eviction failed")
 | |
| 	}
 | |
| 
 | |
| 	if _, err := l.applyUpdates(ctx, response.Update); err != nil {
 | |
| 		// TODO(klihub): we ignore pre-create update failures for now
 | |
| 		log.G(ctx).WithError(err).Warnf("pre-create update failed")
 | |
| 	}
 | |
| 
 | |
| 	return response.Adjust, nil
 | |
| }
 | |
| 
 | |
| func (l *local) PostCreateContainer(ctx context.Context, pod PodSandbox, ctr Container) error {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	request := &nri.PostCreateContainerRequest{
 | |
| 		Pod:       podSandboxToNRI(pod),
 | |
| 		Container: containerToNRI(ctr),
 | |
| 	}
 | |
| 
 | |
| 	return l.nri.PostCreateContainer(ctx, request)
 | |
| }
 | |
| 
 | |
| func (l *local) StartContainer(ctx context.Context, pod PodSandbox, ctr Container) error {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	request := &nri.StartContainerRequest{
 | |
| 		Pod:       podSandboxToNRI(pod),
 | |
| 		Container: containerToNRI(ctr),
 | |
| 	}
 | |
| 
 | |
| 	err := l.nri.StartContainer(ctx, request)
 | |
| 	l.setState(request.Container.Id, Running)
 | |
| 
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| func (l *local) PostStartContainer(ctx context.Context, pod PodSandbox, ctr Container) error {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	request := &nri.PostStartContainerRequest{
 | |
| 		Pod:       podSandboxToNRI(pod),
 | |
| 		Container: containerToNRI(ctr),
 | |
| 	}
 | |
| 
 | |
| 	return l.nri.PostStartContainer(ctx, request)
 | |
| }
 | |
| 
 | |
| func (l *local) UpdateContainer(ctx context.Context, pod PodSandbox, ctr Container, req *nri.LinuxResources) (*nri.LinuxResources, error) {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil, nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	request := &nri.UpdateContainerRequest{
 | |
| 		Pod:            podSandboxToNRI(pod),
 | |
| 		Container:      containerToNRI(ctr),
 | |
| 		LinuxResources: req,
 | |
| 	}
 | |
| 
 | |
| 	response, err := l.nri.UpdateContainer(ctx, request)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	_, err = l.evictContainers(ctx, response.Evict)
 | |
| 	if err != nil {
 | |
| 		// TODO(klihub): we ignore pre-update eviction failures for now
 | |
| 		log.G(ctx).WithError(err).Warnf("pre-update eviction failed")
 | |
| 	}
 | |
| 
 | |
| 	cnt := len(response.Update)
 | |
| 	if cnt == 0 {
 | |
| 		return nil, nil
 | |
| 	}
 | |
| 
 | |
| 	if cnt > 1 {
 | |
| 		_, err = l.applyUpdates(ctx, response.Update[0:cnt-1])
 | |
| 		if err != nil {
 | |
| 			// TODO(klihub): we ignore pre-update update failures for now
 | |
| 			log.G(ctx).WithError(err).Warnf("pre-update update failed")
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return response.Update[cnt-1].GetLinux().GetResources(), nil
 | |
| }
 | |
| 
 | |
| func (l *local) PostUpdateContainer(ctx context.Context, pod PodSandbox, ctr Container) error {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	request := &nri.PostUpdateContainerRequest{
 | |
| 		Pod:       podSandboxToNRI(pod),
 | |
| 		Container: containerToNRI(ctr),
 | |
| 	}
 | |
| 
 | |
| 	return l.nri.PostUpdateContainer(ctx, request)
 | |
| }
 | |
| 
 | |
| func (l *local) StopContainer(ctx context.Context, pod PodSandbox, ctr Container) error {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	return l.stopContainer(ctx, pod, ctr)
 | |
| }
 | |
| 
 | |
| func (l *local) NotifyContainerExit(ctx context.Context, pod PodSandbox, ctr Container) {
 | |
| 	go func() {
 | |
| 		l.Lock()
 | |
| 		defer l.Unlock()
 | |
| 		l.stopContainer(ctx, pod, ctr)
 | |
| 	}()
 | |
| }
 | |
| 
 | |
| func (l *local) stopContainer(ctx context.Context, pod PodSandbox, ctr Container) error {
 | |
| 	if !l.needsStopping(ctr.GetID()) {
 | |
| 		log.G(ctx).Tracef("NRI stopContainer: container %s does not need stopping",
 | |
| 			ctr.GetID())
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	request := &nri.StopContainerRequest{
 | |
| 		Pod:       podSandboxToNRI(pod),
 | |
| 		Container: containerToNRI(ctr),
 | |
| 	}
 | |
| 
 | |
| 	response, err := l.nri.StopContainer(ctx, request)
 | |
| 	l.setState(request.Container.Id, Stopped)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	_, err = l.applyUpdates(ctx, response.Update)
 | |
| 	if err != nil {
 | |
| 		// TODO(klihub): we ignore post-stop update failures for now
 | |
| 		log.G(ctx).WithError(err).Warnf("post-stop update failed")
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (l *local) RemoveContainer(ctx context.Context, pod PodSandbox, ctr Container) error {
 | |
| 	if !l.IsEnabled() {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	if !l.needsRemoval(ctr.GetID()) {
 | |
| 		return nil
 | |
| 	}
 | |
| 
 | |
| 	l.stopContainer(ctx, pod, ctr)
 | |
| 
 | |
| 	request := &nri.RemoveContainerRequest{
 | |
| 		Pod:       podSandboxToNRI(pod),
 | |
| 		Container: containerToNRI(ctr),
 | |
| 	}
 | |
| 	err := l.nri.RemoveContainer(ctx, request)
 | |
| 	l.setState(request.Container.Id, Removed)
 | |
| 
 | |
| 	return err
 | |
| }
 | |
| 
 | |
| func (l *local) syncPlugin(ctx context.Context, syncFn nri.SyncCB) error {
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	log.G(ctx).Info("Synchronizing NRI (plugin) with current runtime state")
 | |
| 
 | |
| 	pods := podSandboxesToNRI(domains.listPodSandboxes())
 | |
| 	containers := containersToNRI(domains.listContainers())
 | |
| 
 | |
| 	for _, ctr := range containers {
 | |
| 		switch ctr.GetState() {
 | |
| 		case nri.ContainerState_CONTAINER_CREATED:
 | |
| 			l.setState(ctr.GetId(), Created)
 | |
| 		case nri.ContainerState_CONTAINER_RUNNING, nri.ContainerState_CONTAINER_PAUSED:
 | |
| 			l.setState(ctr.GetId(), Running)
 | |
| 		case nri.ContainerState_CONTAINER_STOPPED:
 | |
| 			l.setState(ctr.GetId(), Stopped)
 | |
| 		default:
 | |
| 			l.setState(ctr.GetId(), Removed)
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	updates, err := syncFn(ctx, pods, containers)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	_, err = l.applyUpdates(ctx, updates)
 | |
| 	if err != nil {
 | |
| 		// TODO(klihub): we ignore post-sync update failures for now
 | |
| 		log.G(ctx).WithError(err).Warnf("post-sync update failed")
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (l *local) updateFromPlugin(ctx context.Context, req []*nri.ContainerUpdate) ([]*nri.ContainerUpdate, error) {
 | |
| 	l.Lock()
 | |
| 	defer l.Unlock()
 | |
| 
 | |
| 	log.G(ctx).Trace("Unsolicited NRI container updates")
 | |
| 
 | |
| 	failed, err := l.applyUpdates(ctx, req)
 | |
| 	return failed, err
 | |
| }
 | |
| 
 | |
| func (l *local) applyUpdates(ctx context.Context, updates []*nri.ContainerUpdate) ([]*nri.ContainerUpdate, error) {
 | |
| 	// TODO(klihub): should we pre-save state and attempt a rollback on failure ?
 | |
| 	failed, err := domains.updateContainers(ctx, updates)
 | |
| 	return failed, err
 | |
| }
 | |
| 
 | |
| func (l *local) evictContainers(ctx context.Context, evict []*nri.ContainerEviction) ([]*nri.ContainerEviction, error) {
 | |
| 	failed, err := domains.evictContainers(ctx, evict)
 | |
| 	return failed, err
 | |
| }
 | |
| 
 | |
| func (l *local) setState(id string, state State) {
 | |
| 	if state != Removed {
 | |
| 		l.state[id] = state
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	delete(l.state, id)
 | |
| }
 | |
| 
 | |
| func (l *local) getState(id string) State {
 | |
| 	if state, ok := l.state[id]; ok {
 | |
| 		return state
 | |
| 	}
 | |
| 
 | |
| 	return Removed
 | |
| }
 | |
| 
 | |
| func (l *local) needsStopping(id string) bool {
 | |
| 	s := l.getState(id)
 | |
| 	if s == Created || s == Running {
 | |
| 		return true
 | |
| 	}
 | |
| 	return false
 | |
| }
 | |
| 
 | |
| func (l *local) needsRemoval(id string) bool {
 | |
| 	s := l.getState(id)
 | |
| 	if s == Created || s == Running || s == Stopped {
 | |
| 		return true
 | |
| 	}
 | |
| 	return false
 | |
| }
 |