cri: ensure NRI API never has nil CRI

A nil CRIImplementation field can cause a nil pointer dereference and
panic during startup recovery.

Prior to this change, the nri.API struct would have a nil cri
(CRIImplementation) field after nri.NewAPI until nri.Register was
called.  Register is called mid-way through initialization of the CRI
plugin, but recovery for containers occurs prior to that.  Container
recovery includes establishing new exit monitors for existing containers
that were discovered.  When a container exits, NRI plugins are given the
opportunity to be notified about the lifecycle event, and this is done
by accessing that CRIImplementation field inside the nri.API.  If a
container exits prior to nri.Register being called, access to the
CRIImplementation field can cause a panic.

Here's the call-path:

* The CRI plugin starts running
  [here](ae71819c4f/pkg/cri/server/service.go (L222))
* It then [calls into](ae71819c4f/pkg/cri/server/service.go (L227))
  `recover()` to recover state from previous runs of containerd
* `recover()` then attempts to recover all containers through
  [`loadContainer()`](ae7d74b9e2/internal/cri/server/restart.go (L175))
* When `loadContainer()` finds a container that is still running, it waits
  for the task (internal containerd object) to exit and sets up
  [exit monitoring](ae7d74b9e2/internal/cri/server/restart.go (L391))
* Any exit that then happens must be
  [handled](ae7d74b9e2/internal/cri/server/events.go (L145))
* Handling an exit includes
  [deleting the Task](ae7d74b9e2/internal/cri/server/events.go (L188))
  and specifying [`nri.WithContainerExit`](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L348))
  to [notify](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L356))
  any subscribed NRI plugins
* NRI plugins need to know information about the pod (not just the sandbox),
  so before a plugin is notified the NRI API package
  [queries the Sandbox Store](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L232))
  through the CRI implementation
* The `cri` implementation member field in the `nri.API` struct is set as part of the
  [`Register()`](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L66)) method
* The `nri.Register()` method is only called
  [much further down in the CRI `Run()` method](ae71819c4f/pkg/cri/server/service.go (L279))

Signed-off-by: Samuel Karp <samuelkarp@google.com>
This commit is contained in:
Samuel Karp 2024-06-28 00:30:37 -07:00
parent ae7d74b9e2
commit 10aec359a0
No known key found for this signature in database
GPG Key ID: 997C5A3CD3167CB5
5 changed files with 13 additions and 13 deletions

View File

@ -46,9 +46,10 @@ type API struct {
nri nri.API nri nri.API
} }
func NewAPI(nri nri.API) *API { func NewAPI(nri nri.API, cri CRIImplementation) *API {
return &API{ return &API{
nri: nri, nri: nri,
cri: cri,
} }
} }
@ -58,12 +59,11 @@ func (a *API) IsDisabled() bool {
func (a *API) IsEnabled() bool { return !a.IsDisabled() } func (a *API) IsEnabled() bool { return !a.IsDisabled() }
func (a *API) Register(cri CRIImplementation) error { func (a *API) Register() error {
if a.IsDisabled() { if a.IsDisabled() {
return nil return nil
} }
a.cri = cri
nri.RegisterDomain(a) nri.RegisterDomain(a)
return a.nri.Start() return a.nri.Start()

View File

@ -37,11 +37,11 @@ import (
type API struct { type API struct {
} }
func NewAPI(nri.API) *API { func NewAPI(nri.API, CRIImplementation) *API {
return nil return nil
} }
func (a *API) Register(CRIImplementation) error { func (a *API) Register() error {
return nil return nil
} }

View File

@ -35,6 +35,7 @@ import (
"k8s.io/kubelet/pkg/cri/streaming" "k8s.io/kubelet/pkg/cri/streaming"
apitypes "github.com/containerd/containerd/api/types" apitypes "github.com/containerd/containerd/api/types"
containerd "github.com/containerd/containerd/v2/client" containerd "github.com/containerd/containerd/v2/client"
"github.com/containerd/containerd/v2/core/introspection" "github.com/containerd/containerd/v2/core/introspection"
_ "github.com/containerd/containerd/v2/core/runtime" // for typeurl init _ "github.com/containerd/containerd/v2/core/runtime" // for typeurl init
@ -50,6 +51,7 @@ import (
snapshotstore "github.com/containerd/containerd/v2/internal/cri/store/snapshot" snapshotstore "github.com/containerd/containerd/v2/internal/cri/store/snapshot"
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util" ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
"github.com/containerd/containerd/v2/internal/eventq" "github.com/containerd/containerd/v2/internal/eventq"
nriservice "github.com/containerd/containerd/v2/internal/nri"
"github.com/containerd/containerd/v2/internal/registrar" "github.com/containerd/containerd/v2/internal/registrar"
"github.com/containerd/containerd/v2/pkg/oci" "github.com/containerd/containerd/v2/pkg/oci"
osinterface "github.com/containerd/containerd/v2/pkg/os" osinterface "github.com/containerd/containerd/v2/pkg/os"
@ -164,7 +166,7 @@ type CRIServiceOptions struct {
StreamingConfig streaming.Config StreamingConfig streaming.Config
NRI *nri.API NRI nriservice.API
// SandboxControllers is a map of all the loaded sandbox controllers // SandboxControllers is a map of all the loaded sandbox controllers
SandboxControllers map[string]sandbox.Controller SandboxControllers map[string]sandbox.Controller
@ -236,7 +238,7 @@ func NewCRIService(options *CRIServiceOptions) (CRIService, runtime.RuntimeServi
} }
} }
c.nri = options.NRI c.nri = nri.NewAPI(options.NRI, &criImplementation{c})
c.runtimeHandlers, err = c.introspectRuntimeHandlers(ctx) c.runtimeHandlers, err = c.introspectRuntimeHandlers(ctx)
if err != nil { if err != nil {
@ -297,7 +299,7 @@ func (c *criService) Run(ready func()) error {
}() }()
// register CRI domain with NRI // register CRI domain with NRI
if err := c.nri.Register(&criImplementation{c}); err != nil { if err := c.nri.Register(); err != nil {
return fmt.Errorf("failed to set up NRI for CRI service: %w", err) return fmt.Errorf("failed to set up NRI for CRI service: %w", err)
} }

View File

@ -38,7 +38,7 @@ type API interface {
// IsEnabled returns true if the NRI interface is enabled and initialized. // IsEnabled returns true if the NRI interface is enabled and initialized.
IsEnabled() bool IsEnabled() bool
// Start start the NRI interface, allowing external NRI plugins to // Start starts the NRI interface, allowing external NRI plugins to
// connect, register, and hook themselves into the lifecycle events // connect, register, and hook themselves into the lifecycle events
// of pods and containers. // of pods and containers.
Start() error Start() error

View File

@ -32,7 +32,6 @@ import (
criconfig "github.com/containerd/containerd/v2/internal/cri/config" criconfig "github.com/containerd/containerd/v2/internal/cri/config"
"github.com/containerd/containerd/v2/internal/cri/constants" "github.com/containerd/containerd/v2/internal/cri/constants"
"github.com/containerd/containerd/v2/internal/cri/instrument" "github.com/containerd/containerd/v2/internal/cri/instrument"
"github.com/containerd/containerd/v2/internal/cri/nri"
"github.com/containerd/containerd/v2/internal/cri/server" "github.com/containerd/containerd/v2/internal/cri/server"
nriservice "github.com/containerd/containerd/v2/internal/nri" nriservice "github.com/containerd/containerd/v2/internal/nri"
"github.com/containerd/containerd/v2/plugins" "github.com/containerd/containerd/v2/plugins"
@ -212,7 +211,7 @@ func (c criGRPCServerWithTCP) RegisterTCP(s *grpc.Server) error {
} }
// Get the NRI plugin, and set up our NRI API for it. // Get the NRI plugin, and set up our NRI API for it.
func getNRIAPI(ic *plugin.InitContext) *nri.API { func getNRIAPI(ic *plugin.InitContext) nriservice.API {
const ( const (
pluginType = plugins.NRIApiPlugin pluginType = plugins.NRIApiPlugin
pluginName = "nri" pluginName = "nri"
@ -234,8 +233,7 @@ func getNRIAPI(ic *plugin.InitContext) *nri.API {
} }
log.G(ctx).Info("using experimental NRI integration - disable nri plugin to prevent this") log.G(ctx).Info("using experimental NRI integration - disable nri plugin to prevent this")
return api
return nri.NewAPI(api)
} }
func getSandboxControllers(ic *plugin.InitContext) (map[string]sandbox.Controller, error) { func getSandboxControllers(ic *plugin.InitContext) (map[string]sandbox.Controller, error) {