cri: ensure NRI API never has nil CRI
A nil CRIImplementation field can cause a nil pointer dereference and panic during startup recovery. Prior to this change, the nri.API struct would have a nil cri (CRIImplementation) field after nri.NewAPI until nri.Register was called. Register is called mid-way through initialization of the CRI plugin, but recovery for containers occurs prior to that. Container recovery includes establishing new exit monitors for existing containers that were discovered. When a container exits, NRI plugins are given the opportunity to be notified about the lifecycle event, and this is done by accessing that CRIImplementation field inside the nri.API. If a container exits prior to nri.Register being called, access to the CRIImplementation field can cause a panic. Here's the call-path: * The CRI plugin starts running [here](ae71819c4f/pkg/cri/server/service.go (L222)
) * It then [calls into](ae71819c4f/pkg/cri/server/service.go (L227)
) `recover()` to recover state from previous runs of containerd * `recover()` then attempts to recover all containers through [`loadContainer()`](ae7d74b9e2/internal/cri/server/restart.go (L175)
) * When `loadContainer()` finds a container that is still running, it waits for the task (internal containerd object) to exit and sets up [exit monitoring](ae7d74b9e2/internal/cri/server/restart.go (L391)
) * Any exit that then happens must be [handled](ae7d74b9e2/internal/cri/server/events.go (L145)
) * Handling an exit includes [deleting the Task](ae7d74b9e2/internal/cri/server/events.go (L188)
) and specifying [`nri.WithContainerExit`](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L348)
) to [notify](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L356)
) any subscribed NRI plugins * NRI plugins need to know information about the pod (not just the sandbox), so before a plugin is notified the NRI API package [queries the Sandbox Store](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L232)
) through the CRI implementation * The `cri` implementation member field in the `nri.API` struct is set as part of the [`Register()`](ae7d74b9e2/internal/cri/nri/nri_api_linux.go (L66)
) method * The `nri.Register()` method is only called [much further down in the CRI `Run()` method](ae71819c4f/pkg/cri/server/service.go (L279)
) Signed-off-by: Samuel Karp <samuelkarp@google.com>
This commit is contained in:
parent
ae7d74b9e2
commit
10aec359a0
@ -46,9 +46,10 @@ type API struct {
|
|||||||
nri nri.API
|
nri nri.API
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewAPI(nri nri.API) *API {
|
func NewAPI(nri nri.API, cri CRIImplementation) *API {
|
||||||
return &API{
|
return &API{
|
||||||
nri: nri,
|
nri: nri,
|
||||||
|
cri: cri,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -58,12 +59,11 @@ func (a *API) IsDisabled() bool {
|
|||||||
|
|
||||||
func (a *API) IsEnabled() bool { return !a.IsDisabled() }
|
func (a *API) IsEnabled() bool { return !a.IsDisabled() }
|
||||||
|
|
||||||
func (a *API) Register(cri CRIImplementation) error {
|
func (a *API) Register() error {
|
||||||
if a.IsDisabled() {
|
if a.IsDisabled() {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
a.cri = cri
|
|
||||||
nri.RegisterDomain(a)
|
nri.RegisterDomain(a)
|
||||||
|
|
||||||
return a.nri.Start()
|
return a.nri.Start()
|
||||||
|
@ -37,11 +37,11 @@ import (
|
|||||||
type API struct {
|
type API struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewAPI(nri.API) *API {
|
func NewAPI(nri.API, CRIImplementation) *API {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *API) Register(CRIImplementation) error {
|
func (a *API) Register() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,6 +35,7 @@ import (
|
|||||||
"k8s.io/kubelet/pkg/cri/streaming"
|
"k8s.io/kubelet/pkg/cri/streaming"
|
||||||
|
|
||||||
apitypes "github.com/containerd/containerd/api/types"
|
apitypes "github.com/containerd/containerd/api/types"
|
||||||
|
|
||||||
containerd "github.com/containerd/containerd/v2/client"
|
containerd "github.com/containerd/containerd/v2/client"
|
||||||
"github.com/containerd/containerd/v2/core/introspection"
|
"github.com/containerd/containerd/v2/core/introspection"
|
||||||
_ "github.com/containerd/containerd/v2/core/runtime" // for typeurl init
|
_ "github.com/containerd/containerd/v2/core/runtime" // for typeurl init
|
||||||
@ -50,6 +51,7 @@ import (
|
|||||||
snapshotstore "github.com/containerd/containerd/v2/internal/cri/store/snapshot"
|
snapshotstore "github.com/containerd/containerd/v2/internal/cri/store/snapshot"
|
||||||
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
|
ctrdutil "github.com/containerd/containerd/v2/internal/cri/util"
|
||||||
"github.com/containerd/containerd/v2/internal/eventq"
|
"github.com/containerd/containerd/v2/internal/eventq"
|
||||||
|
nriservice "github.com/containerd/containerd/v2/internal/nri"
|
||||||
"github.com/containerd/containerd/v2/internal/registrar"
|
"github.com/containerd/containerd/v2/internal/registrar"
|
||||||
"github.com/containerd/containerd/v2/pkg/oci"
|
"github.com/containerd/containerd/v2/pkg/oci"
|
||||||
osinterface "github.com/containerd/containerd/v2/pkg/os"
|
osinterface "github.com/containerd/containerd/v2/pkg/os"
|
||||||
@ -164,7 +166,7 @@ type CRIServiceOptions struct {
|
|||||||
|
|
||||||
StreamingConfig streaming.Config
|
StreamingConfig streaming.Config
|
||||||
|
|
||||||
NRI *nri.API
|
NRI nriservice.API
|
||||||
|
|
||||||
// SandboxControllers is a map of all the loaded sandbox controllers
|
// SandboxControllers is a map of all the loaded sandbox controllers
|
||||||
SandboxControllers map[string]sandbox.Controller
|
SandboxControllers map[string]sandbox.Controller
|
||||||
@ -236,7 +238,7 @@ func NewCRIService(options *CRIServiceOptions) (CRIService, runtime.RuntimeServi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
c.nri = options.NRI
|
c.nri = nri.NewAPI(options.NRI, &criImplementation{c})
|
||||||
|
|
||||||
c.runtimeHandlers, err = c.introspectRuntimeHandlers(ctx)
|
c.runtimeHandlers, err = c.introspectRuntimeHandlers(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -297,7 +299,7 @@ func (c *criService) Run(ready func()) error {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
// register CRI domain with NRI
|
// register CRI domain with NRI
|
||||||
if err := c.nri.Register(&criImplementation{c}); err != nil {
|
if err := c.nri.Register(); err != nil {
|
||||||
return fmt.Errorf("failed to set up NRI for CRI service: %w", err)
|
return fmt.Errorf("failed to set up NRI for CRI service: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,7 +38,7 @@ type API interface {
|
|||||||
// IsEnabled returns true if the NRI interface is enabled and initialized.
|
// IsEnabled returns true if the NRI interface is enabled and initialized.
|
||||||
IsEnabled() bool
|
IsEnabled() bool
|
||||||
|
|
||||||
// Start start the NRI interface, allowing external NRI plugins to
|
// Start starts the NRI interface, allowing external NRI plugins to
|
||||||
// connect, register, and hook themselves into the lifecycle events
|
// connect, register, and hook themselves into the lifecycle events
|
||||||
// of pods and containers.
|
// of pods and containers.
|
||||||
Start() error
|
Start() error
|
||||||
|
@ -32,7 +32,6 @@ import (
|
|||||||
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
|
criconfig "github.com/containerd/containerd/v2/internal/cri/config"
|
||||||
"github.com/containerd/containerd/v2/internal/cri/constants"
|
"github.com/containerd/containerd/v2/internal/cri/constants"
|
||||||
"github.com/containerd/containerd/v2/internal/cri/instrument"
|
"github.com/containerd/containerd/v2/internal/cri/instrument"
|
||||||
"github.com/containerd/containerd/v2/internal/cri/nri"
|
|
||||||
"github.com/containerd/containerd/v2/internal/cri/server"
|
"github.com/containerd/containerd/v2/internal/cri/server"
|
||||||
nriservice "github.com/containerd/containerd/v2/internal/nri"
|
nriservice "github.com/containerd/containerd/v2/internal/nri"
|
||||||
"github.com/containerd/containerd/v2/plugins"
|
"github.com/containerd/containerd/v2/plugins"
|
||||||
@ -212,7 +211,7 @@ func (c criGRPCServerWithTCP) RegisterTCP(s *grpc.Server) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Get the NRI plugin, and set up our NRI API for it.
|
// Get the NRI plugin, and set up our NRI API for it.
|
||||||
func getNRIAPI(ic *plugin.InitContext) *nri.API {
|
func getNRIAPI(ic *plugin.InitContext) nriservice.API {
|
||||||
const (
|
const (
|
||||||
pluginType = plugins.NRIApiPlugin
|
pluginType = plugins.NRIApiPlugin
|
||||||
pluginName = "nri"
|
pluginName = "nri"
|
||||||
@ -234,8 +233,7 @@ func getNRIAPI(ic *plugin.InitContext) *nri.API {
|
|||||||
}
|
}
|
||||||
|
|
||||||
log.G(ctx).Info("using experimental NRI integration - disable nri plugin to prevent this")
|
log.G(ctx).Info("using experimental NRI integration - disable nri plugin to prevent this")
|
||||||
|
return api
|
||||||
return nri.NewAPI(api)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getSandboxControllers(ic *plugin.InitContext) (map[string]sandbox.Controller, error) {
|
func getSandboxControllers(ic *plugin.InitContext) (map[string]sandbox.Controller, error) {
|
||||||
|
Loading…
Reference in New Issue
Block a user