kubernetes/pkg/kubelet/kuberuntime/kuberuntime_termination_order.go
Todd Neal 7bcc98c46b sidecars: terminate sidecars after main containers
Sidecars should terminate:
- after all main containers have exited
- serialized and in reverse order
2023-10-17 19:07:21 -05:00

115 lines
3.8 KiB
Go

/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kuberuntime
import (
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/types"
)
// terminationOrdering enforces the order in which sidecar containers are terminated. It records the
// dependencies between containers so that the pod termination process can block until either every
// container a sidecar depends on has finished terminating, or the grace period runs out.
type terminationOrdering struct {
	// prereqs maps a container name to the list of channels that container
	// has to wait on before it may begin terminating
	prereqs map[string][]chan struct{}
	// terminated maps a container name to a channel which, once closed,
	// signals that the container with that name has been terminated
	terminated map[string]chan struct{}
}
// newTerminationOrdering constructs a terminationOrdering based on the pod spec and the currently
// running containers.
//
// Every main container and every init container in the pod spec gets a "terminated" channel. The
// channel of any container that is not listed in runningContainerNames is closed up front, since
// such a container will never be reported as terminated and nothing should wait on it.
//
// Restartable init containers (sidecars) get prerequisites: each one waits for all main containers,
// and for the sidecar that follows it in the pod spec, so sidecars terminate serialized and in
// reverse order.
func newTerminationOrdering(pod *v1.Pod, runningContainerNames []string) *terminationOrdering {
	to := &terminationOrdering{
		prereqs:    map[string][]chan struct{}{},
		terminated: map[string]chan struct{}{},
	}

	runningContainers := make(map[string]struct{}, len(runningContainerNames))
	for _, name := range runningContainerNames {
		runningContainers[name] = struct{}{}
	}

	// sidecar containers need to wait on main containers, so we create a channel per main container
	// for them to wait on
	mainContainerChannels := make([]chan struct{}, 0, len(pod.Spec.Containers))
	for _, c := range pod.Spec.Containers {
		channel := make(chan struct{})
		to.terminated[c.Name] = channel
		mainContainerChannels = append(mainContainerChannels, channel)

		// if it's not a running container, pre-close the channel so nothing waits on it
		if _, isRunning := runningContainers[c.Name]; !isRunning {
			close(channel)
		}
	}

	var previousSidecarName string
	// walk the init containers in reverse order
	for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
		ic := pod.Spec.InitContainers[i]

		channel := make(chan struct{})
		to.terminated[ic.Name] = channel

		// if it's not a running container, pre-close the channel so nothing waits on it; otherwise
		// an earlier sidecar would block on it until its grace period expired
		if _, isRunning := runningContainers[ic.Name]; !isRunning {
			close(channel)
		}

		if types.IsRestartableInitContainer(&ic) {
			// sidecars need to wait for all main containers to exit
			to.prereqs[ic.Name] = append(to.prereqs[ic.Name], mainContainerChannels...)

			// if there is a later sidecar, this container needs to wait for it to finish
			if previousSidecarName != "" {
				to.prereqs[ic.Name] = append(to.prereqs[ic.Name], to.terminated[previousSidecarName])
			}
			previousSidecarName = ic.Name
		}
	}
	return to
}
// waitForTurn waits until it is time for the container with the specified name to begin terminating,
// up until the specified grace period (in seconds). It returns the number of seconds spent waiting.
//
// If gracePeriod <= 0, there is no wait and 0 is returned. A name with no recorded prerequisites
// does not block either.
func (o *terminationOrdering) waitForTurn(name string, gracePeriod int64) float64 {
	// if there is no grace period, we don't wait
	if gracePeriod <= 0 {
		return 0
	}

	start := time.Now()
	// a single timer bounds the total wait across all prerequisites, not each one individually
	remainingGrace := time.NewTimer(time.Duration(gracePeriod) * time.Second)
	// release the timer on return instead of leaving it pending until the grace period elapses
	defer remainingGrace.Stop()

	for _, c := range o.prereqs[name] {
		select {
		case <-c:
			// this prerequisite has terminated, move on to the next one
		case <-remainingGrace.C:
			// grace period expired, so immediately exit
			return time.Since(start).Seconds()
		}
	}

	return time.Since(start).Seconds()
}
// containerTerminated should be called once the container with the specified name has exited.
// It closes the channel recorded for that container, releasing anything waiting on it.
//
// Names without a recorded channel are ignored. A channel that is already closed is left alone, so
// calling this more than once for the same name does not panic with "close of closed channel".
// The check is not atomic: concurrent calls for the same name are not guarded here.
func (o *terminationOrdering) containerTerminated(name string) {
	ch, ok := o.terminated[name]
	if !ok {
		return
	}

	// nothing in the code visible here sends on these channels, so a successful receive is taken
	// to mean the channel has already been closed
	select {
	case <-ch:
		// already closed; closing again would panic
	default:
		close(ch)
	}
}