kubernetes/pkg/kubelet/kuberuntime/kuberuntime_termination_order.go

/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kuberuntime

import (
	"time"

	v1 "k8s.io/api/core/v1"

	"k8s.io/kubernetes/pkg/kubelet/types"
)

// terminationOrdering is used to enforce a termination ordering for sidecar containers.  It sets up
// dependencies between sidecars and allows the pod termination process to wait until the grace period
// expires, or all dependent containers have finished terminating.
//
// Containers are identified by name. Each tracked container owns one "terminated" channel that is
// closed (never sent on) to broadcast that the container has exited; a container that must terminate
// after others holds references to those containers' channels in prereqs.
type terminationOrdering struct {
	// terminated is a map from container name to a channel that, once closed,
	// indicates that the container with that name was terminated. A channel may
	// already be closed at construction time (e.g. for a main container that was
	// not running), so that nothing waits on it.
	terminated map[string]chan struct{}
	// prereqs is a map from container name to the list of channels that the container
	// must wait on to ensure termination ordering. Containers without an entry
	// have nothing to wait for.
	prereqs map[string][]chan struct{}
}

// newTerminationOrdering constructs a terminationOrdering based on the pod spec and the currently running containers.
//
// Every container and init container in the pod spec gets a "terminated" channel. Channels belonging
// to containers that are not listed in runningContainerNames are closed immediately, because nothing
// will ever report their termination and dependents must not block on them.
//
// Each restartable init container (sidecar) is given these prerequisites:
//   - the channel of every main container, and
//   - the channel of the sidecar that follows it in pod.Spec.InitContainers, if any,
//
// so that sidecars terminate after the main containers and in reverse order of their declaration.
func newTerminationOrdering(pod *v1.Pod, runningContainerNames []string) *terminationOrdering {
	to := &terminationOrdering{
		prereqs:    map[string][]chan struct{}{},
		terminated: map[string]chan struct{}{},
	}

	runningContainers := make(map[string]struct{}, len(runningContainerNames))
	for _, name := range runningContainerNames {
		runningContainers[name] = struct{}{}
	}

	// sidecar containers need to wait on main containers, so we create a channel per main container
	// for them to wait on
	mainContainerChannels := make([]chan struct{}, 0, len(pod.Spec.Containers))
	for _, c := range pod.Spec.Containers {
		channel := make(chan struct{})
		to.terminated[c.Name] = channel
		mainContainerChannels = append(mainContainerChannels, channel)

		// if it's not a running container, pre-close the channel so nothing waits on it
		if _, isRunning := runningContainers[c.Name]; !isRunning {
			close(channel)
		}
	}

	var previousSidecarName string
	// walk the init containers in reverse order, so that each sidecar can depend on the one
	// declared after it
	for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
		ic := &pod.Spec.InitContainers[i]

		channel := make(chan struct{})
		to.terminated[ic.Name] = channel

		// if it's not a running container, pre-close the channel so nothing waits on it;
		// otherwise an earlier sidecar would block on an already-exited sidecar until its
		// grace period expires
		if _, isRunning := runningContainers[ic.Name]; !isRunning {
			close(channel)
		}

		if types.IsRestartableInitContainer(ic) {
			// sidecars need to wait for all main containers to exit
			to.prereqs[ic.Name] = append(to.prereqs[ic.Name], mainContainerChannels...)

			// if there is a later sidecar, this container needs to wait for it to finish
			if previousSidecarName != "" {
				to.prereqs[ic.Name] = append(to.prereqs[ic.Name], to.terminated[previousSidecarName])
			}
			previousSidecarName = ic.Name
		}
	}
	return to
}

// waitForTurn waits until it is time for the container with the specified name to begin terminating, up until
// the specified grace period.  If gracePeriod <= 0, there is no wait.
//
// The prerequisites registered for name are waited on one after another, all against a single
// timer of gracePeriod seconds, so the total time spent here never exceeds the grace period.
// A container with no registered prerequisites returns immediately.
//
// It returns the number of seconds spent waiting (0 when gracePeriod <= 0).
func (o *terminationOrdering) waitForTurn(name string, gracePeriod int64) float64 {
	// if there is no grace period, we don't wait
	if gracePeriod <= 0 {
		return 0
	}

	start := time.Now()
	remainingGrace := time.NewTimer(time.Duration(gracePeriod) * time.Second)
	// release the timer as soon as we return instead of leaving it pending
	// for the remainder of the grace period
	defer remainingGrace.Stop()

	for _, c := range o.prereqs[name] {
		select {
		case <-c:
			// prerequisite container has terminated; move on to the next one
		case <-remainingGrace.C:
			// grace period expired, so immediately exit
			return time.Since(start).Seconds()
		}
	}

	return time.Since(start).Seconds()
}

// containerTerminated should be called once the container with the specified name has exited.
//
// It closes the container's "terminated" channel, releasing every container that is waiting on it.
// Names that are not tracked are ignored. Calling it for a container whose channel is already
// closed (a repeated call, or a channel that was pre-closed at construction because the container
// was not running) is a no-op rather than a "close of closed channel" panic.
//
// NOTE: the closed-check and the close are not atomic, so concurrent calls for the *same* name
// are not protected against; concurrent calls for different names are fine.
func (o *terminationOrdering) containerTerminated(name string) {
	ch, ok := o.terminated[name]
	if !ok {
		return
	}

	select {
	case <-ch:
		// nothing is ever sent on these channels, so a successful receive
		// means the channel is already closed
	default:
		close(ch)
	}
}