Merge pull request #34287 from Random-Liu/add-sandbox-gc-minage
Automatic merge from submit-queue Add sandbox gc minage Fixes https://github.com/kubernetes/kubernetes/issues/34272. Fixes https://github.com/kubernetes/kubernetes/issues/33984. This PR: 1) Change `GetPodStatus` to get statuses of all containers in a pod instead of only containers belonging to existing sandboxes. This is because a sandbox may be removed by GC or by users, and kubelet should be able to deal with this case. 2) Change the CRI comments to clarify the timestamp unit (nanoseconds). 3) Add MinAge for sandbox GC Policy. @yujuhong @feiskyer @yifan-gu /cc @kubernetes/sig-node
This commit is contained in:
		| @@ -812,7 +812,7 @@ type PodSandboxStatus struct { | ||||
| 	Metadata *PodSandboxMetadata `protobuf:"bytes,2,opt,name=metadata" json:"metadata,omitempty"` | ||||
| 	// State of the sandbox. | ||||
| 	State *PodSandBoxState `protobuf:"varint,3,opt,name=state,enum=runtime.PodSandBoxState" json:"state,omitempty"` | ||||
| 	// Creation timestamp of the sandbox | ||||
| 	// Creation timestamp of the sandbox in nanoseconds. | ||||
| 	CreatedAt *int64 `protobuf:"varint,4,opt,name=created_at,json=createdAt" json:"created_at,omitempty"` | ||||
| 	// Network contains network status if network is handled by the runtime. | ||||
| 	Network *PodSandboxNetworkStatus `protobuf:"bytes,5,opt,name=network" json:"network,omitempty"` | ||||
| @@ -971,7 +971,7 @@ type PodSandbox struct { | ||||
| 	Metadata *PodSandboxMetadata `protobuf:"bytes,2,opt,name=metadata" json:"metadata,omitempty"` | ||||
| 	// The state of the PodSandbox | ||||
| 	State *PodSandBoxState `protobuf:"varint,3,opt,name=state,enum=runtime.PodSandBoxState" json:"state,omitempty"` | ||||
| 	// Creation timestamps of the sandbox | ||||
| 	// Creation timestamps of the sandbox in nanoseconds | ||||
| 	CreatedAt *int64 `protobuf:"varint,4,opt,name=created_at,json=createdAt" json:"created_at,omitempty"` | ||||
| 	// The labels of the PodSandbox | ||||
| 	Labels map[string]string `protobuf:"bytes,5,rep,name=labels" json:"labels,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` | ||||
| @@ -1740,7 +1740,7 @@ type Container struct { | ||||
| 	ImageRef *string `protobuf:"bytes,5,opt,name=image_ref,json=imageRef" json:"image_ref,omitempty"` | ||||
| 	// State is the state of the container. | ||||
| 	State *ContainerState `protobuf:"varint,6,opt,name=state,enum=runtime.ContainerState" json:"state,omitempty"` | ||||
| 	// Creation time of the container. | ||||
| 	// Creation time of the container in nanoseconds. | ||||
| 	CreatedAt *int64 `protobuf:"varint,7,opt,name=created_at,json=createdAt" json:"created_at,omitempty"` | ||||
| 	// Labels are key value pairs that may be used to scope and select individual resources. | ||||
| 	Labels map[string]string `protobuf:"bytes,8,rep,name=labels" json:"labels,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` | ||||
| @@ -1862,11 +1862,11 @@ type ContainerStatus struct { | ||||
| 	Metadata *ContainerMetadata `protobuf:"bytes,2,opt,name=metadata" json:"metadata,omitempty"` | ||||
| 	// Status of the container. | ||||
| 	State *ContainerState `protobuf:"varint,3,opt,name=state,enum=runtime.ContainerState" json:"state,omitempty"` | ||||
| 	// Creation time of the container. | ||||
| 	// Creation time of the container in nanoseconds. | ||||
| 	CreatedAt *int64 `protobuf:"varint,4,opt,name=created_at,json=createdAt" json:"created_at,omitempty"` | ||||
| 	// Start time of the container. | ||||
| 	// Start time of the container in nanoseconds. | ||||
| 	StartedAt *int64 `protobuf:"varint,5,opt,name=started_at,json=startedAt" json:"started_at,omitempty"` | ||||
| 	// Finish time of the container. | ||||
| 	// Finish time of the container in nanoseconds. | ||||
| 	FinishedAt *int64 `protobuf:"varint,6,opt,name=finished_at,json=finishedAt" json:"finished_at,omitempty"` | ||||
| 	// Exit code of the container. | ||||
| 	ExitCode *int32 `protobuf:"varint,7,opt,name=exit_code,json=exitCode" json:"exit_code,omitempty"` | ||||
|   | ||||
| @@ -251,7 +251,7 @@ message PodSandboxStatus { | ||||
|     optional PodSandboxMetadata metadata = 2; | ||||
|     // State of the sandbox. | ||||
|     optional PodSandBoxState state = 3; | ||||
|     // Creation timestamp of the sandbox | ||||
|     // Creation timestamp of the sandbox in nanoseconds. | ||||
|     optional int64 created_at = 4; | ||||
|     // Network contains network status if network is handled by the runtime. | ||||
|     optional PodSandboxNetworkStatus network = 5; | ||||
| @@ -296,7 +296,7 @@ message PodSandbox { | ||||
|     optional PodSandboxMetadata metadata = 2; | ||||
|     // The state of the PodSandbox | ||||
|     optional PodSandBoxState state = 3; | ||||
|     // Creation timestamps of the sandbox | ||||
|     // Creation timestamps of the sandbox in nanoseconds | ||||
|     optional int64 created_at = 4; | ||||
|     // The labels of the PodSandbox | ||||
|     map<string, string> labels = 5; | ||||
| @@ -533,7 +533,7 @@ message Container { | ||||
|     optional string image_ref = 5; | ||||
|     // State is the state of the container. | ||||
|     optional ContainerState state = 6; | ||||
|     // Creation time of the container. | ||||
|     // Creation time of the container in nanoseconds. | ||||
|     optional int64 created_at = 7; | ||||
|     // Labels are key value pairs that may be used to scope and select individual resources. | ||||
|     map<string, string> labels = 8; | ||||
| @@ -560,11 +560,11 @@ message ContainerStatus { | ||||
|     optional ContainerMetadata metadata = 2; | ||||
|     // Status of the container. | ||||
|     optional ContainerState state = 3; | ||||
|     // Creation time of the container. | ||||
|     // Creation time of the container in nanoseconds. | ||||
|     optional int64 created_at = 4; | ||||
|     // Start time of the container. | ||||
|     // Start time of the container in nanoseconds. | ||||
|     optional int64 started_at = 5; | ||||
|     // Finish time of the container. | ||||
|     // Finish time of the container in nanoseconds. | ||||
|     optional int64 finished_at = 6; | ||||
|     // Exit code of the container. | ||||
|     optional int32 exit_code = 7; | ||||
|   | ||||
| @@ -19,6 +19,7 @@ package dockershim | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"strings" | ||||
| 	"time" | ||||
|  | ||||
| 	dockertypes "github.com/docker/engine-api/types" | ||||
|  | ||||
| @@ -57,6 +58,8 @@ func toRuntimeAPIContainer(c *dockertypes.Container) (*runtimeApi.Container, err | ||||
| 	} | ||||
| 	labels, annotations := extractLabels(c.Labels) | ||||
| 	sandboxID := c.Labels[sandboxIDLabelKey] | ||||
| 	// The timestamp in dockertypes.Container is in seconds. | ||||
| 	createdAt := c.Created * int64(time.Second) | ||||
| 	return &runtimeApi.Container{ | ||||
| 		Id:           &c.ID, | ||||
| 		PodSandboxId: &sandboxID, | ||||
| @@ -64,6 +67,7 @@ func toRuntimeAPIContainer(c *dockertypes.Container) (*runtimeApi.Container, err | ||||
| 		Image:        &runtimeApi.ImageSpec{Image: &c.Image}, | ||||
| 		ImageRef:     &c.ImageID, | ||||
| 		State:        &state, | ||||
| 		CreatedAt:    &createdAt, | ||||
| 		Labels:       labels, | ||||
| 		Annotations:  annotations, | ||||
| 	}, nil | ||||
| @@ -117,11 +121,13 @@ func toRuntimeAPISandbox(c *dockertypes.Container) (*runtimeApi.PodSandbox, erro | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	labels, annotations := extractLabels(c.Labels) | ||||
| 	// The timestamp in dockertypes.Container is in seconds. | ||||
| 	createdAt := c.Created * int64(time.Second) | ||||
| 	return &runtimeApi.PodSandbox{ | ||||
| 		Id:          &c.ID, | ||||
| 		Metadata:    metadata, | ||||
| 		State:       &state, | ||||
| 		CreatedAt:   &c.Created, | ||||
| 		CreatedAt:   &createdAt, | ||||
| 		Labels:      labels, | ||||
| 		Annotations: annotations, | ||||
| 	}, nil | ||||
|   | ||||
| @@ -61,6 +61,7 @@ func TestListContainers(t *testing.T) { | ||||
|  | ||||
| 	expected := []*runtimeApi.Container{} | ||||
| 	state := runtimeApi.ContainerState_RUNNING | ||||
| 	var createdAt int64 = 0 | ||||
| 	for i := range configs { | ||||
| 		// We don't care about the sandbox id; pass a bogus one. | ||||
| 		sandboxID := fmt.Sprintf("sandboxid%d", i) | ||||
| @@ -77,6 +78,7 @@ func TestListContainers(t *testing.T) { | ||||
| 			Id:           &id, | ||||
| 			PodSandboxId: &sandboxID, | ||||
| 			State:        &state, | ||||
| 			CreatedAt:    &createdAt, | ||||
| 			Image:        configs[i].Image, | ||||
| 			ImageRef:     &imageRef, | ||||
| 			Labels:       configs[i].Labels, | ||||
|   | ||||
| @@ -35,8 +35,9 @@ import ( | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/dockershim" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/events" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/qos" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/types" | ||||
| 	"k8s.io/kubernetes/pkg/kubelet/util/format" | ||||
| 	"k8s.io/kubernetes/pkg/types" | ||||
| 	kubetypes "k8s.io/kubernetes/pkg/types" | ||||
| 	utilruntime "k8s.io/kubernetes/pkg/util/runtime" | ||||
| 	"k8s.io/kubernetes/pkg/util/sets" | ||||
| 	"k8s.io/kubernetes/pkg/util/term" | ||||
| @@ -115,7 +116,7 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb | ||||
| } | ||||
|  | ||||
| // getContainerLogsPath gets log path for container. | ||||
| func getContainerLogsPath(containerName string, podUID types.UID) string { | ||||
| func getContainerLogsPath(containerName string, podUID kubetypes.UID) string { | ||||
| 	return path.Join(podLogsRootDirectory, string(podUID), fmt.Sprintf("%s.log", containerName)) | ||||
| } | ||||
|  | ||||
| @@ -345,10 +346,11 @@ func getTerminationMessage(status *runtimeApi.ContainerStatus, kubeStatus *kubec | ||||
| 	return message | ||||
| } | ||||
|  | ||||
| // getKubeletContainerStatuses gets all containers' status for the pod sandbox. | ||||
| func (m *kubeGenericRuntimeManager) getKubeletContainerStatuses(podSandboxID string) ([]*kubecontainer.ContainerStatus, error) { | ||||
| // getPodContainerStatuses gets all containers' statuses for the pod. | ||||
| func (m *kubeGenericRuntimeManager) getPodContainerStatuses(uid kubetypes.UID, name, namespace string) ([]*kubecontainer.ContainerStatus, error) { | ||||
| 	// Select all containers of the given pod. | ||||
| 	containers, err := m.runtimeService.ListContainers(&runtimeApi.ContainerFilter{ | ||||
| 		PodSandboxId: &podSandboxID, | ||||
| 		LabelSelector: map[string]string{types.KubernetesPodUIDLabel: string(uid)}, | ||||
| 	}) | ||||
| 	if err != nil { | ||||
| 		glog.Errorf("ListContainers error: %v", err) | ||||
| @@ -377,16 +379,16 @@ func (m *kubeGenericRuntimeManager) getKubeletContainerStatuses(podSandboxID str | ||||
| 			Hash:         annotatedInfo.Hash, | ||||
| 			RestartCount: annotatedInfo.RestartCount, | ||||
| 			State:        toKubeContainerState(c.GetState()), | ||||
| 			CreatedAt:    time.Unix(status.GetCreatedAt(), 0), | ||||
| 			CreatedAt:    time.Unix(0, status.GetCreatedAt()), | ||||
| 		} | ||||
|  | ||||
| 		if c.GetState() == runtimeApi.ContainerState_RUNNING { | ||||
| 			cStatus.StartedAt = time.Unix(status.GetStartedAt(), 0) | ||||
| 			cStatus.StartedAt = time.Unix(0, status.GetStartedAt()) | ||||
| 		} else { | ||||
| 			cStatus.Reason = status.GetReason() | ||||
| 			cStatus.Message = status.GetMessage() | ||||
| 			cStatus.ExitCode = int(status.GetExitCode()) | ||||
| 			cStatus.FinishedAt = time.Unix(status.GetFinishedAt(), 0) | ||||
| 			cStatus.FinishedAt = time.Unix(0, status.GetFinishedAt()) | ||||
| 		} | ||||
|  | ||||
| 		tMessage := getTerminationMessage(status, cStatus, annotatedInfo.TerminationMessagePath) | ||||
|   | ||||
| @@ -27,6 +27,20 @@ import ( | ||||
| 	"k8s.io/kubernetes/pkg/types" | ||||
| ) | ||||
|  | ||||
| // sandboxMinGCAge is the minimum age for an empty sandbox before it is garbage collected. | ||||
| // This is introduced to avoid a sandbox being garbage collected before its containers are | ||||
| // created. | ||||
| // Notice that if the first container of a sandbox is created too late (exceeds sandboxMinGCAge), | ||||
| // the sandbox could still be garbage collected. In that case, SyncPod will recreate the | ||||
| // sandbox and make sure old containers are all stopped. | ||||
| // In the following figure, 'o' is a stopped sandbox, 'x' is a removed sandbox. It shows | ||||
| // that, approximately if a sandbox keeps crashing and MinAge = 1/n GC Period, there will | ||||
| // be 1/n more sandboxes not garbage collected. | ||||
| //      oooooo|xxxxxx|xxxxxx|  <--- MinAge = 0 | ||||
| //     gc     gc     gc    gc | ||||
| //      oooooo|oooxxx|xxxxxx|  <--- MinAge = 1/2 GC Period | ||||
| const sandboxMinGCAge time.Duration = 30 * time.Second | ||||
|  | ||||
| // containerGC is the manager of garbage collection. | ||||
| type containerGC struct { | ||||
| 	client    internalApi.RuntimeService | ||||
| @@ -141,7 +155,7 @@ func (cgc *containerGC) evictableContainers(minAge time.Duration) (containersByE | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		createdAt := time.Unix(container.GetCreatedAt(), 0) | ||||
| 		createdAt := time.Unix(0, container.GetCreatedAt()) | ||||
| 		if newestGCTime.Before(createdAt) { | ||||
| 			continue | ||||
| 		} | ||||
| @@ -182,6 +196,7 @@ func (cgc *containerGC) evictableSandboxes() ([]string, error) { | ||||
| 	} | ||||
|  | ||||
| 	evictSandboxes := make([]string, 0) | ||||
| 	newestGCTime := time.Now().Add(-sandboxMinGCAge) | ||||
| 	for _, sandbox := range sandboxes { | ||||
| 		// Prune out ready sandboxes. | ||||
| 		if sandbox.GetState() == runtimeApi.PodSandBoxState_READY { | ||||
| @@ -201,6 +216,12 @@ func (cgc *containerGC) evictableSandboxes() ([]string, error) { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		// Only garbage collect sandboxes older than sandboxMinGCAge. | ||||
| 		createdAt := time.Unix(0, sandbox.GetCreatedAt()) | ||||
| 		if createdAt.After(newestGCTime) { | ||||
| 			continue | ||||
| 		} | ||||
|  | ||||
| 		evictSandboxes = append(evictSandboxes, sandboxID) | ||||
| 	} | ||||
|  | ||||
|   | ||||
| @@ -876,15 +876,14 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namesp | ||||
| 			UID:       uid, | ||||
| 		}, | ||||
| 	}) | ||||
| 	glog.V(4).Infof("getSandboxIDByPodUID got sandbox IDs %q for pod %q(UID:%q)", podSandboxIDs, podFullName, string(uid)) | ||||
| 	glog.V(4).Infof("getSandboxIDByPodUID got sandbox IDs %q for pod %q", podSandboxIDs, podFullName) | ||||
|  | ||||
| 	sandboxStatuses := make([]*runtimeApi.PodSandboxStatus, len(podSandboxIDs)) | ||||
| 	containerStatuses := []*kubecontainer.ContainerStatus{} | ||||
| 	podIP := "" | ||||
| 	for idx, podSandboxID := range podSandboxIDs { | ||||
| 		podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID) | ||||
| 		if err != nil { | ||||
| 			glog.Errorf("PodSandboxStatus for pod (uid:%v, name:%s, namespace:%s) error: %v", uid, name, namespace, err) | ||||
| 			glog.Errorf("PodSandboxStatus of sandbox %q for pod %q error: %v", podSandboxID, podFullName, err) | ||||
| 			return nil, err | ||||
| 		} | ||||
| 		sandboxStatuses[idx] = podSandboxStatus | ||||
| @@ -893,13 +892,13 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(uid kubetypes.UID, name, namesp | ||||
| 		if idx == 0 && podSandboxStatus.GetState() == runtimeApi.PodSandBoxState_READY { | ||||
| 			podIP = m.determinePodSandboxIP(namespace, name, podSandboxStatus) | ||||
| 		} | ||||
|  | ||||
| 		statuses, err := m.getKubeletContainerStatuses(podSandboxID) | ||||
| 		if err != nil { | ||||
| 			glog.Errorf("getKubeletContainerStatuses for sandbox %s failed: %v", podSandboxID, err) | ||||
| 			return nil, err | ||||
| 	} | ||||
| 		containerStatuses = append(containerStatuses, statuses...) | ||||
|  | ||||
| 	// Get statuses of all containers visible in the pod. | ||||
| 	containerStatuses, err := m.getPodContainerStatuses(uid, name, namespace) | ||||
| 	if err != nil { | ||||
| 		glog.Errorf("getPodContainerStatuses for pod %q failed: %v", podFullName, err) | ||||
| 		return nil, err | ||||
| 	} | ||||
|  | ||||
| 	return &kubecontainer.PodStatus{ | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Submit Queue
					Kubernetes Submit Queue