diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index ad58e2cd746..fc858693f71 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -182,6 +182,7 @@ const ( // owner: @adrianreber // kep: https://kep.k8s.io/2008 // alpha: v1.25 + // beta: v1.30 // // Enables container Checkpoint support in the kubelet ContainerCheckpoint featuregate.Feature = "ContainerCheckpoint" @@ -966,7 +967,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS CloudControllerManagerWebhook: {Default: false, PreRelease: featuregate.Alpha}, - ContainerCheckpoint: {Default: false, PreRelease: featuregate.Alpha}, + ContainerCheckpoint: {Default: true, PreRelease: featuregate.Beta}, ConsistentHTTPGetHandlers: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.31 diff --git a/pkg/kubelet/server/auth.go b/pkg/kubelet/server/auth.go index 962e1ce9507..5316ba45765 100644 --- a/pkg/kubelet/server/auth.go +++ b/pkg/kubelet/server/auth.go @@ -105,6 +105,8 @@ func (n nodeAuthorizerAttributesGetter) GetRequestAttributes(u user.Info, r *htt case isSubpath(requestPath, logsPath): // "log" to match other log subresources (pods/log, etc) attrs.Subresource = "log" + case isSubpath(requestPath, checkpointPath): + attrs.Subresource = "checkpoint" } klog.V(5).InfoS("Node request attributes", "user", attrs.GetUser().GetName(), "verb", attrs.GetVerb(), "resource", attrs.GetResource(), "subresource", attrs.GetSubresource()) diff --git a/pkg/kubelet/server/auth_test.go b/pkg/kubelet/server/auth_test.go index d5f2b6928f4..b6a32bbe68e 100644 --- a/pkg/kubelet/server/auth_test.go +++ b/pkg/kubelet/server/auth_test.go @@ -110,7 +110,7 @@ func AuthzTestCases() []AuthzTestCase { testPaths := map[string]string{ "/attach/{podNamespace}/{podID}/{containerName}": "proxy", "/attach/{podNamespace}/{podID}/{uid}/{containerName}": "proxy", - "/checkpoint/{podNamespace}/{podID}/{containerName}": "proxy", + 
"/checkpoint/{podNamespace}/{podID}/{containerName}": "checkpoint", "/configz": "proxy", "/containerLogs/{podNamespace}/{podID}/{containerName}": "proxy", "/debug/flags/v": "proxy", diff --git a/pkg/kubelet/server/server.go b/pkg/kubelet/server/server.go index d35abf14171..a630d8076b5 100644 --- a/pkg/kubelet/server/server.go +++ b/pkg/kubelet/server/server.go @@ -98,6 +98,7 @@ const ( proberMetricsPath = "/metrics/probes" statsPath = "/stats/" logsPath = "/logs/" + checkpointPath = "/checkpoint/" pprofBasePath = "/debug/pprof/" debugFlagPath = "/debug/flags/v" ) @@ -441,7 +442,7 @@ func (s *Server) InstallDefaultHandlers() { if utilfeature.DefaultFeatureGate.Enabled(features.ContainerCheckpoint) { s.addMetricsBucketMatcher("checkpoint") ws = &restful.WebService{} - ws.Path("/checkpoint").Produces(restful.MIME_JSON) + ws.Path(checkpointPath).Produces(restful.MIME_JSON) ws.Route(ws.POST("/{podNamespace}/{podID}/{containerName}"). To(s.checkpoint). Operation("checkpoint")) diff --git a/pkg/kubelet/server/server_test.go b/pkg/kubelet/server/server_test.go index f99e54417e4..aa10aac83fd 100644 --- a/pkg/kubelet/server/server_test.go +++ b/pkg/kubelet/server/server_test.go @@ -858,18 +858,24 @@ func TestContainerLogsWithInvalidTail(t *testing.T) { } func TestCheckpointContainer(t *testing.T) { - // Enable features.ContainerCheckpoint during test - defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerCheckpoint, true)() - - fw := newServerTest() - defer fw.testHTTPServer.Close() podNamespace := "other" podName := "foo" expectedContainerName := "baz" - // GetPodByName() should always fail - fw.fakeKubelet.podByNameFunc = func(namespace, name string) (*v1.Pod, bool) { - return nil, false + + setupTest := func(featureGate bool) *serverTestFramework { + // Set features.ContainerCheckpoint to featureGate while newServerTest() runs; the deferred call fires when setupTest returns + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ContainerCheckpoint, 
featureGate)() + + fw := newServerTest() + // GetPodByName() should always fail + fw.fakeKubelet.podByNameFunc = func(namespace, name string) (*v1.Pod, bool) { + return nil, false + } + return fw } + fw := setupTest(true) + defer fw.testHTTPServer.Close() + t.Run("wrong pod namespace", func(t *testing.T) { resp, err := http.Post(fw.testHTTPServer.URL+"/checkpoint/"+podNamespace+"/"+podName+"/"+expectedContainerName, "", nil) if err != nil { @@ -927,6 +933,19 @@ func TestCheckpointContainer(t *testing.T) { } assert.Equal(t, resp.StatusCode, 200) }) + + // Now test for 404 if checkpointing support is explicitly disabled. + fw.testHTTPServer.Close() + fw = setupTest(false) + defer fw.testHTTPServer.Close() + setPodByNameFunc(fw, podNamespace, podName, expectedContainerName) + t.Run("checkpointing fails because disabled", func(t *testing.T) { + resp, err := http.Post(fw.testHTTPServer.URL+"/checkpoint/"+podNamespace+"/"+podName+"/"+expectedContainerName, "", nil) + if err != nil { + t.Errorf("Got error POSTing: %v", err) + } + assert.Equal(t, 404, resp.StatusCode) + }) } func makeReq(t *testing.T, method, url, clientProtocol string) *http.Request { diff --git a/test/e2e_node/checkpoint_container.go b/test/e2e_node/checkpoint_container.go index 93bb770d28c..4096d827a72 100644 --- a/test/e2e_node/checkpoint_container.go +++ b/test/e2e_node/checkpoint_container.go @@ -34,11 +34,14 @@ import ( clientset "k8s.io/client-go/kubernetes" restclient "k8s.io/client-go/rest" "k8s.io/kubernetes/test/e2e/framework" + e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics" e2epod "k8s.io/kubernetes/test/e2e/framework/pod" "k8s.io/kubernetes/test/e2e/nodefeature" testutils "k8s.io/kubernetes/test/utils" imageutils "k8s.io/kubernetes/test/utils/image" admissionapi "k8s.io/pod-security-admission/api" + + "github.com/onsi/gomega" ) const ( @@ -75,6 +78,58 @@ func proxyPostRequest(ctx context.Context, c clientset.Interface, node, endpoint } } +func getCheckpointContainerMetric(ctx 
context.Context, f *framework.Framework, pod *v1.Pod) (int, error) { + framework.Logf("Getting 'checkpoint_container' metrics from %q", pod.Spec.NodeName) + ms, err := e2emetrics.GetKubeletMetrics( + ctx, + f.ClientSet, + pod.Spec.NodeName, + ) + if err != nil { + return 0, err + } + + runtimeOperationsTotal, ok := ms["runtime_operations_total"] + if !ok { + // If the metric was not found, it has probably not been written to yet. + return 0, nil + } + + for _, item := range runtimeOperationsTotal { + if item.Metric["__name__"] == "kubelet_runtime_operations_total" && item.Metric["operation_type"] == "checkpoint_container" { + return int(item.Value), nil + } + } + // No 'checkpoint_container' sample was found; it has probably not been written to yet. + return 0, nil +} + +func getCheckpointContainerErrorMetric(ctx context.Context, f *framework.Framework, pod *v1.Pod) (int, error) { + framework.Logf("Getting 'checkpoint_container' error metrics from %q", pod.Spec.NodeName) + ms, err := e2emetrics.GetKubeletMetrics( + ctx, + f.ClientSet, + pod.Spec.NodeName, + ) + if err != nil { + return 0, err + } + + runtimeOperationsErrorsTotal, ok := ms["runtime_operations_errors_total"] + if !ok { + // If the metric was not found, it has probably not been written to yet. + return 0, nil + } + + for _, item := range runtimeOperationsErrorsTotal { + if item.Metric["__name__"] == "kubelet_runtime_operations_errors_total" && item.Metric["operation_type"] == "checkpoint_container" { + return int(item.Value), nil + } + } + // No 'checkpoint_container' sample was found; it has probably not been written to yet. 
+ return 0, nil +} + var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, func() { f := framework.NewDefaultFramework("checkpoint-container-test") f.NamespacePodSecurityLevel = admissionapi.LevelBaseline @@ -82,7 +137,10 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun ginkgo.By("creating a target pod") podClient := e2epod.NewPodClient(f) pod := podClient.CreateSync(ctx, &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{Name: "checkpoint-container-pod"}, + ObjectMeta: metav1.ObjectMeta{ + Name: "checkpoint-container-pod", + Namespace: f.Namespace.Name, + }, Spec: v1.PodSpec{ Containers: []v1.Container{ { @@ -108,6 +166,15 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun framework.Failf("pod %q should be ready", p.Name) } + // No checkpoint operation should have been recorded yet + checkpointContainerMetric, err := getCheckpointContainerMetric(ctx, f, pod) + framework.ExpectNoError(err) + gomega.Expect(checkpointContainerMetric).To(gomega.Equal(0)) + // No checkpoint error should have been recorded yet + checkpointContainerErrorMetric, err := getCheckpointContainerErrorMetric(ctx, f, pod) + framework.ExpectNoError(err) + gomega.Expect(checkpointContainerErrorMetric).To(gomega.Equal(0)) + framework.Logf( "About to checkpoint container %q on %q", pod.Spec.Containers[0].Name, @@ -144,6 +211,12 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun // If the container engine has not implemented the Checkpoint CRI API // we will get 500 and a message with // '(rpc error: code = Unimplemented desc = unknown method CheckpointContainer' + // or + // '(rpc error: code = Unimplemented desc = method CheckpointContainer not implemented)' + // if the container engine reports the method as not implemented. 
+ // or + // '(rpc error: code = Unknown desc = checkpoint/restore support not available)' + // if the container engine has explicitly disabled checkpoint/restore support. if (int(statusError.ErrStatus.Code)) == http.StatusInternalServerError { if strings.Contains( statusError.ErrStatus.Message, @@ -152,8 +225,26 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun ginkgo.Skip("Container engine does not implement 'CheckpointContainer'") return } + if strings.Contains( + statusError.ErrStatus.Message, + "(rpc error: code = Unimplemented desc = method CheckpointContainer not implemented)", + ) { + ginkgo.Skip("Container engine does not implement 'CheckpointContainer'") + return + } + if strings.Contains( + statusError.ErrStatus.Message, + "(rpc error: code = Unknown desc = checkpoint/restore support not available)", + ) { + ginkgo.Skip("Container engine does not implement 'CheckpointContainer'") + return + } } - framework.Failf("Unexpected status code (%d) during 'CheckpointContainer'", statusError.ErrStatus.Code) + framework.Failf( + "Unexpected status code (%d) during 'CheckpointContainer': %q", + statusError.ErrStatus.Code, + statusError.ErrStatus.Message, + ) } framework.ExpectNoError(err) @@ -205,5 +296,13 @@ var _ = SIGDescribe("Checkpoint Container", nodefeature.CheckpointContainer, fun // cleanup checkpoint archive os.RemoveAll(item) } + // Exactly one checkpoint operation should have been recorded + checkpointContainerMetric, err = getCheckpointContainerMetric(ctx, f, pod) + framework.ExpectNoError(err) + gomega.Expect(checkpointContainerMetric).To(gomega.Equal(1)) + // No checkpoint error should have been recorded + checkpointContainerErrorMetric, err = getCheckpointContainerErrorMetric(ctx, f, pod) + framework.ExpectNoError(err) + gomega.Expect(checkpointContainerErrorMetric).To(gomega.Equal(0)) }) })