From 1d43fd46940b6182bc8da625aa4cd1b1a1ca5cee Mon Sep 17 00:00:00 2001 From: Haowei Cai Date: Tue, 10 Nov 2020 15:12:14 -0800 Subject: [PATCH 1/4] add apiserver lease garbage collector --- .../apiserverleasegc/gc_controller.go | 135 ++++++++++++++++++ pkg/controlplane/instance.go | 18 ++- 2 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 pkg/controlplane/controller/apiserverleasegc/gc_controller.go diff --git a/pkg/controlplane/controller/apiserverleasegc/gc_controller.go b/pkg/controlplane/controller/apiserverleasegc/gc_controller.go new file mode 100644 index 00000000000..a8621e3b1ee --- /dev/null +++ b/pkg/controlplane/controller/apiserverleasegc/gc_controller.go @@ -0,0 +1,135 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package apiserverleasegc + +import ( + "context" + "fmt" + "time" + + v1 "k8s.io/api/coordination/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + informers "k8s.io/client-go/informers/coordination/v1" + "k8s.io/client-go/kubernetes" + listers "k8s.io/client-go/listers/coordination/v1" + "k8s.io/client-go/tools/cache" + + "k8s.io/klog/v2" +) + +// Controller deletes expired apiserver leases. +type Controller struct { + kubeclientset kubernetes.Interface + + leaseLister listers.LeaseLister + leaseInformer cache.SharedIndexInformer + leasesSynced cache.InformerSynced + + leaseNamespace string + + gcCheckPeriod time.Duration +} + +// NewAPIServerLeaseGC creates a new Controller. +func NewAPIServerLeaseGC(clientset kubernetes.Interface, gcCheckPeriod time.Duration, leaseNamespace, leaseLabelSelector string) *Controller { + // we construct our own informer because we need such a small subset of the information available. + // Just one namespace with label selection. + leaseInformer := informers.NewFilteredLeaseInformer( + clientset, + leaseNamespace, + 0, + cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, + func(listOptions *metav1.ListOptions) { + listOptions.LabelSelector = leaseLabelSelector + }) + return &Controller{ + kubeclientset: clientset, + leaseLister: listers.NewLeaseLister(leaseInformer.GetIndexer()), + leaseInformer: leaseInformer, + leasesSynced: leaseInformer.HasSynced, + leaseNamespace: leaseNamespace, + gcCheckPeriod: gcCheckPeriod, + } +} + +// Run starts one worker. +func (c *Controller) Run(stopCh <-chan struct{}) { + defer utilruntime.HandleCrash() + defer klog.Infof("Shutting down apiserver lease garbage collector") + + klog.Infof("Starting apiserver lease garbage collector") + + // we have a personal informer that is narrowly scoped, start it. + go c.leaseInformer.Run(stopCh) + + if !cache.WaitForCacheSync(stopCh, c.leasesSynced) { + utilruntime.HandleError(fmt.Errorf("timed out waiting for caches to sync")) + return + } + + go wait.Until(c.gc, c.gcCheckPeriod, stopCh) + + <-stopCh +} + +func (c *Controller) gc() { + leases, err := c.leaseLister.Leases(c.leaseNamespace).List(labels.Everything()) + if err != nil { + klog.Errorf("Error while listing apiserver leases: %v", err) + return + } + for _, lease := range leases { + // evaluate lease from cache + if !isLeaseExpired(lease) { + continue + } + // double check latest lease from apiserver before deleting + lease, err := c.kubeclientset.CoordinationV1().Leases(c.leaseNamespace).Get(context.TODO(), lease.Name, metav1.GetOptions{}) + if err != nil && !errors.IsNotFound(err) { + klog.Errorf("Error getting lease: %v", err) + continue + } + if errors.IsNotFound(err) || lease == nil { + // the lease was deleted by the same GC controller in another apiserver + continue + } + // evaluate lease from apiserver + if !isLeaseExpired(lease) { + continue + } + if err := c.kubeclientset.CoordinationV1().Leases(c.leaseNamespace).Delete( + context.TODO(), lease.Name, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { + // If we get a 404, the lease was deleted by the same GC controller + // in another apiserver. Only log error if we get something other than 404. + klog.Errorf("Error deleting lease: %v", err) + } + } +} + +func isLeaseExpired(lease *v1.Lease) bool { + currentTime := time.Now() + // Leases created by the apiserver lease controller should have non-nil renew time + // and lease duration set. Leases without these fields set are invalid and should + // be GC'ed. + return lease.Spec.RenewTime == nil || + lease.Spec.LeaseDurationSeconds == nil || + lease.Spec.RenewTime.Add(time.Duration(*lease.Spec.LeaseDurationSeconds)*time.Second).Before(currentTime) +} diff --git a/pkg/controlplane/instance.go b/pkg/controlplane/instance.go index 744185f9901..e9ffdb4441f 100644 --- a/pkg/controlplane/instance.go +++ b/pkg/controlplane/instance.go @@ -83,6 +83,7 @@ import ( discoveryclient "k8s.io/client-go/kubernetes/typed/discovery/v1beta1" "k8s.io/component-helpers/lease" api "k8s.io/kubernetes/pkg/apis/core" + "k8s.io/kubernetes/pkg/controlplane/controller/apiserverleasegc" "k8s.io/kubernetes/pkg/controlplane/controller/clusterauthenticationtrust" "k8s.io/kubernetes/pkg/controlplane/reconcilers" "k8s.io/kubernetes/pkg/controlplane/tunneler" @@ -130,6 +131,8 @@ const ( IdentityLeaseComponentLabelKey = "k8s.io/component" // KubeAPIServer defines variable used internally when referring to kube-apiserver component KubeAPIServer = "kube-apiserver" + // KubeAPIServerIdentityLeaseLabelSelector selects kube-apiserver identity leases + KubeAPIServerIdentityLeaseLabelSelector = IdentityLeaseComponentLabelKey + "=" + KubeAPIServer ) // ExtraConfig defines extra configuration for the master @@ -496,7 +499,7 @@ func (c completedConfig) New(delegationTarget genericapiserver.DelegationTarget) }) if utilfeature.DefaultFeatureGate.Enabled(apiserverfeatures.APIServerIdentity) { - m.GenericAPIServer.AddPostStartHookOrDie("start-kube-apiserver-lease-controller", func(hookContext genericapiserver.PostStartHookContext) error { + m.GenericAPIServer.AddPostStartHookOrDie("start-kube-apiserver-identity-lease-controller", func(hookContext genericapiserver.PostStartHookContext) error { kubeClient, err := kubernetes.NewForConfig(hookContext.LoopbackClientConfig) if err != nil { return err @@ -513,6 +516,19 @@ func (c completedConfig) New(delegationTarget genericapiserver.DelegationTarget) go controller.Run(wait.NeverStop) return nil }) + m.GenericAPIServer.AddPostStartHookOrDie("start-kube-apiserver-identity-lease-garbage-collector", func(hookContext genericapiserver.PostStartHookContext) error { + kubeClient, err := kubernetes.NewForConfig(hookContext.LoopbackClientConfig) + if err != nil { + return err + } + go apiserverleasegc.NewAPIServerLeaseGC( + kubeClient, + time.Duration(c.ExtraConfig.IdentityLeaseDurationSeconds)*time.Second, + metav1.NamespaceSystem, + KubeAPIServerIdentityLeaseLabelSelector, + ).Run(wait.NeverStop) + return nil + }) } return m, nil From 4794ba18ed9c8427e897c29b1fce485defbe8e50 Mon Sep 17 00:00:00 2001 From: Haowei Cai Date: Tue, 10 Nov 2020 15:12:33 -0800 Subject: [PATCH 2/4] integration test --- .../master/apiserver_identity_test.go | 105 +++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/test/integration/master/apiserver_identity_test.go b/test/integration/master/apiserver_identity_test.go index a36c4651a38..d02d4a379ff 100644 --- a/test/integration/master/apiserver_identity_test.go +++ b/test/integration/master/apiserver_identity_test.go @@ -22,6 +22,8 @@ import ( "testing" "time" + coordinationv1 "k8s.io/api/coordination/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apiserver/pkg/features" @@ -31,9 +33,12 @@ import ( kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing" "k8s.io/kubernetes/pkg/controlplane" "k8s.io/kubernetes/test/integration/framework" + "k8s.io/utils/pointer" ) -const apiserverIdentityLeaseLabelSelector = controlplane.IdentityLeaseComponentLabelKey + "=" + controlplane.KubeAPIServer +const ( + testLeaseName = "apiserver-lease-test" +) func TestCreateLeaseOnStart(t *testing.T) { defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.APIServerIdentity, true)() @@ -50,7 +55,7 @@ func TestCreateLeaseOnStart(t *testing.T) { leases, err := kubeclient. CoordinationV1(). Leases(metav1.NamespaceSystem). - List(context.TODO(), metav1.ListOptions{LabelSelector: apiserverIdentityLeaseLabelSelector}) + List(context.TODO(), metav1.ListOptions{LabelSelector: controlplane.KubeAPIServerIdentityLeaseLabelSelector}) if err != nil { return false, err } @@ -62,3 +67,99 @@ func TestCreateLeaseOnStart(t *testing.T) { t.Fatalf("Failed to see the kube-apiserver lease: %v", err) } } + +func TestLeaseGarbageCollection(t *testing.T) { + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.APIServerIdentity, true)() + result := kubeapiservertesting.StartTestServerOrDie(t, nil, + // This shorten the GC check period to make the test run faster. + // Since we are testing GC behavior on leases we create, what happens to + // the real apiserver lease doesn't matter. + []string{"--identity-lease-duration-seconds=1"}, + framework.SharedEtcd()) + defer result.TearDownFn() + + kubeclient, err := kubernetes.NewForConfig(result.ClientConfig) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + expiredLease := newTestLease(time.Now().Add(-2*time.Hour), metav1.NamespaceSystem) + t.Run("expired apiserver lease should be garbage collected", + testLeaseGarbageCollected(t, kubeclient, expiredLease)) + + freshLease := newTestLease(time.Now().Add(-2*time.Minute), metav1.NamespaceSystem) + t.Run("fresh apiserver lease should not be garbage collected", + testLeaseNotGarbageCollected(t, kubeclient, freshLease)) + + expiredLease.Labels = nil + t.Run("expired non-identity lease should not be garbage collected", + testLeaseNotGarbageCollected(t, kubeclient, expiredLease)) + + // identity leases (with k8s.io/component label) created in user namespaces should not be GC'ed + expiredNonKubeSystemLease := newTestLease(time.Now().Add(-2*time.Hour), metav1.NamespaceDefault) + t.Run("expired non-system identity lease should not be garbage collected", + testLeaseNotGarbageCollected(t, kubeclient, expiredNonKubeSystemLease)) +} + +func testLeaseGarbageCollected(t *testing.T, client kubernetes.Interface, lease *coordinationv1.Lease) func(t *testing.T) { + return func(t *testing.T) { + ns := lease.Namespace + if _, err := client.CoordinationV1().Leases(ns).Create(context.TODO(), lease, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unexpected error creating lease: %v", err) + } + if err := wait.PollImmediate(500*time.Millisecond, 5*time.Second, func() (bool, error) { + _, err := client.CoordinationV1().Leases(ns).Get(context.TODO(), lease.Name, metav1.GetOptions{}) + if err == nil { + return false, nil + } + if apierrors.IsNotFound(err) { + return true, nil + } + return false, err + }); err != nil { + t.Fatalf("Failed to see the expired lease garbage collected: %v", err) + } + + } +} + +func testLeaseNotGarbageCollected(t *testing.T, client kubernetes.Interface, lease *coordinationv1.Lease) func(t *testing.T) { + return func(t *testing.T) { + ns := lease.Namespace + if _, err := client.CoordinationV1().Leases(ns).Create(context.TODO(), lease, metav1.CreateOptions{}); err != nil { + t.Fatalf("Unexpected error creating lease: %v", err) + } + if err := wait.PollImmediate(500*time.Millisecond, 5*time.Second, func() (bool, error) { + _, err := client.CoordinationV1().Leases(ns).Get(context.TODO(), lease.Name, metav1.GetOptions{}) + if err != nil && apierrors.IsNotFound(err) { + return true, nil + } + return false, nil + }); err == nil { + t.Fatalf("Unexpected valid lease getting garbage collected") + } + if _, err := client.CoordinationV1().Leases(ns).Get(context.TODO(), lease.Name, metav1.GetOptions{}); err != nil { + t.Fatalf("Failed to retrieve valid lease: %v", err) + } + if err := client.CoordinationV1().Leases(ns).Delete(context.TODO(), lease.Name, metav1.DeleteOptions{}); err != nil { + t.Fatalf("Failed to clean up valid lease: %v", err) + } + } +} + +func newTestLease(acquireTime time.Time, namespace string) *coordinationv1.Lease { + return &coordinationv1.Lease{ + ObjectMeta: metav1.ObjectMeta{ + Name: testLeaseName, + Namespace: namespace, + Labels: map[string]string{ + controlplane.IdentityLeaseComponentLabelKey: controlplane.KubeAPIServer, + }, + }, + Spec: coordinationv1.LeaseSpec{ + HolderIdentity: pointer.StringPtr(testLeaseName), + LeaseDurationSeconds: pointer.Int32Ptr(3600), + AcquireTime: &metav1.MicroTime{Time: acquireTime}, + RenewTime: &metav1.MicroTime{Time: acquireTime}, + }, + } +} From bfebc7aefd649f66c4528296c709a7314c66bb45 Mon Sep 17 00:00:00 2001 From: Haowei Cai Date: Tue, 10 Nov 2020 15:12:43 -0800 Subject: [PATCH 3/4] generated --- pkg/controlplane/BUILD | 2 ++ .../controller/apiserverleasegc/BUILD | 35 +++++++++++++++++++ test/integration/master/BUILD | 1 + 3 files changed, 38 insertions(+) create mode 100644 pkg/controlplane/controller/apiserverleasegc/BUILD diff --git a/pkg/controlplane/BUILD b/pkg/controlplane/BUILD index b9c2d0637de..ebc25f6dda6 100644 --- a/pkg/controlplane/BUILD +++ b/pkg/controlplane/BUILD @@ -36,6 +36,7 @@ go_library( "//pkg/apis/rbac/install:go_default_library", "//pkg/apis/scheduling/install:go_default_library", "//pkg/apis/storage/install:go_default_library", + "//pkg/controlplane/controller/apiserverleasegc:go_default_library", "//pkg/controlplane/controller/clusterauthenticationtrust:go_default_library", "//pkg/controlplane/reconcilers:go_default_library", "//pkg/controlplane/tunneler:go_default_library", @@ -202,6 +203,7 @@ filegroup( name = "all-srcs", srcs = [ ":package-srcs", + "//pkg/controlplane/controller/apiserverleasegc:all-srcs", "//pkg/controlplane/controller/clusterauthenticationtrust:all-srcs", "//pkg/controlplane/controller/crdregistration:all-srcs", "//pkg/controlplane/reconcilers:all-srcs", diff --git a/pkg/controlplane/controller/apiserverleasegc/BUILD b/pkg/controlplane/controller/apiserverleasegc/BUILD new file mode 100644 index 00000000000..ad19c1eb723 --- /dev/null +++ b/pkg/controlplane/controller/apiserverleasegc/BUILD @@ -0,0 +1,35 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "go_default_library", + srcs = ["gc_controller.go"], + importpath = "k8s.io/kubernetes/pkg/controlplane/controller/apiserverleasegc", + visibility = ["//visibility:public"], + deps = [ + "//staging/src/k8s.io/api/coordination/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/api/errors:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library", + "//staging/src/k8s.io/client-go/informers/coordination/v1:go_default_library", + "//staging/src/k8s.io/client-go/kubernetes:go_default_library", + "//staging/src/k8s.io/client-go/listers/coordination/v1:go_default_library", + "//staging/src/k8s.io/client-go/tools/cache:go_default_library", + "//vendor/k8s.io/klog/v2:go_default_library", + ], +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], + visibility = ["//visibility:public"], +) diff --git a/test/integration/master/BUILD b/test/integration/master/BUILD index de62dad8fd8..73e39fc655d 100644 --- a/test/integration/master/BUILD +++ b/test/integration/master/BUILD @@ -29,6 +29,7 @@ go_test( "//staging/src/k8s.io/api/admission/v1beta1:go_default_library", "//staging/src/k8s.io/api/admissionregistration/v1beta1:go_default_library", "//staging/src/k8s.io/api/apps/v1:go_default_library", + "//staging/src/k8s.io/api/coordination/v1:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/api/networking/v1:go_default_library", "//staging/src/k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1beta1:go_default_library", From e363709d731b55aaa111105c1e49f19f203ae8bd Mon Sep 17 00:00:00 2001 From: Haowei Cai Date: Tue, 10 Nov 2020 15:30:14 -0800 Subject: [PATCH 4/4] add V(4) log when apiserver lease was deleted before this controller reacts --- .../apiserverleasegc/gc_controller.go | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pkg/controlplane/controller/apiserverleasegc/gc_controller.go b/pkg/controlplane/controller/apiserverleasegc/gc_controller.go index a8621e3b1ee..8d5febf6656 100644 --- a/pkg/controlplane/controller/apiserverleasegc/gc_controller.go +++ b/pkg/controlplane/controller/apiserverleasegc/gc_controller.go @@ -108,7 +108,10 @@ func (c *Controller) gc() { continue } if errors.IsNotFound(err) || lease == nil { - // the lease was deleted by the same GC controller in another apiserver + // In an HA cluster, this can happen if the lease was deleted + // by the same GC controller in another apiserver, which is legit. + // We don't expect other components to delete the lease. + klog.V(4).Infof("cannot find apiserver lease: %v", err) continue } // evaluate lease from apiserver @@ -116,10 +119,15 @@ func (c *Controller) gc() { continue } if err := c.kubeclientset.CoordinationV1().Leases(c.leaseNamespace).Delete( - context.TODO(), lease.Name, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { - // If we get a 404, the lease was deleted by the same GC controller - // in another apiserver. Only log error if we get something other than 404. - klog.Errorf("Error deleting lease: %v", err) + context.TODO(), lease.Name, metav1.DeleteOptions{}); err != nil { + if errors.IsNotFound(err) { + // In an HA cluster, this can happen if the lease was deleted + // by the same GC controller in another apiserver, which is legit. + // We don't expect other components to delete the lease. + klog.V(4).Infof("apiserver lease is gone already: %v", err) + } else { + klog.Errorf("Error deleting lease: %v", err) + } } } }