Merge pull request #58930 from smarterclayton/background_rotate

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Only rotate certificates in the background

Change the Kubelet to not block until the first certs have rotated (we didn't act on it anyway) and fall back to the bootstrap cert if the most recent rotated cert is expired on startup.

The certificate manager originally had a "block on startup" rotation behavior to ensure at least one rotation happened on startup. However, since rotation may not succeed within the first time window the code was changed to simply print the error rather than return it. This meant that the blocking rotation has no purpose - it cannot cause the kubelet to fail, and it *does* block the kubelet from starting static pods before the api server becomes available.

The current block behavior causes a bootstrapped kubelet that is also set to run static pods to wait several minutes before actually launching the static pods, which means self-hosted masters using static pods have a pointless delay on startup.

Since blocking rotation has no benefit and can't actually fail startup, this commit removes the blocking behavior and simplifies the code at the same time. The goroutine for rotation now completely owns the deadline, the shouldRotate() method is removed, and the method that sets rotationDeadline now returns it. We also explicitly guard against a negative sleep interval and omit the message.

Should have no impact on bootstrapping except the removal of a long delay on startup before static pods start.

The other change is that an expired certificate from the cert manager is *not* considered a valid cert, which triggers an immediate rotation.  This causes the cert manager to fall back to the original bootstrap certificate until a new certificate is issued.  This allows the bootstrap certificate on masters to be "higher powered" and allow the node to function prior to initial approval, which means someone configuring the masters with a pre-generated client cert can be guaranteed that the kubelet will be able to communicate to report self-hosted static pod status, even if the first client rotation hasn't happened.  This makes master self-hosting more predictable for static configuration environments.

```release-note
When using client or server certificate rotation, the Kubelet will no longer wait until the initial rotation succeeds or fails before starting static pods.  This makes running self-hosted masters with rotation more predictable.
```
This commit is contained in:
Kubernetes Submit Queue
2018-02-01 12:05:15 -08:00
committed by GitHub
3 changed files with 41 additions and 102 deletions

View File

@@ -774,7 +774,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
kubeDeps.TLSOptions.Config.GetCertificate = func(*tls.ClientHelloInfo) (*tls.Certificate, error) { kubeDeps.TLSOptions.Config.GetCertificate = func(*tls.ClientHelloInfo) (*tls.Certificate, error) {
cert := klet.serverCertificateManager.Current() cert := klet.serverCertificateManager.Current()
if cert == nil { if cert == nil {
return nil, fmt.Errorf("no certificate available") return nil, fmt.Errorf("no serving certificate available for the kubelet")
} }
return cert, nil return cert, nil
} }

View File

@@ -141,7 +141,6 @@ type manager struct {
certStore Store certStore Store
certAccessLock sync.RWMutex certAccessLock sync.RWMutex
cert *tls.Certificate cert *tls.Certificate
rotationDeadline time.Time
forceRotation bool forceRotation bool
certificateExpiration Gauge certificateExpiration Gauge
serverHealth bool serverHealth bool
@@ -208,8 +207,7 @@ func (m *manager) SetCertificateSigningRequestClient(certSigningRequestClient ce
func (m *manager) Start() { func (m *manager) Start() {
// Certificate rotation depends on access to the API server certificate // Certificate rotation depends on access to the API server certificate
// signing API, so don't start the certificate manager if we don't have a // signing API, so don't start the certificate manager if we don't have a
// client. This will happen on the cluster master, where the kubelet is // client.
// responsible for bootstrapping the pods of the master components.
if m.certSigningRequestClient == nil { if m.certSigningRequestClient == nil {
glog.V(2).Infof("Certificate rotation is not enabled, no connection to the apiserver.") glog.V(2).Infof("Certificate rotation is not enabled, no connection to the apiserver.")
return return
@@ -217,16 +215,11 @@ func (m *manager) Start() {
glog.V(2).Infof("Certificate rotation is enabled.") glog.V(2).Infof("Certificate rotation is enabled.")
m.setRotationDeadline() go wait.Forever(func() {
deadline := m.nextRotationDeadline()
// Synchronously request a certificate before entering the background if sleepInterval := deadline.Sub(time.Now()); sleepInterval > 0 {
// loop to allow bootstrap scenarios, where the certificate manager glog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval)
// doesn't have a certificate at all yet. time.Sleep(sleepInterval)
if m.shouldRotate() {
glog.V(1).Infof("shouldRotate() is true, forcing immediate rotation")
if _, err := m.rotateCerts(); err != nil {
utilruntime.HandleError(fmt.Errorf("Could not rotate certificates: %v", err))
}
} }
backoff := wait.Backoff{ backoff := wait.Backoff{
Duration: 2 * time.Second, Duration: 2 * time.Second,
@@ -234,10 +227,6 @@ func (m *manager) Start() {
Jitter: 0.1, Jitter: 0.1,
Steps: 5, Steps: 5,
} }
go wait.Forever(func() {
sleepInterval := m.rotationDeadline.Sub(time.Now())
glog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval)
time.Sleep(sleepInterval)
if err := wait.ExponentialBackoff(backoff, m.rotateCerts); err != nil { if err := wait.ExponentialBackoff(backoff, m.rotateCerts); err != nil {
utilruntime.HandleError(fmt.Errorf("Reached backoff limit, still unable to rotate certs: %v", err)) utilruntime.HandleError(fmt.Errorf("Reached backoff limit, still unable to rotate certs: %v", err))
wait.PollInfinite(32*time.Second, m.rotateCerts) wait.PollInfinite(32*time.Second, m.rotateCerts)
@@ -252,12 +241,15 @@ func getCurrentCertificateOrBootstrap(
currentCert, err := store.Current() currentCert, err := store.Current()
if err == nil { if err == nil {
// if the current cert is expired, fall back to the bootstrap cert
if currentCert.Leaf != nil && time.Now().Before(currentCert.Leaf.NotAfter) {
return currentCert, false, nil return currentCert, false, nil
} }
} else {
if _, ok := err.(*NoCertKeyError); !ok { if _, ok := err.(*NoCertKeyError); !ok {
return nil, false, err return nil, false, err
} }
}
if bootstrapCertificatePEM == nil || bootstrapKeyPEM == nil { if bootstrapCertificatePEM == nil || bootstrapKeyPEM == nil {
return nil, true, nil return nil, true, nil
@@ -279,20 +271,6 @@ func getCurrentCertificateOrBootstrap(
return &bootstrapCert, true, nil return &bootstrapCert, true, nil
} }
// shouldRotate looks at how close the current certificate is to expiring and
// decides if it is time to rotate or not.
func (m *manager) shouldRotate() bool {
m.certAccessLock.RLock()
defer m.certAccessLock.RUnlock()
if m.cert == nil {
return true
}
if m.forceRotation {
return true
}
return time.Now().After(m.rotationDeadline)
}
// rotateCerts attempts to request a client cert from the server, wait a reasonable // rotateCerts attempts to request a client cert from the server, wait a reasonable
// period of time for it to be signed, and then update the cert on disk. If it cannot // period of time for it to be signed, and then update the cert on disk. If it cannot
// retrieve a cert, it will return false. It will only return error in exceptional cases. // retrieve a cert, it will return false. It will only return error in exceptional cases.
@@ -349,30 +327,34 @@ func (m *manager) rotateCerts() (bool, error) {
} }
m.updateCached(cert) m.updateCached(cert)
m.setRotationDeadline()
m.forceRotation = false
return true, nil return true, nil
} }
// setRotationDeadline sets a cached value for the threshold at which the // nextRotationDeadline returns a value for the threshold at which the
// current certificate should be rotated, 80%+/-10% of the expiration of the // current certificate should be rotated, 80%+/-10% of the expiration of the
// certificate. // certificate.
func (m *manager) setRotationDeadline() { func (m *manager) nextRotationDeadline() time.Time {
// forceRotation is not protected by locks
if m.forceRotation {
m.forceRotation = false
return time.Now()
}
m.certAccessLock.RLock() m.certAccessLock.RLock()
defer m.certAccessLock.RUnlock() defer m.certAccessLock.RUnlock()
if m.cert == nil { if m.cert == nil {
m.rotationDeadline = time.Now() return time.Now()
return
} }
notAfter := m.cert.Leaf.NotAfter notAfter := m.cert.Leaf.NotAfter
totalDuration := float64(notAfter.Sub(m.cert.Leaf.NotBefore)) totalDuration := float64(notAfter.Sub(m.cert.Leaf.NotBefore))
deadline := m.cert.Leaf.NotBefore.Add(jitteryDuration(totalDuration))
m.rotationDeadline = m.cert.Leaf.NotBefore.Add(jitteryDuration(totalDuration)) glog.V(2).Infof("Certificate expiration is %v, rotation deadline is %v", notAfter, deadline)
glog.V(2).Infof("Certificate expiration is %v, rotation deadline is %v", notAfter, m.rotationDeadline)
if m.certificateExpiration != nil { if m.certificateExpiration != nil {
m.certificateExpiration.Set(float64(notAfter.Unix())) m.certificateExpiration.Set(float64(notAfter.Unix()))
} }
return deadline
} }
// jitteryDuration uses some jitter to set the rotation threshold so each node // jitteryDuration uses some jitter to set the rotation threshold so each node

View File

@@ -146,46 +146,6 @@ func TestNewManagerNoRotation(t *testing.T) {
} }
} }
func TestShouldRotate(t *testing.T) {
now := time.Now()
tests := []struct {
name string
notBefore time.Time
notAfter time.Time
shouldRotate bool
}{
{"just issued, still good", now.Add(-1 * time.Hour), now.Add(99 * time.Hour), false},
{"half way expired, still good", now.Add(-24 * time.Hour), now.Add(24 * time.Hour), false},
{"mostly expired, still good", now.Add(-69 * time.Hour), now.Add(31 * time.Hour), false},
{"just about expired, should rotate", now.Add(-91 * time.Hour), now.Add(9 * time.Hour), true},
{"nearly expired, should rotate", now.Add(-99 * time.Hour), now.Add(1 * time.Hour), true},
{"already expired, should rotate", now.Add(-10 * time.Hour), now.Add(-1 * time.Hour), true},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
m := manager{
cert: &tls.Certificate{
Leaf: &x509.Certificate{
NotBefore: test.notBefore,
NotAfter: test.notAfter,
},
},
template: &x509.CertificateRequest{},
usages: []certificates.KeyUsage{},
}
m.setRotationDeadline()
if m.shouldRotate() != test.shouldRotate {
t.Errorf("Time %v, a certificate issued for (%v, %v) should rotate should be %t.",
now,
m.cert.Leaf.NotBefore,
m.cert.Leaf.NotAfter,
test.shouldRotate)
}
})
}
}
type gaugeMock struct { type gaugeMock struct {
calls int calls int
lastValue float64 lastValue float64
@@ -233,13 +193,13 @@ func TestSetRotationDeadline(t *testing.T) {
jitteryDuration = func(float64) time.Duration { return time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7) } jitteryDuration = func(float64) time.Duration { return time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7) }
lowerBound := tc.notBefore.Add(time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7)) lowerBound := tc.notBefore.Add(time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7))
m.setRotationDeadline() deadline := m.nextRotationDeadline()
if !m.rotationDeadline.Equal(lowerBound) { if !deadline.Equal(lowerBound) {
t.Errorf("For notBefore %v, notAfter %v, the rotationDeadline %v should be %v.", t.Errorf("For notBefore %v, notAfter %v, the rotationDeadline %v should be %v.",
tc.notBefore, tc.notBefore,
tc.notAfter, tc.notAfter,
m.rotationDeadline, deadline,
lowerBound) lowerBound)
} }
if g.calls != 1 { if g.calls != 1 {
@@ -321,7 +281,7 @@ func TestNewManagerBootstrap(t *testing.T) {
} }
if m, ok := cm.(*manager); !ok { if m, ok := cm.(*manager); !ok {
t.Errorf("Expected a '*manager' from 'NewManager'") t.Errorf("Expected a '*manager' from 'NewManager'")
} else if !m.shouldRotate() { } else if !m.forceRotation {
t.Errorf("Expected rotation should happen during bootstrap, but it won't.") t.Errorf("Expected rotation should happen during bootstrap, but it won't.")
} }
} }
@@ -360,9 +320,8 @@ func TestNewManagerNoBootstrap(t *testing.T) {
if m, ok := cm.(*manager); !ok { if m, ok := cm.(*manager); !ok {
t.Errorf("Expected a '*manager' from 'NewManager'") t.Errorf("Expected a '*manager' from 'NewManager'")
} else { } else {
m.setRotationDeadline() if m.forceRotation {
if m.shouldRotate() { t.Errorf("Expected rotation should not happen during bootstrap, but it won't.")
t.Errorf("Expected rotation should happen during bootstrap, but it won't.")
} }
} }
} }
@@ -515,8 +474,7 @@ func TestInitializeCertificateSigningRequestClient(t *testing.T) {
if m, ok := certificateManager.(*manager); !ok { if m, ok := certificateManager.(*manager); !ok {
t.Errorf("Expected a '*manager' from 'NewManager'") t.Errorf("Expected a '*manager' from 'NewManager'")
} else { } else {
m.setRotationDeadline() if m.forceRotation {
if m.shouldRotate() {
if success, err := m.rotateCerts(); !success { if success, err := m.rotateCerts(); !success {
t.Errorf("Got failure from 'rotateCerts', wanted success.") t.Errorf("Got failure from 'rotateCerts', wanted success.")
} else if err != nil { } else if err != nil {
@@ -614,8 +572,7 @@ func TestInitializeOtherRESTClients(t *testing.T) {
if m, ok := certificateManager.(*manager); !ok { if m, ok := certificateManager.(*manager); !ok {
t.Errorf("Expected a '*manager' from 'NewManager'") t.Errorf("Expected a '*manager' from 'NewManager'")
} else { } else {
m.setRotationDeadline() if m.forceRotation {
if m.shouldRotate() {
success, err := certificateManager.(*manager).rotateCerts() success, err := certificateManager.(*manager).rotateCerts()
if err != nil { if err != nil {
t.Errorf("Got error %v, expected none.", err) t.Errorf("Got error %v, expected none.", err)