Cap how long the kubelet waits when it has no client cert

If we go a certain amount of time without being able to create a client
cert and we have no current client cert from the store, exit. This
prevents a corrupted local copy of the cert from leaving the Kubelet in a
zombie state forever. Exiting allows a config loop outside the Kubelet
to clean up the file or the bootstrap client cert to get another client
cert.
This commit is contained in:
Clayton Coleman 2018-02-03 23:18:53 -05:00
parent 19131583eb
commit 0346145615
No known key found for this signature in database
GPG Key ID: 3D16906B4F1C5CB3
3 changed files with 39 additions and 12 deletions

View File

@ -528,9 +528,11 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) {
if err != nil {
return err
}
// we set exitIfExpired to true because we use this client configuration to request new certs - if we are unable
// to request new certs, we will be unable to continue normal operation
if err := kubeletcertificate.UpdateTransport(wait.NeverStop, clientConfig, clientCertificateManager, true); err != nil {
// we set exitAfter to five minutes because we use this client configuration to request new certs - if we are unable
// to request new certs, we will be unable to continue normal operation. Exiting the process allows a wrapper
// or the bootstrapping credentials to potentially lay down new initial config.
if err := kubeletcertificate.UpdateTransport(wait.NeverStop, clientConfig, clientCertificateManager, 5*time.Minute); err != nil {
return err
}
}

View File

@ -43,15 +43,21 @@ import (
// connections, forcing the client to re-handshake with the server and use the
// new certificate.
//
// The exitAfter duration, if set, will terminate the current process if a certificate
// is not available from the store (because it has been deleted on disk or is corrupt)
// or if the certificate has expired and the server is responsive. This allows the
// process parent or the bootstrap credentials an opportunity to retrieve a new initial
// certificate.
//
// stopCh should be used to indicate when the transport is unused and doesn't need
// to continue checking the manager.
func UpdateTransport(stopCh <-chan struct{}, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitIfExpired bool) error {
return updateTransport(stopCh, 10*time.Second, clientConfig, clientCertificateManager, exitIfExpired)
func UpdateTransport(stopCh <-chan struct{}, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) error {
return updateTransport(stopCh, 10*time.Second, clientConfig, clientCertificateManager, exitAfter)
}
// updateTransport is an internal method that exposes how often this method checks that the
// client cert has changed. Intended for testing.
func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitIfExpired bool) error {
// client cert has changed.
func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) error {
if clientConfig.Transport != nil {
return fmt.Errorf("there is already a transport configured")
}
@ -77,16 +83,35 @@ func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig
conns: make(map[*closableConn]struct{}),
}
lastCertAvailable := time.Now()
lastCert := clientCertificateManager.Current()
go wait.Until(func() {
curr := clientCertificateManager.Current()
if exitIfExpired && curr != nil && time.Now().After(curr.Leaf.NotAfter) {
if clientCertificateManager.ServerHealthy() {
glog.Fatalf("The currently active client certificate has expired and the server is responsive, exiting.")
if exitAfter > 0 {
now := time.Now()
if curr == nil {
// the certificate has been deleted from disk or is otherwise corrupt
if now.After(lastCertAvailable.Add(exitAfter)) {
if clientCertificateManager.ServerHealthy() {
glog.Fatalf("It has been %s since a valid client cert was found and the server is responsive, exiting.", exitAfter)
} else {
glog.Errorf("It has been %s since a valid client cert was found, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.", exitAfter)
}
}
} else {
glog.Errorf("The currently active client certificate has expired, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.")
// the certificate is expired
if now.After(curr.Leaf.NotAfter) {
if clientCertificateManager.ServerHealthy() {
glog.Fatalf("The currently active client certificate has expired and the server is responsive, exiting.")
} else {
glog.Errorf("The currently active client certificate has expired, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.")
}
}
lastCertAvailable = now
}
}
if curr == nil || lastCert == curr {
// Cert hasn't been rotated.
return

View File

@ -187,7 +187,7 @@ func TestRotateShutsDownConnections(t *testing.T) {
}
// Check for a new cert every 10 milliseconds
if err := updateTransport(stop, 10*time.Millisecond, c, m, false); err != nil {
if err := updateTransport(stop, 10*time.Millisecond, c, m, 0); err != nil {
t.Fatal(err)
}