Added rotation metric to certificate manager
This commit is contained in:
		@@ -113,8 +113,17 @@ type Config struct {
 | 
				
			|||||||
	// quickly replaced with a unique cert/key pair.
 | 
						// quickly replaced with a unique cert/key pair.
 | 
				
			||||||
	BootstrapKeyPEM []byte
 | 
						BootstrapKeyPEM []byte
 | 
				
			||||||
	// CertificateExpiration will record a metric that shows the remaining
 | 
						// CertificateExpiration will record a metric that shows the remaining
 | 
				
			||||||
	// lifetime of the certificate.
 | 
						// lifetime of the certificate. This metric is a gauge because only the
 | 
				
			||||||
 | 
						// current cert expiry time is really useful. Reading this metric at any
 | 
				
			||||||
 | 
						// time simply gives the next expiration date, no need to keep some
 | 
				
			||||||
 | 
						// history (histogram) of all previous expiry dates.
 | 
				
			||||||
	CertificateExpiration Gauge
 | 
						CertificateExpiration Gauge
 | 
				
			||||||
 | 
						// CertificateRotation will record a metric showing the time in seconds
 | 
				
			||||||
 | 
						// that certificates lived before being rotated. This metric is a histogram
 | 
				
			||||||
 | 
						// because there is value in keeping a history of rotation cadences. It
 | 
				
			||||||
 | 
						// allows one to set up monitoring and alerting of unexpected rotation
 | 
				
			||||||
 | 
						// behavior and track trends in rotation frequency.
 | 
				
			||||||
 | 
						CertificateRotation Histogram
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Store is responsible for getting and updating the current certificate.
 | 
					// Store is responsible for getting and updating the current certificate.
 | 
				
			||||||
@@ -139,6 +148,12 @@ type Gauge interface {
 | 
				
			|||||||
	Set(float64)
 | 
						Set(float64)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Histogram will record the time in seconds a certificate was used before being
 | 
				
			||||||
 | 
					// rotated.
 | 
				
			||||||
 | 
					type Histogram interface {
 | 
				
			||||||
 | 
						Observe(float64)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// NoCertKeyError indicates there is no cert/key currently available.
 | 
					// NoCertKeyError indicates there is no cert/key currently available.
 | 
				
			||||||
type NoCertKeyError string
 | 
					type NoCertKeyError string
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -163,6 +178,7 @@ type manager struct {
 | 
				
			|||||||
	certStore Store
 | 
						certStore Store
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	certificateExpiration Gauge
 | 
						certificateExpiration Gauge
 | 
				
			||||||
 | 
						certificateRotation   Histogram
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// the following variables must only be accessed under certAccessLock
 | 
						// the following variables must only be accessed under certAccessLock
 | 
				
			||||||
	certAccessLock sync.RWMutex
 | 
						certAccessLock sync.RWMutex
 | 
				
			||||||
@@ -174,6 +190,9 @@ type manager struct {
 | 
				
			|||||||
	clientFn         CSRClientFunc
 | 
						clientFn         CSRClientFunc
 | 
				
			||||||
	stopCh           chan struct{}
 | 
						stopCh           chan struct{}
 | 
				
			||||||
	stopped          bool
 | 
						stopped          bool
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Set to time.Now but can be stubbed out for testing
 | 
				
			||||||
 | 
						now func() time.Time
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// NewManager returns a new certificate manager. A certificate manager is
 | 
					// NewManager returns a new certificate manager. A certificate manager is
 | 
				
			||||||
@@ -203,6 +222,8 @@ func NewManager(config *Config) (Manager, error) {
 | 
				
			|||||||
		cert:                  cert,
 | 
							cert:                  cert,
 | 
				
			||||||
		forceRotation:         forceRotation,
 | 
							forceRotation:         forceRotation,
 | 
				
			||||||
		certificateExpiration: config.CertificateExpiration,
 | 
							certificateExpiration: config.CertificateExpiration,
 | 
				
			||||||
 | 
							certificateRotation:   config.CertificateRotation,
 | 
				
			||||||
 | 
							now:                   time.Now,
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return &m, nil
 | 
						return &m, nil
 | 
				
			||||||
@@ -215,7 +236,7 @@ func NewManager(config *Config) (Manager, error) {
 | 
				
			|||||||
func (m *manager) Current() *tls.Certificate {
 | 
					func (m *manager) Current() *tls.Certificate {
 | 
				
			||||||
	m.certAccessLock.RLock()
 | 
						m.certAccessLock.RLock()
 | 
				
			||||||
	defer m.certAccessLock.RUnlock()
 | 
						defer m.certAccessLock.RUnlock()
 | 
				
			||||||
	if m.cert != nil && m.cert.Leaf != nil && time.Now().After(m.cert.Leaf.NotAfter) {
 | 
						if m.cert != nil && m.cert.Leaf != nil && m.now().After(m.cert.Leaf.NotAfter) {
 | 
				
			||||||
		klog.V(2).Infof("Current certificate is expired.")
 | 
							klog.V(2).Infof("Current certificate is expired.")
 | 
				
			||||||
		return nil
 | 
							return nil
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@@ -256,7 +277,7 @@ func (m *manager) Start() {
 | 
				
			|||||||
	templateChanged := make(chan struct{})
 | 
						templateChanged := make(chan struct{})
 | 
				
			||||||
	go wait.Until(func() {
 | 
						go wait.Until(func() {
 | 
				
			||||||
		deadline := m.nextRotationDeadline()
 | 
							deadline := m.nextRotationDeadline()
 | 
				
			||||||
		if sleepInterval := deadline.Sub(time.Now()); sleepInterval > 0 {
 | 
							if sleepInterval := deadline.Sub(m.now()); sleepInterval > 0 {
 | 
				
			||||||
			klog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval)
 | 
								klog.V(2).Infof("Waiting %v for next certificate rotation", sleepInterval)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			timer := time.NewTimer(sleepInterval)
 | 
								timer := time.NewTimer(sleepInterval)
 | 
				
			||||||
@@ -421,7 +442,10 @@ func (m *manager) rotateCerts() (bool, error) {
 | 
				
			|||||||
		return false, nil
 | 
							return false, nil
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	m.updateCached(cert)
 | 
						if old := m.updateCached(cert); old != nil && m.certificateRotation != nil {
 | 
				
			||||||
 | 
							m.certificateRotation.Observe(m.now().Sub(old.Leaf.NotBefore).Seconds())
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return true, nil
 | 
						return true, nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -490,14 +514,14 @@ func (m *manager) nextRotationDeadline() time.Time {
 | 
				
			|||||||
	// forceRotation is not protected by locks
 | 
						// forceRotation is not protected by locks
 | 
				
			||||||
	if m.forceRotation {
 | 
						if m.forceRotation {
 | 
				
			||||||
		m.forceRotation = false
 | 
							m.forceRotation = false
 | 
				
			||||||
		return time.Now()
 | 
							return m.now()
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	m.certAccessLock.RLock()
 | 
						m.certAccessLock.RLock()
 | 
				
			||||||
	defer m.certAccessLock.RUnlock()
 | 
						defer m.certAccessLock.RUnlock()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if !m.certSatisfiesTemplateLocked() {
 | 
						if !m.certSatisfiesTemplateLocked() {
 | 
				
			||||||
		return time.Now()
 | 
							return m.now()
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	notAfter := m.cert.Leaf.NotAfter
 | 
						notAfter := m.cert.Leaf.NotAfter
 | 
				
			||||||
@@ -523,13 +547,15 @@ var jitteryDuration = func(totalDuration float64) time.Duration {
 | 
				
			|||||||
	return wait.Jitter(time.Duration(totalDuration), 0.2) - time.Duration(totalDuration*0.3)
 | 
						return wait.Jitter(time.Duration(totalDuration), 0.2) - time.Duration(totalDuration*0.3)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// updateCached sets the most recent retrieved cert. It also sets the server
 | 
					// updateCached sets the most recent retrieved cert and returns the old cert.
 | 
				
			||||||
// as assumed healthy.
 | 
					// It also sets the server as assumed healthy.
 | 
				
			||||||
func (m *manager) updateCached(cert *tls.Certificate) {
 | 
					func (m *manager) updateCached(cert *tls.Certificate) *tls.Certificate {
 | 
				
			||||||
	m.certAccessLock.Lock()
 | 
						m.certAccessLock.Lock()
 | 
				
			||||||
	defer m.certAccessLock.Unlock()
 | 
						defer m.certAccessLock.Unlock()
 | 
				
			||||||
	m.serverHealth = true
 | 
						m.serverHealth = true
 | 
				
			||||||
 | 
						old := m.cert
 | 
				
			||||||
	m.cert = cert
 | 
						m.cert = cert
 | 
				
			||||||
 | 
						return old
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// updateServerError takes an error returned by the server and infers
 | 
					// updateServerError takes an error returned by the server and infers
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -163,12 +163,17 @@ func TestNewManagerNoRotation(t *testing.T) {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type gaugeMock struct {
 | 
					type metricMock struct {
 | 
				
			||||||
	calls     int
 | 
						calls     int
 | 
				
			||||||
	lastValue float64
 | 
						lastValue float64
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func (g *gaugeMock) Set(v float64) {
 | 
					func (g *metricMock) Set(v float64) {
 | 
				
			||||||
 | 
						g.calls++
 | 
				
			||||||
 | 
						g.lastValue = v
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func (g *metricMock) Observe(v float64) {
 | 
				
			||||||
	g.calls++
 | 
						g.calls++
 | 
				
			||||||
	g.lastValue = v
 | 
						g.lastValue = v
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -195,7 +200,7 @@ func TestSetRotationDeadline(t *testing.T) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	for _, tc := range testCases {
 | 
						for _, tc := range testCases {
 | 
				
			||||||
		t.Run(tc.name, func(t *testing.T) {
 | 
							t.Run(tc.name, func(t *testing.T) {
 | 
				
			||||||
			g := gaugeMock{}
 | 
								g := metricMock{}
 | 
				
			||||||
			m := manager{
 | 
								m := manager{
 | 
				
			||||||
				cert: &tls.Certificate{
 | 
									cert: &tls.Certificate{
 | 
				
			||||||
					Leaf: &x509.Certificate{
 | 
										Leaf: &x509.Certificate{
 | 
				
			||||||
@@ -206,6 +211,7 @@ func TestSetRotationDeadline(t *testing.T) {
 | 
				
			|||||||
				getTemplate:           func() *x509.CertificateRequest { return &x509.CertificateRequest{} },
 | 
									getTemplate:           func() *x509.CertificateRequest { return &x509.CertificateRequest{} },
 | 
				
			||||||
				usages:                []certificates.KeyUsage{},
 | 
									usages:                []certificates.KeyUsage{},
 | 
				
			||||||
				certificateExpiration: &g,
 | 
									certificateExpiration: &g,
 | 
				
			||||||
 | 
									now:                   func() time.Time { return now },
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			jitteryDuration = func(float64) time.Duration { return time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7) }
 | 
								jitteryDuration = func(float64) time.Duration { return time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7) }
 | 
				
			||||||
			lowerBound := tc.notBefore.Add(time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7))
 | 
								lowerBound := tc.notBefore.Add(time.Duration(float64(tc.notAfter.Sub(tc.notBefore)) * 0.7))
 | 
				
			||||||
@@ -383,6 +389,7 @@ func TestCertSatisfiesTemplate(t *testing.T) {
 | 
				
			|||||||
			m := manager{
 | 
								m := manager{
 | 
				
			||||||
				cert:        tlsCert,
 | 
									cert:        tlsCert,
 | 
				
			||||||
				getTemplate: func() *x509.CertificateRequest { return tc.template },
 | 
									getTemplate: func() *x509.CertificateRequest { return tc.template },
 | 
				
			||||||
 | 
									now:         time.Now,
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			result := m.certSatisfiesTemplate()
 | 
								result := m.certSatisfiesTemplate()
 | 
				
			||||||
@@ -407,6 +414,7 @@ func TestRotateCertCreateCSRError(t *testing.T) {
 | 
				
			|||||||
		clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) {
 | 
							clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) {
 | 
				
			||||||
			return fakeClient{failureType: createError}, nil
 | 
								return fakeClient{failureType: createError}, nil
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
 | 
							now: func() time.Time { return now },
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if success, err := m.rotateCerts(); success {
 | 
						if success, err := m.rotateCerts(); success {
 | 
				
			||||||
@@ -430,6 +438,7 @@ func TestRotateCertWaitingForResultError(t *testing.T) {
 | 
				
			|||||||
		clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) {
 | 
							clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) {
 | 
				
			||||||
			return fakeClient{failureType: watchError}, nil
 | 
								return fakeClient{failureType: watchError}, nil
 | 
				
			||||||
		},
 | 
							},
 | 
				
			||||||
 | 
							now: func() time.Time { return now },
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	defer func(t time.Duration) { certificateWaitTimeout = t }(certificateWaitTimeout)
 | 
						defer func(t time.Duration) { certificateWaitTimeout = t }(certificateWaitTimeout)
 | 
				
			||||||
@@ -945,6 +954,40 @@ func TestServerHealth(t *testing.T) {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func TestRotationLogsDuration(t *testing.T) {
 | 
				
			||||||
 | 
						h := metricMock{}
 | 
				
			||||||
 | 
						now := time.Now()
 | 
				
			||||||
 | 
						certIss := now.Add(-2 * time.Hour)
 | 
				
			||||||
 | 
						m := manager{
 | 
				
			||||||
 | 
							cert: &tls.Certificate{
 | 
				
			||||||
 | 
								Leaf: &x509.Certificate{
 | 
				
			||||||
 | 
									NotBefore: certIss,
 | 
				
			||||||
 | 
									NotAfter:  now.Add(-1 * time.Hour),
 | 
				
			||||||
 | 
								},
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							certStore:   &fakeStore{cert: expiredStoreCertData.certificate},
 | 
				
			||||||
 | 
							getTemplate: func() *x509.CertificateRequest { return &x509.CertificateRequest{} },
 | 
				
			||||||
 | 
							clientFn: func(_ *tls.Certificate) (certificatesclient.CertificateSigningRequestInterface, error) {
 | 
				
			||||||
 | 
								return &fakeClient{
 | 
				
			||||||
 | 
									certificatePEM: apiServerCertData.certificatePEM,
 | 
				
			||||||
 | 
								}, nil
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							certificateRotation: &h,
 | 
				
			||||||
 | 
							now:                 func() time.Time { return now },
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						ok, err := m.rotateCerts()
 | 
				
			||||||
 | 
						if err != nil || !ok {
 | 
				
			||||||
 | 
							t.Errorf("failed to rotate certs: %v", err)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if h.calls != 1 {
 | 
				
			||||||
 | 
							t.Errorf("rotation metric was not called")
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if h.lastValue != now.Sub(certIss).Seconds() {
 | 
				
			||||||
 | 
							t.Errorf("rotation metric did not record the right value got: %f; want %f", h.lastValue, now.Sub(certIss).Seconds())
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type fakeClientFailureType int
 | 
					type fakeClientFailureType int
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const (
 | 
					const (
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user