Merge pull request #120490 from p0lyn0mial/upstream-watch-list-cache-metrics
collect watch-list requests latency metric
This commit is contained in:
		@@ -267,7 +267,7 @@ func ListResource(r rest.Lister, rw rest.Watcher, scope *RequestScope, forceWatc
 | 
			
		||||
			}
 | 
			
		||||
			requestInfo, _ := request.RequestInfoFrom(ctx)
 | 
			
		||||
			metrics.RecordLongRunning(req, requestInfo, metrics.APIServerComponent, func() {
 | 
			
		||||
				serveWatch(watcher, scope, outputMediaType, req, w, timeout)
 | 
			
		||||
				serveWatch(watcher, scope, outputMediaType, req, w, timeout, metrics.CleanListScope(ctx, &opts))
 | 
			
		||||
			})
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
 
 | 
			
		||||
@@ -34,6 +34,9 @@ import (
 | 
			
		||||
	"k8s.io/apiserver/pkg/endpoints/handlers/negotiation"
 | 
			
		||||
	"k8s.io/apiserver/pkg/endpoints/metrics"
 | 
			
		||||
	apirequest "k8s.io/apiserver/pkg/endpoints/request"
 | 
			
		||||
	"k8s.io/apiserver/pkg/features"
 | 
			
		||||
	"k8s.io/apiserver/pkg/storage"
 | 
			
		||||
	utilfeature "k8s.io/apiserver/pkg/util/feature"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// nothing will ever be sent down this channel
 | 
			
		||||
@@ -61,7 +64,7 @@ func (w *realTimeoutFactory) TimeoutCh() (<-chan time.Time, func() bool) {
 | 
			
		||||
 | 
			
		||||
// serveWatch will serve a watch response.
 | 
			
		||||
// TODO: the functionality in this method and in WatchServer.Serve is not cleanly decoupled.
 | 
			
		||||
func serveWatch(watcher watch.Interface, scope *RequestScope, mediaTypeOptions negotiation.MediaTypeOptions, req *http.Request, w http.ResponseWriter, timeout time.Duration) {
 | 
			
		||||
func serveWatch(watcher watch.Interface, scope *RequestScope, mediaTypeOptions negotiation.MediaTypeOptions, req *http.Request, w http.ResponseWriter, timeout time.Duration, metricsScope string) {
 | 
			
		||||
	defer watcher.Stop()
 | 
			
		||||
 | 
			
		||||
	options, err := optionsForTransform(mediaTypeOptions, req)
 | 
			
		||||
@@ -153,6 +156,8 @@ func serveWatch(watcher watch.Interface, scope *RequestScope, mediaTypeOptions n
 | 
			
		||||
 | 
			
		||||
		TimeoutFactory:       &realTimeoutFactory{timeout},
 | 
			
		||||
		ServerShuttingDownCh: serverShuttingDownCh,
 | 
			
		||||
 | 
			
		||||
		metricsScope: metricsScope,
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	server.ServeHTTP(w, req)
 | 
			
		||||
@@ -176,6 +181,8 @@ type WatchServer struct {
 | 
			
		||||
 | 
			
		||||
	TimeoutFactory       TimeoutFactory
 | 
			
		||||
	ServerShuttingDownCh <-chan struct{}
 | 
			
		||||
 | 
			
		||||
	metricsScope string
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// ServeHTTP serves a series of encoded events via HTTP with Transfer-Encoding: chunked
 | 
			
		||||
@@ -247,6 +254,7 @@ func (s *WatchServer) ServeHTTP(w http.ResponseWriter, req *http.Request) {
 | 
			
		||||
				return
 | 
			
		||||
			}
 | 
			
		||||
			metrics.WatchEvents.WithContext(req.Context()).WithLabelValues(kind.Group, kind.Version, kind.Kind).Inc()
 | 
			
		||||
			isWatchListLatencyRecordingRequired := shouldRecordWatchListLatency(event)
 | 
			
		||||
 | 
			
		||||
			if err := s.EmbeddedEncoder.Encode(event.Object, buf); err != nil {
 | 
			
		||||
				// unexpected error
 | 
			
		||||
@@ -280,6 +288,9 @@ func (s *WatchServer) ServeHTTP(w http.ResponseWriter, req *http.Request) {
 | 
			
		||||
			if len(ch) == 0 {
 | 
			
		||||
				flusher.Flush()
 | 
			
		||||
			}
 | 
			
		||||
			if isWatchListLatencyRecordingRequired {
 | 
			
		||||
				metrics.RecordWatchListLatency(req.Context(), s.Scope.Resource, s.metricsScope)
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			buf.Reset()
 | 
			
		||||
		}
 | 
			
		||||
@@ -360,3 +371,19 @@ func (s *WatchServer) HandleWS(ws *websocket.Conn) {
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func shouldRecordWatchListLatency(event watch.Event) bool {
 | 
			
		||||
	if event.Type != watch.Bookmark || !utilfeature.DefaultFeatureGate.Enabled(features.WatchList) {
 | 
			
		||||
		return false
 | 
			
		||||
	}
 | 
			
		||||
	// as of today the initial-events-end annotation is added only to a single event
 | 
			
		||||
	// by the watch cache and only when certain conditions are met
 | 
			
		||||
	//
 | 
			
		||||
	// for more please read https://github.com/kubernetes/enhancements/tree/master/keps/sig-api-machinery/3157-watch-list
 | 
			
		||||
	hasAnnotation, err := storage.HasInitialEventsEndBookmarkAnnotation(event.Object)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		utilruntime.HandleError(fmt.Errorf("unable to determine if the obj has the required annotation for measuring watchlist latency, obj %T: %v", event.Object, err))
 | 
			
		||||
		return false
 | 
			
		||||
	}
 | 
			
		||||
	return hasAnnotation
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -18,6 +18,7 @@ package metrics
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"context"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"net/http"
 | 
			
		||||
	"net/url"
 | 
			
		||||
	"strconv"
 | 
			
		||||
@@ -26,8 +27,12 @@ import (
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	restful "github.com/emicklei/go-restful/v3"
 | 
			
		||||
 | 
			
		||||
	metainternalversion "k8s.io/apimachinery/pkg/apis/meta/internalversion"
 | 
			
		||||
	"k8s.io/apimachinery/pkg/apis/meta/v1/validation"
 | 
			
		||||
	"k8s.io/apimachinery/pkg/runtime/schema"
 | 
			
		||||
	"k8s.io/apimachinery/pkg/types"
 | 
			
		||||
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 | 
			
		||||
	utilsets "k8s.io/apimachinery/pkg/util/sets"
 | 
			
		||||
	"k8s.io/apiserver/pkg/audit"
 | 
			
		||||
	"k8s.io/apiserver/pkg/authentication/user"
 | 
			
		||||
@@ -280,6 +285,17 @@ var (
 | 
			
		||||
		[]string{"code_path"},
 | 
			
		||||
	)
 | 
			
		||||
 | 
			
		||||
	watchListLatencies = compbasemetrics.NewHistogramVec(
 | 
			
		||||
		&compbasemetrics.HistogramOpts{
 | 
			
		||||
			Subsystem:      APIServerComponent,
 | 
			
		||||
			Name:           "watch_list_duration_seconds",
 | 
			
		||||
			Help:           "Response latency distribution in seconds for watch list requests broken by group, version, resource and scope.",
 | 
			
		||||
			Buckets:        []float64{0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2, 4, 6, 8, 10, 15, 20, 30, 45, 60},
 | 
			
		||||
			StabilityLevel: compbasemetrics.ALPHA,
 | 
			
		||||
		},
 | 
			
		||||
		[]string{"group", "version", "resource", "scope"},
 | 
			
		||||
	)
 | 
			
		||||
 | 
			
		||||
	metrics = []resettableCollector{
 | 
			
		||||
		deprecatedRequestGauge,
 | 
			
		||||
		requestCounter,
 | 
			
		||||
@@ -300,6 +316,7 @@ var (
 | 
			
		||||
		requestAbortsTotal,
 | 
			
		||||
		requestPostTimeoutTotal,
 | 
			
		||||
		requestTimestampComparisonDuration,
 | 
			
		||||
		watchListLatencies,
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// these are the valid request methods which we report in our metrics. Any other request methods
 | 
			
		||||
@@ -511,6 +528,18 @@ func RecordLongRunning(req *http.Request, requestInfo *request.RequestInfo, comp
 | 
			
		||||
	fn()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// RecordWatchListLatency simply records response latency for watch list requests.
 | 
			
		||||
func RecordWatchListLatency(ctx context.Context, gvr schema.GroupVersionResource, metricsScope string) {
 | 
			
		||||
	requestReceivedTimestamp, ok := request.ReceivedTimestampFrom(ctx)
 | 
			
		||||
	if !ok {
 | 
			
		||||
		utilruntime.HandleError(fmt.Errorf("unable to measure watchlist latency because no received ts found in the ctx, gvr: %s", gvr))
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	elapsedSeconds := time.Since(requestReceivedTimestamp).Seconds()
 | 
			
		||||
 | 
			
		||||
	watchListLatencies.WithContext(ctx).WithLabelValues(gvr.Group, gvr.Version, gvr.Resource, metricsScope).Observe(elapsedSeconds)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// MonitorRequest handles standard transformations for client and the reported verb and then invokes Monitor to record
 | 
			
		||||
// a request. verb must be uppercase to be backwards compatible with existing monitoring tooling.
 | 
			
		||||
func MonitorRequest(req *http.Request, verb, group, version, resource, subresource, scope, component string, deprecated bool, removedRelease string, httpCode, respSize int, elapsed time.Duration) {
 | 
			
		||||
@@ -621,6 +650,26 @@ func CleanScope(requestInfo *request.RequestInfo) string {
 | 
			
		||||
	return ""
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// CleanListScope computes the request scope for metrics.
 | 
			
		||||
//
 | 
			
		||||
// Note that normally we would use CleanScope for computation.
 | 
			
		||||
// But due to the same reasons mentioned in determineRequestNamespaceAndName we cannot.
 | 
			
		||||
func CleanListScope(ctx context.Context, opts *metainternalversion.ListOptions) string {
 | 
			
		||||
	namespace, name := determineRequestNamespaceAndName(ctx, opts)
 | 
			
		||||
	if len(name) > 0 {
 | 
			
		||||
		return "resource"
 | 
			
		||||
	}
 | 
			
		||||
	if len(namespace) > 0 {
 | 
			
		||||
		return "namespace"
 | 
			
		||||
	}
 | 
			
		||||
	if requestInfo, ok := request.RequestInfoFrom(ctx); ok {
 | 
			
		||||
		if requestInfo.IsResourceRequest {
 | 
			
		||||
			return "cluster"
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return ""
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// CanonicalVerb distinguishes LISTs from GETs (and HEADs). It assumes verb is
 | 
			
		||||
// UPPERCASE.
 | 
			
		||||
func CanonicalVerb(verb string, scope string) string {
 | 
			
		||||
@@ -655,6 +704,30 @@ func CleanVerb(verb string, request *http.Request, requestInfo *request.RequestI
 | 
			
		||||
	return reportedVerb
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// determineRequestNamespaceAndName computes name and namespace for the given requests
 | 
			
		||||
//
 | 
			
		||||
// note that the logic of this function was copy&pasted from cacher.go
 | 
			
		||||
// after an unsuccessful attempt of moving it to RequestInfo
 | 
			
		||||
//
 | 
			
		||||
// see: https://github.com/kubernetes/kubernetes/pull/120520
 | 
			
		||||
func determineRequestNamespaceAndName(ctx context.Context, opts *metainternalversion.ListOptions) (namespace, name string) {
 | 
			
		||||
	if requestNamespace, ok := request.NamespaceFrom(ctx); ok && len(requestNamespace) > 0 {
 | 
			
		||||
		namespace = requestNamespace
 | 
			
		||||
	} else if opts != nil && opts.FieldSelector != nil {
 | 
			
		||||
		if selectorNamespace, ok := opts.FieldSelector.RequiresExactMatch("metadata.namespace"); ok {
 | 
			
		||||
			namespace = selectorNamespace
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if requestInfo, ok := request.RequestInfoFrom(ctx); ok && requestInfo != nil && len(requestInfo.Name) > 0 {
 | 
			
		||||
		name = requestInfo.Name
 | 
			
		||||
	} else if opts != nil && opts.FieldSelector != nil {
 | 
			
		||||
		if selectorName, ok := opts.FieldSelector.RequiresExactMatch("metadata.name"); ok {
 | 
			
		||||
			name = selectorName
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// cleanVerb additionally ensures that unknown verbs don't clog up the metrics.
 | 
			
		||||
func cleanVerb(verb, suggestedVerb string, request *http.Request, requestInfo *request.RequestInfo) string {
 | 
			
		||||
	// CanonicalVerb (being an input for this function) doesn't handle correctly the
 | 
			
		||||
 
 | 
			
		||||
@@ -17,11 +17,14 @@ limitations under the License.
 | 
			
		||||
package metrics
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"context"
 | 
			
		||||
	"net/http"
 | 
			
		||||
	"net/url"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"testing"
 | 
			
		||||
 | 
			
		||||
	metainternalversion "k8s.io/apimachinery/pkg/apis/meta/internalversion"
 | 
			
		||||
	"k8s.io/apimachinery/pkg/fields"
 | 
			
		||||
	"k8s.io/apiserver/pkg/endpoints/request"
 | 
			
		||||
	"k8s.io/apiserver/pkg/endpoints/responsewriter"
 | 
			
		||||
	"k8s.io/component-base/metrics/legacyregistry"
 | 
			
		||||
@@ -466,3 +469,61 @@ func TestRecordDroppedRequests(t *testing.T) {
 | 
			
		||||
		})
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestCleanListScope(t *testing.T) {
 | 
			
		||||
	scenarios := []struct {
 | 
			
		||||
		name          string
 | 
			
		||||
		ctx           context.Context
 | 
			
		||||
		opts          *metainternalversion.ListOptions
 | 
			
		||||
		expectedScope string
 | 
			
		||||
	}{
 | 
			
		||||
		{
 | 
			
		||||
			name: "empty scope",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			name: "empty scope with empty request info",
 | 
			
		||||
			ctx:  request.WithRequestInfo(context.TODO(), &request.RequestInfo{}),
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			name:          "namespace from ctx",
 | 
			
		||||
			ctx:           request.WithNamespace(context.TODO(), "foo"),
 | 
			
		||||
			expectedScope: "namespace",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			name: "namespace from field selector",
 | 
			
		||||
			opts: &metainternalversion.ListOptions{
 | 
			
		||||
				FieldSelector: fields.ParseSelectorOrDie("metadata.namespace=foo"),
 | 
			
		||||
			},
 | 
			
		||||
			expectedScope: "namespace",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			name:          "name from request info",
 | 
			
		||||
			ctx:           request.WithRequestInfo(context.TODO(), &request.RequestInfo{Name: "bar"}),
 | 
			
		||||
			expectedScope: "resource",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			name: "name from field selector",
 | 
			
		||||
			opts: &metainternalversion.ListOptions{
 | 
			
		||||
				FieldSelector: fields.ParseSelectorOrDie("metadata.name=bar"),
 | 
			
		||||
			},
 | 
			
		||||
			expectedScope: "resource",
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			name:          "cluster scope request",
 | 
			
		||||
			ctx:           request.WithRequestInfo(context.TODO(), &request.RequestInfo{IsResourceRequest: true}),
 | 
			
		||||
			expectedScope: "cluster",
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, scenario := range scenarios {
 | 
			
		||||
		t.Run(scenario.name, func(t *testing.T) {
 | 
			
		||||
			if scenario.ctx == nil {
 | 
			
		||||
				scenario.ctx = context.TODO()
 | 
			
		||||
			}
 | 
			
		||||
			actualScope := CleanListScope(scenario.ctx, scenario.opts)
 | 
			
		||||
			if actualScope != scenario.expectedScope {
 | 
			
		||||
				t.Errorf("unexpected scope = %s, expected = %s", actualScope, scenario.expectedScope)
 | 
			
		||||
			}
 | 
			
		||||
		})
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user