Merge pull request #121970 from pohly/log-apimachinery-runtime
apimachinery runtime: support contextual logging
This commit is contained in:
		| @@ -127,6 +127,7 @@ linters-settings: # please keep this alphabetized | ||||
|           # The following packages have been migrated to contextual logging. | ||||
|           # Packages matched here do not have to be listed above because | ||||
|           # "contextual" implies "structured". | ||||
|           contextual k8s.io/apimachinery/pkg/util/runtime/.* | ||||
|           contextual k8s.io/client-go/metadata/.* | ||||
|           contextual k8s.io/client-go/tools/events/.* | ||||
|           contextual k8s.io/client-go/tools/record/.* | ||||
|   | ||||
| @@ -173,6 +173,7 @@ linters-settings: # please keep this alphabetized | ||||
|           # The following packages have been migrated to contextual logging. | ||||
|           # Packages matched here do not have to be listed above because | ||||
|           # "contextual" implies "structured". | ||||
|           contextual k8s.io/apimachinery/pkg/util/runtime/.* | ||||
|           contextual k8s.io/client-go/metadata/.* | ||||
|           contextual k8s.io/client-go/tools/events/.* | ||||
|           contextual k8s.io/client-go/tools/record/.* | ||||
|   | ||||
| @@ -176,6 +176,7 @@ linters-settings: # please keep this alphabetized | ||||
|           # The following packages have been migrated to contextual logging. | ||||
|           # Packages matched here do not have to be listed above because | ||||
|           # "contextual" implies "structured". | ||||
|           contextual k8s.io/apimachinery/pkg/util/runtime/.* | ||||
|           contextual k8s.io/client-go/metadata/.* | ||||
|           contextual k8s.io/client-go/tools/events/.* | ||||
|           contextual k8s.io/client-go/tools/record/.* | ||||
|   | ||||
| @@ -24,6 +24,7 @@ structured k8s.io/apiserver/pkg/server/options/encryptionconfig/.* | ||||
| # The following packages have been migrated to contextual logging. | ||||
| # Packages matched here do not have to be listed above because | ||||
| # "contextual" implies "structured". | ||||
| contextual k8s.io/apimachinery/pkg/util/runtime/.* | ||||
| contextual k8s.io/client-go/metadata/.* | ||||
| contextual k8s.io/client-go/tools/events/.* | ||||
| contextual k8s.io/client-go/tools/record/.* | ||||
|   | ||||
| @@ -17,6 +17,7 @@ limitations under the License. | ||||
| package runtime | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"net/http" | ||||
| 	"runtime" | ||||
| @@ -35,7 +36,7 @@ var ( | ||||
| ) | ||||
|  | ||||
| // PanicHandlers is a list of functions which will be invoked when a panic happens. | ||||
| var PanicHandlers = []func(interface{}){logPanic} | ||||
| var PanicHandlers = []func(context.Context, interface{}){logPanic} | ||||
|  | ||||
| // HandleCrash simply catches a crash and logs an error. Meant to be called via | ||||
| // defer.  Additional context-specific handlers can be provided, and will be | ||||
| @@ -43,23 +44,54 @@ var PanicHandlers = []func(interface{}){logPanic} | ||||
| // handlers and logging the panic message. | ||||
| // | ||||
| // E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully. | ||||
| // | ||||
| // TODO(pohly): logcheck:context // HandleCrashWithContext should be used instead of HandleCrash in code which supports contextual logging. | ||||
| func HandleCrash(additionalHandlers ...func(interface{})) { | ||||
| 	if r := recover(); r != nil { | ||||
| 		for _, fn := range PanicHandlers { | ||||
| 			fn(r) | ||||
| 		} | ||||
| 		for _, fn := range additionalHandlers { | ||||
| 			fn(r) | ||||
| 		} | ||||
| 		if ReallyCrash { | ||||
| 			// Actually proceed to panic. | ||||
| 			panic(r) | ||||
| 		additionalHandlersWithContext := make([]func(context.Context, interface{}), len(additionalHandlers)) | ||||
| 		for i, handler := range additionalHandlers { | ||||
| 			handler := handler // capture loop variable | ||||
| 			additionalHandlersWithContext[i] = func(_ context.Context, r interface{}) { | ||||
| 				handler(r) | ||||
| 			} | ||||
| 		} | ||||
|  | ||||
| 		handleCrash(context.Background(), r, additionalHandlersWithContext...) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // HandleCrashWithContext simply catches a crash and logs an error. Meant to be called via | ||||
| // defer.  Additional context-specific handlers can be provided, and will be | ||||
| // called in case of panic.  HandleCrash actually crashes, after calling the | ||||
| // handlers and logging the panic message. | ||||
| // | ||||
| // E.g., you can provide one or more additional handlers for something like shutting down go routines gracefully. | ||||
| // | ||||
| // The context is used to determine how to log. | ||||
| func HandleCrashWithContext(ctx context.Context, additionalHandlers ...func(context.Context, interface{})) { | ||||
| 	if r := recover(); r != nil { | ||||
| 		handleCrash(ctx, r, additionalHandlers...) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // handleCrash is the common implementation of HandleCrash and HandleCrash. | ||||
| // Having those call a common implementation ensures that the stack depth | ||||
| // is the same regardless through which path the handlers get invoked. | ||||
| func handleCrash(ctx context.Context, r any, additionalHandlers ...func(context.Context, interface{})) { | ||||
| 	for _, fn := range PanicHandlers { | ||||
| 		fn(ctx, r) | ||||
| 	} | ||||
| 	for _, fn := range additionalHandlers { | ||||
| 		fn(ctx, r) | ||||
| 	} | ||||
| 	if ReallyCrash { | ||||
| 		// Actually proceed to panic. | ||||
| 		panic(r) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // logPanic logs the caller tree when a panic occurs (except in the special case of http.ErrAbortHandler). | ||||
| func logPanic(r interface{}) { | ||||
| func logPanic(ctx context.Context, r interface{}) { | ||||
| 	if r == http.ErrAbortHandler { | ||||
| 		// honor the http.ErrAbortHandler sentinel panic value: | ||||
| 		//   ErrAbortHandler is a sentinel panic value to abort a handler. | ||||
| @@ -73,10 +105,20 @@ func logPanic(r interface{}) { | ||||
| 	const size = 64 << 10 | ||||
| 	stacktrace := make([]byte, size) | ||||
| 	stacktrace = stacktrace[:runtime.Stack(stacktrace, false)] | ||||
|  | ||||
| 	// We don't really know how many call frames to skip because the Go | ||||
| 	// panic handler is between us and the code where the panic occurred. | ||||
| 	// If it's one function (as in Go 1.21), then skipping four levels | ||||
| 	// gets us to the function which called the `defer HandleCrashWithontext(...)`. | ||||
| 	logger := klog.FromContext(ctx).WithCallDepth(4) | ||||
|  | ||||
| 	// For backwards compatibility, conversion to string | ||||
| 	// is handled here instead of defering to the logging | ||||
| 	// backend. | ||||
| 	if _, ok := r.(string); ok { | ||||
| 		klog.Errorf("Observed a panic: %s\n%s", r, stacktrace) | ||||
| 		logger.Error(nil, "Observed a panic", "panic", r, "stacktrace", string(stacktrace)) | ||||
| 	} else { | ||||
| 		klog.Errorf("Observed a panic: %#v (%v)\n%s", r, r, stacktrace) | ||||
| 		logger.Error(nil, "Observed a panic", "panic", fmt.Sprintf("%v", r), "panicGoValue", fmt.Sprintf("%#v", r), "stacktrace", string(stacktrace)) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @@ -84,35 +126,76 @@ func logPanic(r interface{}) { | ||||
| // error occurs. | ||||
| // TODO(lavalamp): for testability, this and the below HandleError function | ||||
| // should be packaged up into a testable and reusable object. | ||||
| var ErrorHandlers = []func(error){ | ||||
| var ErrorHandlers = []ErrorHandler{ | ||||
| 	logError, | ||||
| 	(&rudimentaryErrorBackoff{ | ||||
| 		lastErrorTime: time.Now(), | ||||
| 		// 1ms was the number folks were able to stomach as a global rate limit. | ||||
| 		// If you need to log errors more than 1000 times a second you | ||||
| 		// should probably consider fixing your code instead. :) | ||||
| 		minPeriod: time.Millisecond, | ||||
| 	}).OnError, | ||||
| 	func(_ context.Context, _ error, _ string, _ ...interface{}) { | ||||
| 		(&rudimentaryErrorBackoff{ | ||||
| 			lastErrorTime: time.Now(), | ||||
| 			// 1ms was the number folks were able to stomach as a global rate limit. | ||||
| 			// If you need to log errors more than 1000 times a second you | ||||
| 			// should probably consider fixing your code instead. :) | ||||
| 			minPeriod: time.Millisecond, | ||||
| 		}).OnError() | ||||
| 	}, | ||||
| } | ||||
|  | ||||
| type ErrorHandler func(ctx context.Context, err error, msg string, keysAndValues ...interface{}) | ||||
|  | ||||
| // HandlerError is a method to invoke when a non-user facing piece of code cannot | ||||
| // return an error and needs to indicate it has been ignored. Invoking this method | ||||
| // is preferable to logging the error - the default behavior is to log but the | ||||
| // errors may be sent to a remote server for analysis. | ||||
| // | ||||
| // TODO(pohly): logcheck:context // HandleErrorWithContext should be used instead of HandleError in code which supports contextual logging. | ||||
| func HandleError(err error) { | ||||
| 	// this is sometimes called with a nil error.  We probably shouldn't fail and should do nothing instead | ||||
| 	if err == nil { | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	handleError(context.Background(), err, "Unhandled Error") | ||||
| } | ||||
|  | ||||
| // HandlerErrorWithContext is a method to invoke when a non-user facing piece of code cannot | ||||
| // return an error and needs to indicate it has been ignored. Invoking this method | ||||
| // is preferable to logging the error - the default behavior is to log but the | ||||
| // errors may be sent to a remote server for analysis. The context is used to | ||||
| // determine how to log the error. | ||||
| // | ||||
| // If contextual logging is enabled, the default log output is equivalent to | ||||
| // | ||||
| //	logr.FromContext(ctx).WithName("UnhandledError").Error(err, msg, keysAndValues...) | ||||
| // | ||||
| // Without contextual logging, it is equivalent to: | ||||
| // | ||||
| //	klog.ErrorS(err, msg, keysAndValues...) | ||||
| // | ||||
| // In contrast to HandleError, passing nil for the error is still going to | ||||
| // trigger a log entry. Don't construct a new error or wrap an error | ||||
| // with fmt.Errorf. Instead, add additional information via the mssage | ||||
| // and key/value pairs. | ||||
| // | ||||
| // This variant should be used instead of HandleError because it supports | ||||
| // structured, contextual logging. | ||||
| func HandleErrorWithContext(ctx context.Context, err error, msg string, keysAndValues ...interface{}) { | ||||
| 	handleError(ctx, err, msg, keysAndValues...) | ||||
| } | ||||
|  | ||||
| // handleError is the common implementation of HandleError and HandleErrorWithContext. | ||||
| // Using this common implementation ensures that the stack depth | ||||
| // is the same regardless through which path the handlers get invoked. | ||||
| func handleError(ctx context.Context, err error, msg string, keysAndValues ...interface{}) { | ||||
| 	for _, fn := range ErrorHandlers { | ||||
| 		fn(err) | ||||
| 		fn(ctx, err, msg, keysAndValues...) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // logError prints an error with the call stack of the location it was reported | ||||
| func logError(err error) { | ||||
| 	klog.ErrorDepth(2, err) | ||||
| // logError prints an error with the call stack of the location it was reported. | ||||
| // It expects to be called as <caller> -> HandleError[WithContext] -> handleError -> logError. | ||||
| func logError(ctx context.Context, err error, msg string, keysAndValues ...interface{}) { | ||||
| 	logger := klog.FromContext(ctx).WithCallDepth(3) | ||||
| 	logger = klog.LoggerWithName(logger, "UnhandledError") | ||||
| 	logger.Error(err, msg, keysAndValues...) //nolint:logcheck // logcheck complains about unknown key/value pairs. | ||||
| } | ||||
|  | ||||
| type rudimentaryErrorBackoff struct { | ||||
| @@ -125,7 +208,7 @@ type rudimentaryErrorBackoff struct { | ||||
|  | ||||
| // OnError will block if it is called more often than the embedded period time. | ||||
| // This will prevent overly tight hot error loops. | ||||
| func (r *rudimentaryErrorBackoff) OnError(error) { | ||||
| func (r *rudimentaryErrorBackoff) OnError() { | ||||
| 	now := time.Now() // start the timer before acquiring the lock | ||||
| 	r.lastErrorTimeLock.Lock() | ||||
| 	d := now.Sub(r.lastErrorTime) | ||||
|   | ||||
| @@ -0,0 +1,71 @@ | ||||
| /* | ||||
| Copyright 2014 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package runtime | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"context" | ||||
| 	"errors" | ||||
| 	"flag" | ||||
| 	"fmt" | ||||
| 	"regexp" | ||||
|  | ||||
| 	"k8s.io/klog/v2" | ||||
| ) | ||||
|  | ||||
| //nolint:logcheck // Several functions are normally not okay in a package. | ||||
| func ExampleHandleErrorWithContext() { | ||||
| 	state := klog.CaptureState() | ||||
| 	defer state.Restore() | ||||
| 	var fs flag.FlagSet | ||||
| 	klog.InitFlags(&fs) | ||||
| 	for flag, value := range map[string]string{ | ||||
| 		"one_output":  "true", | ||||
| 		"logtostderr": "false", | ||||
| 	} { | ||||
| 		if err := fs.Set(flag, value); err != nil { | ||||
| 			fmt.Printf("Unexpected error configuring klog: %v", err) | ||||
| 			return | ||||
| 		} | ||||
| 	} | ||||
| 	var buffer bytes.Buffer | ||||
| 	klog.SetOutput(&buffer) | ||||
|  | ||||
| 	logger := klog.Background() | ||||
| 	logger = klog.LoggerWithValues(logger, "request", 42) | ||||
| 	ctx := klog.NewContext(context.Background(), logger) | ||||
|  | ||||
| 	// The line number of the next call must be at line 60. Here are some | ||||
| 	// blank lines that can be removed to keep the line unchanged. | ||||
| 	// | ||||
| 	// | ||||
| 	// | ||||
| 	// | ||||
| 	// | ||||
| 	// | ||||
| 	HandleErrorWithContext(ctx, errors.New("fake error"), "test") | ||||
|  | ||||
| 	klog.Flush() | ||||
| 	// Strip varying header. Code location should be constant and something | ||||
| 	// that needs to be tested. | ||||
| 	output := buffer.String() | ||||
| 	output = regexp.MustCompile(`^.* ([^[:space:]]*.go:[[:digit:]]*)\] `).ReplaceAllString(output, `xxx $1] `) | ||||
| 	fmt.Print(output) | ||||
|  | ||||
| 	// Output: | ||||
| 	// xxx runtime_stack_test.go:60] "test" err="fake error" logger="UnhandledError" request=42 | ||||
| } | ||||
| @@ -18,6 +18,7 @@ package runtime | ||||
|  | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"net/http" | ||||
| @@ -43,8 +44,8 @@ func TestCustomHandleCrash(t *testing.T) { | ||||
| 	old := PanicHandlers | ||||
| 	defer func() { PanicHandlers = old }() | ||||
| 	var result interface{} | ||||
| 	PanicHandlers = []func(interface{}){ | ||||
| 		func(r interface{}) { | ||||
| 	PanicHandlers = []func(context.Context, interface{}){ | ||||
| 		func(_ context.Context, r interface{}) { | ||||
| 			result = r | ||||
| 		}, | ||||
| 	} | ||||
| @@ -66,8 +67,8 @@ func TestCustomHandleError(t *testing.T) { | ||||
| 	old := ErrorHandlers | ||||
| 	defer func() { ErrorHandlers = old }() | ||||
| 	var result error | ||||
| 	ErrorHandlers = []func(error){ | ||||
| 		func(err error) { | ||||
| 	ErrorHandlers = []ErrorHandler{ | ||||
| 		func(_ context.Context, err error, msg string, keysAndValues ...interface{}) { | ||||
| 			result = err | ||||
| 		}, | ||||
| 	} | ||||
| @@ -101,7 +102,8 @@ func TestHandleCrashLog(t *testing.T) { | ||||
| 	if len(lines) < 4 { | ||||
| 		t.Fatalf("panic log should have 1 line of message, 1 line per goroutine and 2 lines per function call") | ||||
| 	} | ||||
| 	if match, _ := regexp.MatchString("Observed a panic: test panic", lines[0]); !match { | ||||
| 	t.Logf("Got log output:\n%s", strings.Join(lines, "\n")) | ||||
| 	if match, _ := regexp.MatchString(`"Observed a panic" panic="test panic"`, lines[0]); !match { | ||||
| 		t.Errorf("mismatch panic message: %s", lines[0]) | ||||
| 	} | ||||
| 	// The following regexp's verify that Kubernetes panic log matches Golang stdlib | ||||
| @@ -170,7 +172,7 @@ func Test_rudimentaryErrorBackoff_OnError_ParallelSleep(t *testing.T) { | ||||
| 		wg.Add(1) | ||||
| 		go func() { | ||||
| 			<-start | ||||
| 			r.OnError(nil) // input error is ignored | ||||
| 			r.OnError() | ||||
| 			wg.Done() | ||||
| 		}() | ||||
| 	} | ||||
|   | ||||
| @@ -194,8 +194,8 @@ func TestJitterUntilRecoversPanic(t *testing.T) { | ||||
|  | ||||
| 	// Hook up a custom crash handler to ensure it is called when a jitter function panics | ||||
| 	runtime.ReallyCrash = false | ||||
| 	runtime.PanicHandlers = []func(interface{}){ | ||||
| 		func(p interface{}) { | ||||
| 	runtime.PanicHandlers = []func(context.Context, interface{}){ | ||||
| 		func(_ context.Context, p interface{}) { | ||||
| 			handled++ | ||||
| 		}, | ||||
| 	} | ||||
|   | ||||
| @@ -137,9 +137,7 @@ func (t *timeoutHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { | ||||
| 				} | ||||
|  | ||||
| 				metrics.RecordRequestPostTimeout(metrics.PostTimeoutSourceTimeoutHandler, status) | ||||
| 				err := fmt.Errorf("post-timeout activity - time-elapsed: %s, %v %q result: %v", | ||||
| 					time.Since(timedOutAt), r.Method, r.URL.Path, res) | ||||
| 				utilruntime.HandleError(err) | ||||
| 				utilruntime.HandleErrorWithContext(r.Context(), nil, "Post-timeout activity", "timeElapsed", time.Since(timedOutAt), "method", r.Method, "path", r.URL.Path, "result", res) | ||||
| 			}() | ||||
| 		}() | ||||
| 		httplog.SetStacktracePredicate(r.Context(), func(status int) bool { | ||||
|   | ||||
| @@ -408,7 +408,9 @@ func TestErrConnKilled(t *testing.T) { | ||||
| 	if isStackTraceLoggedByRuntime(capturedOutput) { | ||||
| 		t.Errorf("unexpected stack trace in log, actual = %v", capturedOutput) | ||||
| 	} | ||||
| 	if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) { | ||||
| 	// For the sake of simplicity and clarity this matches the full log line. | ||||
| 	// This is not part of the Kubernetes API and could change. | ||||
| 	if !strings.Contains(capturedOutput, `"Timeout or abort while handling" logger="UnhandledError" method="GET" URI="/" auditID=""`) { | ||||
| 		t.Errorf("unexpected output captured actual = %v", capturedOutput) | ||||
| 	} | ||||
| } | ||||
| @@ -501,7 +503,9 @@ func TestErrConnKilledHTTP2(t *testing.T) { | ||||
| 	if isStackTraceLoggedByRuntime(capturedOutput) { | ||||
| 		t.Errorf("unexpected stack trace in log, actual = %v", capturedOutput) | ||||
| 	} | ||||
| 	if !strings.Contains(capturedOutput, `timeout or abort while handling: method=GET URI="/" audit-ID=""`) { | ||||
| 	// For the sake of simplicity and clarity this matches the full log line. | ||||
| 	// This is not part of the Kubernetes API and could change. | ||||
| 	if !strings.Contains(capturedOutput, `"Timeout or abort while handling" logger="UnhandledError" method="GET" URI="/" auditID=""`) { | ||||
| 		t.Errorf("unexpected output captured actual = %v", capturedOutput) | ||||
| 	} | ||||
|  | ||||
|   | ||||
| @@ -17,7 +17,6 @@ limitations under the License. | ||||
| package filters | ||||
|  | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"net/http" | ||||
|  | ||||
| 	"k8s.io/apimachinery/pkg/util/runtime" | ||||
| @@ -51,7 +50,7 @@ func WithPanicRecovery(handler http.Handler, resolver request.RequestInfoResolve | ||||
| 			// This call can have different handlers, but the default chain rate limits. Call it after the metrics are updated | ||||
| 			// in case the rate limit delays it.  If you outrun the rate for this one timed out requests, something has gone | ||||
| 			// seriously wrong with your server, but generally having a logging signal for timeouts is useful. | ||||
| 			runtime.HandleError(fmt.Errorf("timeout or abort while handling: method=%v URI=%q audit-ID=%q", req.Method, req.RequestURI, audit.GetAuditIDTruncated(req.Context()))) | ||||
| 			runtime.HandleErrorWithContext(req.Context(), nil, "Timeout or abort while handling", "method", req.Method, "URI", req.RequestURI, "auditID", audit.GetAuditIDTruncated(req.Context())) | ||||
| 			return | ||||
| 		} | ||||
| 		http.Error(w, "This request caused apiserver to panic. Look in the logs for details.", http.StatusInternalServerError) | ||||
|   | ||||
| @@ -79,7 +79,7 @@ func webSocketServerStreams(req *http.Request, w http.ResponseWriter, opts Optio | ||||
| 				if p := recover(); p != nil { | ||||
| 					// Standard panic logging. | ||||
| 					for _, fn := range runtime.PanicHandlers { | ||||
| 						fn(p) | ||||
| 						fn(req.Context(), p) | ||||
| 					} | ||||
| 				} | ||||
| 			}() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Prow Robot
					Kubernetes Prow Robot