test: use go-uber/goleak for strict leak checking
It provides more readable output and has additional APIs for using it inside a unit test. goleak.IgnoreCurrent is needed to filter out the goroutine that gets started when importing go.opencensus.io/stats/view. In order to handle background goroutines that get created on demand and cannot be stopped (like the one for LogzHealth), a helper function ensures that those are running before calling goleak.IgnoreCurrent. Keeping those goroutines running is not a problem and thus not worth the effort of adding new APIs to stop them. Other goroutines are genuine leaks for which no fix is available. Those get suppressed via IgnoreTopFunction, which works as long as that function is unique enough. Example output for the leak fixed in https://github.com/kubernetes/kubernetes/pull/115423: E0202 09:30:51.641841 74789 etcd.go:205] "EtcdMain goroutine check" err=< found unexpected goroutines: [Goroutine 4889 in state chan receive, with k8s.io/apimachinery/pkg/watch.(*Broadcaster).loop on top of the stack: goroutine 4889 [chan receive]: k8s.io/apimachinery/pkg/watch.(*Broadcaster).loop(0xc0076183c0) /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/watch/mux.go:268 +0x65 created by k8s.io/apimachinery/pkg/watch.NewBroadcaster /nvme/gopath/src/k8s.io/kubernetes/vendor/k8s.io/apimachinery/pkg/watch/mux.go:77 +0x116 >
This commit is contained in:
@@ -29,10 +29,10 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"go.uber.org/goleak"
|
||||
"google.golang.org/grpc/grpclog"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"k8s.io/kubernetes/pkg/util/env"
|
||||
)
|
||||
|
||||
@@ -182,37 +182,48 @@ func EtcdMain(tests func() int) {
|
||||
// Bail out early when -help was given as parameter.
|
||||
flag.Parse()
|
||||
|
||||
before := runtime.NumGoroutine()
|
||||
// Must be called *before* creating new goroutines.
|
||||
goleakOpts := IgnoreBackgroundGoroutines()
|
||||
|
||||
goleakOpts = append(goleakOpts,
|
||||
// lumberjack leaks a goroutine:
|
||||
// https://github.com/natefinch/lumberjack/issues/56 This affects tests
|
||||
// using --audit-log-path (like
|
||||
// ./test/integration/apiserver/admissionwebhook/reinvocation_test.go).
|
||||
// In normal production that should be harmless. We don't know here
|
||||
// whether the test is using that, so we have to suppress reporting
|
||||
// this leak for all tests.
|
||||
//
|
||||
// Both names occurred in practice.
|
||||
goleak.IgnoreTopFunction("k8s.io/kubernetes/vendor/gopkg.in/natefinch/lumberjack%2ev2.(*Logger).millRun"),
|
||||
goleak.IgnoreTopFunction("gopkg.in/natefinch/lumberjack%2ev2.(*Logger).millRun"),
|
||||
)
|
||||
|
||||
stop, err := startEtcd()
|
||||
if err != nil {
|
||||
klog.Fatalf("cannot run integration tests: unable to start etcd: %v", err)
|
||||
}
|
||||
result := tests()
|
||||
stop() // Don't defer this. See os.Exit documentation.
|
||||
klog.StopFlushDaemon()
|
||||
|
||||
checkNumberOfGoroutines := func() (bool, error) {
|
||||
// We leave some room for leaked goroutines as there are
|
||||
// still some leaks, mostly:
|
||||
// - leak from lumberjack package we're vendoring
|
||||
// - leak from apiserve healthz
|
||||
// - leak from opencensus library
|
||||
// Once fixed, we should be able to bring it down to zero.
|
||||
if dg := runtime.NumGoroutine() - before; dg <= 3 {
|
||||
return true, nil
|
||||
// Several tests don't wait for goroutines to stop. goleak.Find retries
|
||||
// internally, but not long enough. 5 seconds seemed to be enough for
|
||||
// most tests, even when testing in the CI.
|
||||
timeout := 5 * time.Second
|
||||
start := time.Now()
|
||||
for {
|
||||
err := goleak.Find(goleakOpts...)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
if time.Now().Sub(start) >= timeout {
|
||||
klog.ErrorS(err, "EtcdMain goroutine check")
|
||||
result = 1
|
||||
break
|
||||
}
|
||||
// Allow goroutines to schedule and die off.
|
||||
runtime.Gosched()
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// It generally takes visibly less than 1s to finish all goroutines.
|
||||
// But we keep the limit higher to account for cpu-starved environments.
|
||||
if err := wait.Poll(100*time.Millisecond, 5*time.Second, checkNumberOfGoroutines); err != nil {
|
||||
after := runtime.NumGoroutine()
|
||||
stacktraces := make([]byte, 1<<20)
|
||||
runtime.Stack(stacktraces, true)
|
||||
klog.Fatalf("unexpected number of goroutines: before: %d after %d\n%sd", before, after, string(stacktraces))
|
||||
}
|
||||
os.Exit(result)
|
||||
}
|
||||
|
||||
|
36
test/integration/framework/goleak.go
Normal file
36
test/integration/framework/goleak.go
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package framework
|
||||
|
||||
import (
|
||||
"go.uber.org/goleak"
|
||||
"k8s.io/apiserver/pkg/server/healthz"
|
||||
)
|
||||
|
||||
// IgnoreBackgroundGoroutines returns options for goleak.Find
|
||||
// which ignore goroutines created by "go test" and init functions,
|
||||
// like the one from go.opencensus.io/stats/view/worker.go.
|
||||
//
|
||||
// Goroutines that normally get created later when using the apiserver
|
||||
// get created already when calling this function, therefore they
|
||||
// also get ignored.
|
||||
func IgnoreBackgroundGoroutines() []goleak.Option {
|
||||
// Ensure that on-demand goroutines are running.
|
||||
_ = healthz.LogHealthz.Check(nil)
|
||||
|
||||
return []goleak.Option{goleak.IgnoreCurrent()}
|
||||
}
|
Reference in New Issue
Block a user