kubelet: add feature gate check for exec probe timeouts
This change also involves adding a custom error type for probe timeouts so that the kubelet exec prober can distinguish between failed probes that have exited or probes that have timed out. Signed-off-by: Andrew Sy Kim <kim.andrewsy@gmail.com>
This commit is contained in:
parent
0b6ef21233
commit
4bb30c3b0e
@ -3,3 +3,4 @@ rules:
|
||||
- selectorRegexp: k8s[.]io/kubernetes
|
||||
allowedPrefixes:
|
||||
- k8s.io/kubernetes/pkg/kubelet/cri
|
||||
- k8s.io/kubernetes/pkg/probe/exec
|
||||
|
@ -17,6 +17,7 @@ go_library(
|
||||
importpath = "k8s.io/kubernetes/pkg/kubelet/cri/remote",
|
||||
deps = [
|
||||
"//pkg/kubelet/cri/remote/util:go_default_library",
|
||||
"//pkg/probe/exec:go_default_library",
|
||||
"//staging/src/k8s.io/component-base/logs/logreduction:go_default_library",
|
||||
"//staging/src/k8s.io/cri-api/pkg/apis:go_default_library",
|
||||
"//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library",
|
||||
|
@ -32,6 +32,7 @@ import (
|
||||
internalapi "k8s.io/cri-api/pkg/apis"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cri/remote/util"
|
||||
"k8s.io/kubernetes/pkg/probe/exec"
|
||||
utilexec "k8s.io/utils/exec"
|
||||
)
|
||||
|
||||
@ -390,14 +391,9 @@ func (r *remoteRuntimeService) ExecSync(containerID string, cmd []string, timeou
|
||||
if err != nil {
|
||||
klog.Errorf("ExecSync %s '%s' from runtime service failed: %v", containerID, strings.Join(cmd, " "), err)
|
||||
|
||||
// If exec timed out, return utilexec.CodeExitError with an exit status as expected
|
||||
// from prober for failed probes.
|
||||
// TODO: utilexec should have a TimedoutError type and we should return it here once available.
|
||||
// interpret DeadlineExceeded gRPC errors as timedout probes
|
||||
if status.Code(err) == codes.DeadlineExceeded {
|
||||
err = utilexec.CodeExitError{
|
||||
Err: fmt.Errorf("command %q timed out", strings.Join(cmd, " ")),
|
||||
Code: 1, // exit code here doesn't really matter, as long as it's not 0
|
||||
}
|
||||
err = exec.NewTimeoutError(fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), timeout)
|
||||
}
|
||||
|
||||
return nil, nil, err
|
||||
|
@ -37,6 +37,7 @@ go_library(
|
||||
visibility = ["//visibility:public"],
|
||||
deps = [
|
||||
"//pkg/credentialprovider:go_default_library",
|
||||
"//pkg/features:go_default_library",
|
||||
"//pkg/kubelet/apis/config:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager:go_default_library",
|
||||
"//pkg/kubelet/checkpointmanager/checksum:go_default_library",
|
||||
@ -55,11 +56,13 @@ go_library(
|
||||
"//pkg/kubelet/types:go_default_library",
|
||||
"//pkg/kubelet/util/cache:go_default_library",
|
||||
"//pkg/kubelet/util/ioutils:go_default_library",
|
||||
"//pkg/probe/exec:go_default_library",
|
||||
"//pkg/util/parsers:go_default_library",
|
||||
"//staging/src/k8s.io/api/core/v1:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
|
||||
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
|
||||
"//staging/src/k8s.io/client-go/tools/remotecommand:go_default_library",
|
||||
"//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library",
|
||||
"//vendor/github.com/armon/circbuf:go_default_library",
|
||||
|
@ -27,11 +27,13 @@ import (
|
||||
dockertypes "github.com/docker/docker/api/types"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/client-go/tools/remotecommand"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/probe/exec"
|
||||
|
||||
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
|
||||
utilexec "k8s.io/utils/exec"
|
||||
)
|
||||
|
||||
// ExecHandler knows how to execute a command in a running Docker container.
|
||||
@ -112,8 +114,9 @@ func (*NativeExecHandler) ExecInContainer(client libdocker.Interface, container
|
||||
return err
|
||||
}
|
||||
|
||||
// if ExecProbeTimeout feature gate is disabled, preserve existing behavior to ignore exec timeouts
|
||||
var execTimeout <-chan time.Time
|
||||
if timeout > 0 {
|
||||
if timeout > 0 && utilfeature.DefaultFeatureGate.Enabled(features.ExecProbeTimeout) {
|
||||
execTimeout = time.After(timeout)
|
||||
} else {
|
||||
// skip exec timeout if provided timeout is 0
|
||||
@ -126,13 +129,7 @@ func (*NativeExecHandler) ExecInContainer(client libdocker.Interface, container
|
||||
for {
|
||||
select {
|
||||
case <-execTimeout:
|
||||
// If exec timed out, return utilexec.CodeExitError with an exit status as expected
|
||||
// from prober for failed probes.
|
||||
// TODO: utilexec should have a TimedoutError type and we should return it here once available.
|
||||
return utilexec.CodeExitError{
|
||||
Err: fmt.Errorf("command %q timed out", strings.Join(cmd, " ")),
|
||||
Code: 1, // exit code here doesn't really matter, as long as it's not 0
|
||||
}
|
||||
return exec.NewTimeoutError(fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), timeout)
|
||||
// need to use "default" here instead of <-ticker.C, otherwise we delay the initial InspectExec by 2 seconds.
|
||||
default:
|
||||
inspect, inspectErr := client.InspectExec(execObj.ID)
|
||||
|
@ -8,11 +8,16 @@ load(
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["exec.go"],
|
||||
srcs = [
|
||||
"errors.go",
|
||||
"exec.go",
|
||||
],
|
||||
importpath = "k8s.io/kubernetes/pkg/probe/exec",
|
||||
deps = [
|
||||
"//pkg/features:go_default_library",
|
||||
"//pkg/kubelet/util/ioutils:go_default_library",
|
||||
"//pkg/probe:go_default_library",
|
||||
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
|
||||
"//vendor/k8s.io/klog/v2:go_default_library",
|
||||
"//vendor/k8s.io/utils/exec:go_default_library",
|
||||
],
|
||||
|
47
pkg/probe/exec/errors.go
Normal file
47
pkg/probe/exec/errors.go
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package exec
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// NewTimeoutError returns a new TimeoutError.
|
||||
func NewTimeoutError(err error, timeout time.Duration) *TimeoutError {
|
||||
return &TimeoutError{
|
||||
err: err,
|
||||
timeout: timeout,
|
||||
}
|
||||
}
|
||||
|
||||
// TimeoutError is an error returned on exec probe timeouts. It should be returned by CRI implementations
|
||||
// in order for the exec prober to interpret exec timeouts as failed probes.
|
||||
// TODO: this error type can likely be removed when we support CRI errors.
|
||||
type TimeoutError struct {
|
||||
err error
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
// Error returns the error string.
|
||||
func (t *TimeoutError) Error() string {
|
||||
return t.err.Error()
|
||||
}
|
||||
|
||||
// Timeout returns the timeout duration of the exec probe.
|
||||
func (t *TimeoutError) Timeout() time.Duration {
|
||||
return t.timeout
|
||||
}
|
@ -19,6 +19,8 @@ package exec
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
"k8s.io/kubernetes/pkg/kubelet/util/ioutils"
|
||||
"k8s.io/kubernetes/pkg/probe"
|
||||
|
||||
@ -66,6 +68,16 @@ func (pr execProber) Probe(e exec.Cmd) (probe.Result, string, error) {
|
||||
}
|
||||
return probe.Failure, string(data), nil
|
||||
}
|
||||
|
||||
timeoutErr, ok := err.(*TimeoutError)
|
||||
if ok {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(features.ExecProbeTimeout) {
|
||||
return probe.Failure, string(data), nil
|
||||
}
|
||||
|
||||
klog.Warningf("Exec probe timed out after %s but ExecProbeTimeout feature gate was disabled", timeoutErr.Timeout())
|
||||
}
|
||||
|
||||
return probe.Unknown, "", err
|
||||
}
|
||||
return probe.Success, string(data), nil
|
||||
|
Loading…
Reference in New Issue
Block a user