kubelet: add feature gate check for exec probe timeouts

This change also adds a custom error type for probe timeouts
so that the kubelet exec prober can distinguish between failed probes
that have exited and probes that have timed out.

Signed-off-by: Andrew Sy Kim <kim.andrewsy@gmail.com>
This commit is contained in:
Andrew Sy Kim 2020-09-06 13:52:49 -04:00
parent 0b6ef21233
commit 4bb30c3b0e
8 changed files with 79 additions and 17 deletions

View File

@ -3,3 +3,4 @@ rules:
- selectorRegexp: k8s[.]io/kubernetes
allowedPrefixes:
- k8s.io/kubernetes/pkg/kubelet/cri
- k8s.io/kubernetes/pkg/probe/exec

View File

@ -17,6 +17,7 @@ go_library(
importpath = "k8s.io/kubernetes/pkg/kubelet/cri/remote",
deps = [
"//pkg/kubelet/cri/remote/util:go_default_library",
"//pkg/probe/exec:go_default_library",
"//staging/src/k8s.io/component-base/logs/logreduction:go_default_library",
"//staging/src/k8s.io/cri-api/pkg/apis:go_default_library",
"//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library",

View File

@ -32,6 +32,7 @@ import (
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"k8s.io/kubernetes/pkg/kubelet/cri/remote/util"
"k8s.io/kubernetes/pkg/probe/exec"
utilexec "k8s.io/utils/exec"
)
@ -390,14 +391,9 @@ func (r *remoteRuntimeService) ExecSync(containerID string, cmd []string, timeou
if err != nil {
klog.Errorf("ExecSync %s '%s' from runtime service failed: %v", containerID, strings.Join(cmd, " "), err)
// If exec timed out, return utilexec.CodeExitError with an exit status as expected
// from prober for failed probes.
// TODO: utilexec should have a TimedoutError type and we should return it here once available.
// interpret DeadlineExceeded gRPC errors as timedout probes
if status.Code(err) == codes.DeadlineExceeded {
err = utilexec.CodeExitError{
Err: fmt.Errorf("command %q timed out", strings.Join(cmd, " ")),
Code: 1, // exit code here doesn't really matter, as long as it's not 0
}
err = exec.NewTimeoutError(fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), timeout)
}
return nil, nil, err

View File

@ -37,6 +37,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/credentialprovider:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/apis/config:go_default_library",
"//pkg/kubelet/checkpointmanager:go_default_library",
"//pkg/kubelet/checkpointmanager/checksum:go_default_library",
@ -55,11 +56,13 @@ go_library(
"//pkg/kubelet/types:go_default_library",
"//pkg/kubelet/util/cache:go_default_library",
"//pkg/kubelet/util/ioutils:go_default_library",
"//pkg/probe/exec:go_default_library",
"//pkg/util/parsers:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/tools/remotecommand:go_default_library",
"//staging/src/k8s.io/cri-api/pkg/apis/runtime/v1alpha2:go_default_library",
"//vendor/github.com/armon/circbuf:go_default_library",

View File

@ -27,11 +27,13 @@ import (
dockertypes "github.com/docker/docker/api/types"
"k8s.io/klog/v2"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/tools/remotecommand"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/probe/exec"
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
utilexec "k8s.io/utils/exec"
)
// ExecHandler knows how to execute a command in a running Docker container.
@ -112,8 +114,9 @@ func (*NativeExecHandler) ExecInContainer(client libdocker.Interface, container
return err
}
// if ExecProbeTimeout feature gate is disabled, preserve existing behavior to ignore exec timeouts
var execTimeout <-chan time.Time
if timeout > 0 {
if timeout > 0 && utilfeature.DefaultFeatureGate.Enabled(features.ExecProbeTimeout) {
execTimeout = time.After(timeout)
} else {
// skip exec timeout if provided timeout is 0
@ -126,13 +129,7 @@ func (*NativeExecHandler) ExecInContainer(client libdocker.Interface, container
for {
select {
case <-execTimeout:
// If exec timed out, return utilexec.CodeExitError with an exit status as expected
// from prober for failed probes.
// TODO: utilexec should have a TimedoutError type and we should return it here once available.
return utilexec.CodeExitError{
Err: fmt.Errorf("command %q timed out", strings.Join(cmd, " ")),
Code: 1, // exit code here doesn't really matter, as long as it's not 0
}
return exec.NewTimeoutError(fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), timeout)
// need to use "default" here instead of <-ticker.C, otherwise we delay the initial InspectExec by 2 seconds.
default:
inspect, inspectErr := client.InspectExec(execObj.ID)

View File

@ -8,11 +8,16 @@ load(
go_library(
name = "go_default_library",
srcs = ["exec.go"],
srcs = [
"errors.go",
"exec.go",
],
importpath = "k8s.io/kubernetes/pkg/probe/exec",
deps = [
"//pkg/features:go_default_library",
"//pkg/kubelet/util/ioutils:go_default_library",
"//pkg/probe:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/klog/v2:go_default_library",
"//vendor/k8s.io/utils/exec:go_default_library",
],

47
pkg/probe/exec/errors.go Normal file
View File

@ -0,0 +1,47 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package exec
import (
"time"
)
// NewTimeoutError returns a new TimeoutError that wraps err and records the
// timeout that was in effect for the exec call.
func NewTimeoutError(err error, timeout time.Duration) *TimeoutError {
	return &TimeoutError{
		err:     err,
		timeout: timeout,
	}
}

// TimeoutError is an error returned on exec probe timeouts. It should be returned by CRI implementations
// in order for the exec prober to interpret exec timeouts as failed probes.
// TODO: this error type can likely be removed when we support CRI errors.
type TimeoutError struct {
	// err is the underlying error describing the timed out command; may be nil.
	err error
	// timeout is the duration passed to NewTimeoutError.
	timeout time.Duration
}

// Error returns the error string of the wrapped error. If no error was
// wrapped, it returns a generic message that includes the timeout instead of
// dereferencing a nil error.
func (t *TimeoutError) Error() string {
	if t.err == nil {
		return "command timed out after " + t.timeout.String()
	}
	return t.err.Error()
}

// Unwrap returns the wrapped error (possibly nil) so that errors.Is and
// errors.As can inspect the underlying cause.
func (t *TimeoutError) Unwrap() error {
	return t.err
}

// Timeout returns the timeout duration of the exec probe.
func (t *TimeoutError) Timeout() time.Duration {
	return t.timeout
}

View File

@ -19,6 +19,8 @@ package exec
import (
"bytes"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/util/ioutils"
"k8s.io/kubernetes/pkg/probe"
@ -66,6 +68,16 @@ func (pr execProber) Probe(e exec.Cmd) (probe.Result, string, error) {
}
return probe.Failure, string(data), nil
}
timeoutErr, ok := err.(*TimeoutError)
if ok {
if utilfeature.DefaultFeatureGate.Enabled(features.ExecProbeTimeout) {
return probe.Failure, string(data), nil
}
klog.Warningf("Exec probe timed out after %s but ExecProbeTimeout feature gate was disabled", timeoutErr.Timeout())
}
return probe.Unknown, "", err
}
return probe.Success, string(data), nil