Merge pull request #36887 from hex108/pullImage
Automatic merge from submit-queue (batch tested with PRs 38432, 36887, 38415) Add --image-pull-stuck-timeout option to kubelet In this PR, add the --image-pull-stuck-timeout option (registered in this commit as --image-pull-progress-deadline) to specify how long an image pull may go without progress before it is considered stuck. While docker extracts an image layer, no progress is reported. The time without progress can exceed 1m if the layer is big or the system is busy. It happened in our cluster, so I added the above option to make the timeout configurable. Related error log: <pre> [... kube_docker_client.go:29] Cancel pulling image "our_registry/demo/test" because of no progress for 1m0s, latest progress "c914ad57d670": Extracting [==================>] 513.5 MB/513.5MB" [... docker_manager.go:2254] container start failed: ErrImagePull: net/http: request canceled </pre>
This commit is contained in:
@@ -200,6 +200,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
||||
fs.StringVar(&s.CgroupRoot, "cgroup-root", s.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.")
|
||||
fs.StringVar(&s.ContainerRuntime, "container-runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'.")
|
||||
fs.DurationVar(&s.RuntimeRequestTimeout.Duration, "runtime-request-timeout", s.RuntimeRequestTimeout.Duration, "Timeout of all runtime requests except long running request - pull, logs, exec and attach. When timeout exceeded, kubelet will cancel the request, throw out an error and retry later. Default: 2m0s")
|
||||
fs.DurationVar(&s.ImagePullProgressDeadline.Duration, "image-pull-progress-deadline", s.ImagePullProgressDeadline.Duration, "If no pulling progress is made before this deadline, the image pulling will be cancelled. Default: 1m0s.")
|
||||
fs.StringVar(&s.LockFilePath, "lock-file", s.LockFilePath, "<Warning: Alpha feature> The path to file for kubelet to use as a lock file.")
|
||||
fs.BoolVar(&s.ExitOnLockContention, "exit-on-lock-contention", s.ExitOnLockContention, "Whether kubelet should exit upon lock-file contention.")
|
||||
fs.StringVar(&s.RktPath, "rkt-path", s.RktPath, "Path of rkt binary. Leave empty to use the first rkt in $PATH. Only used if --container-runtime='rkt'.")
|
||||
|
||||
@@ -129,7 +129,8 @@ func UnsecuredKubeletDeps(s *options.KubeletServer) (*kubelet.KubeletDeps, error
|
||||
|
||||
var dockerClient dockertools.DockerInterface
|
||||
if s.ContainerRuntime == "docker" {
|
||||
dockerClient = dockertools.ConnectToDockerOrDie(s.DockerEndpoint, s.RuntimeRequestTimeout.Duration)
|
||||
dockerClient = dockertools.ConnectToDockerOrDie(s.DockerEndpoint, s.RuntimeRequestTimeout.Duration,
|
||||
s.ImagePullProgressDeadline.Duration)
|
||||
} else {
|
||||
dockerClient = nil
|
||||
}
|
||||
|
||||
@@ -273,6 +273,7 @@ image-gc-high-threshold
|
||||
image-gc-low-threshold
|
||||
image-project
|
||||
image-pull-policy
|
||||
image-pull-progress-deadline
|
||||
image-service-endpoint
|
||||
include-extended-apis
|
||||
include-extended-apis
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -321,6 +321,10 @@ type KubeletConfiguration struct {
|
||||
// requests - pull, logs, exec and attach.
|
||||
// +optional
|
||||
RuntimeRequestTimeout metav1.Duration `json:"runtimeRequestTimeout,omitempty"`
|
||||
// If no pulling progress is made before the deadline imagePullProgressDeadline,
|
||||
// the image pulling will be cancelled. Defaults to 1m0s.
|
||||
// +optional
|
||||
ImagePullProgressDeadline metav1.Duration `json:"imagePullProgressDeadline,omitempty"`
|
||||
// rktPath is the path of rkt binary. Leave empty to use the first rkt in
|
||||
// $PATH.
|
||||
// +optional
|
||||
|
||||
@@ -213,6 +213,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
|
||||
if obj.RuntimeRequestTimeout == zeroDuration {
|
||||
obj.RuntimeRequestTimeout = metav1.Duration{Duration: 2 * time.Minute}
|
||||
}
|
||||
if obj.ImagePullProgressDeadline == zeroDuration {
|
||||
obj.ImagePullProgressDeadline = metav1.Duration{Duration: 1 * time.Minute}
|
||||
}
|
||||
if obj.CPUCFSQuota == nil {
|
||||
obj.CPUCFSQuota = boolVar(true)
|
||||
}
|
||||
|
||||
@@ -373,6 +373,9 @@ type KubeletConfiguration struct {
|
||||
// runtimeRequestTimeout is the timeout for all runtime requests except long running
|
||||
// requests - pull, logs, exec and attach.
|
||||
RuntimeRequestTimeout metav1.Duration `json:"runtimeRequestTimeout"`
|
||||
// If no pulling progress is made before the deadline imagePullProgressDeadline,
|
||||
// the image pulling will be cancelled. Defaults to 1m0s.
|
||||
ImagePullProgressDeadline metav1.Duration `json:"imagePullProgressDeadline,omitempty"`
|
||||
// rktPath is the path of rkt binary. Leave empty to use the first rkt in
|
||||
// $PATH.
|
||||
RktPath string `json:"rktPath"`
|
||||
|
||||
@@ -341,6 +341,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
|
||||
out.RemoteRuntimeEndpoint = in.RemoteRuntimeEndpoint
|
||||
out.RemoteImageEndpoint = in.RemoteImageEndpoint
|
||||
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
|
||||
out.ImagePullProgressDeadline = in.ImagePullProgressDeadline
|
||||
out.RktPath = in.RktPath
|
||||
out.ExperimentalMounterPath = in.ExperimentalMounterPath
|
||||
out.RktAPIEndpoint = in.RktAPIEndpoint
|
||||
@@ -514,6 +515,7 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
|
||||
out.RemoteRuntimeEndpoint = in.RemoteRuntimeEndpoint
|
||||
out.RemoteImageEndpoint = in.RemoteImageEndpoint
|
||||
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
|
||||
out.ImagePullProgressDeadline = in.ImagePullProgressDeadline
|
||||
out.RktPath = in.RktPath
|
||||
out.ExperimentalMounterPath = in.ExperimentalMounterPath
|
||||
out.RktAPIEndpoint = in.RktAPIEndpoint
|
||||
|
||||
@@ -316,6 +316,7 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
|
||||
out.RemoteRuntimeEndpoint = in.RemoteRuntimeEndpoint
|
||||
out.RemoteImageEndpoint = in.RemoteImageEndpoint
|
||||
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
|
||||
out.ImagePullProgressDeadline = in.ImagePullProgressDeadline
|
||||
out.RktPath = in.RktPath
|
||||
out.ExperimentalMounterPath = in.ExperimentalMounterPath
|
||||
out.RktAPIEndpoint = in.RktAPIEndpoint
|
||||
|
||||
@@ -319,6 +319,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
|
||||
out.RemoteRuntimeEndpoint = in.RemoteRuntimeEndpoint
|
||||
out.RemoteImageEndpoint = in.RemoteImageEndpoint
|
||||
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
|
||||
out.ImagePullProgressDeadline = in.ImagePullProgressDeadline
|
||||
out.RktPath = in.RktPath
|
||||
out.ExperimentalMounterPath = in.ExperimentalMounterPath
|
||||
out.RktAPIEndpoint = in.RktAPIEndpoint
|
||||
|
||||
@@ -2669,6 +2669,12 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
||||
Ref: spec.MustCreateRef("#/definitions/v1.Duration"),
|
||||
},
|
||||
},
|
||||
"imagePullProgressDeadline": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "If no pulling progress is made before the deadline imagePullProgressDeadline, the image pulling will be cancelled. Defaults to 1m0s.",
|
||||
Ref: spec.MustCreateRef("#/definitions/v1.Duration"),
|
||||
},
|
||||
},
|
||||
"rktPath": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "rktPath is the path of rkt binary. Leave empty to use the first rkt in $PATH.",
|
||||
@@ -14564,6 +14570,12 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
|
||||
Ref: spec.MustCreateRef("#/definitions/v1.Duration"),
|
||||
},
|
||||
},
|
||||
"imagePullProgressDeadline": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "If no pulling progress is made before the deadline imagePullProgressDeadline, the image pulling will be cancelled. Defaults to 1m0s.",
|
||||
Ref: spec.MustCreateRef("#/definitions/v1.Duration"),
|
||||
},
|
||||
},
|
||||
"rktPath": {
|
||||
SchemaProps: spec.SchemaProps{
|
||||
Description: "rktPath is the path of rkt binary. Leave empty to use the first rkt in $PATH.",
|
||||
|
||||
@@ -365,7 +365,7 @@ func getDockerClient(dockerEndpoint string) (*dockerapi.Client, error) {
|
||||
// is the timeout for docker requests. If timeout is exceeded, the request
|
||||
// will be cancelled and throw out an error. If requestTimeout is 0, a default
|
||||
// value will be applied.
|
||||
func ConnectToDockerOrDie(dockerEndpoint string, requestTimeout time.Duration) DockerInterface {
|
||||
func ConnectToDockerOrDie(dockerEndpoint string, requestTimeout, imagePullProgressDeadline time.Duration) DockerInterface {
|
||||
if dockerEndpoint == "fake://" {
|
||||
return NewFakeDockerClient()
|
||||
}
|
||||
@@ -374,7 +374,7 @@ func ConnectToDockerOrDie(dockerEndpoint string, requestTimeout time.Duration) D
|
||||
glog.Fatalf("Couldn't connect to docker: %v", err)
|
||||
}
|
||||
glog.Infof("Start docker client with request timeout=%v", requestTimeout)
|
||||
return newKubeDockerClient(client, requestTimeout)
|
||||
return newKubeDockerClient(client, requestTimeout, imagePullProgressDeadline)
|
||||
}
|
||||
|
||||
// GetKubeletDockerContainers lists all container or just the running ones.
|
||||
|
||||
@@ -51,6 +51,10 @@ import (
|
||||
type kubeDockerClient struct {
|
||||
// timeout is the timeout of short running docker operations.
|
||||
timeout time.Duration
|
||||
// If no pulling progress is made before imagePullProgressDeadline, the image pulling will be cancelled.
|
||||
// Docker reports image progress for every 512kB block, so normally there shouldn't be a long interval
|
||||
// between progress updates.
|
||||
imagePullProgressDeadline time.Duration
|
||||
client *dockerapi.Client
|
||||
}
|
||||
|
||||
@@ -72,18 +76,11 @@ const (
|
||||
|
||||
// defaultImagePullingProgressReportInterval is the default interval of image pulling progress reporting.
|
||||
defaultImagePullingProgressReportInterval = 10 * time.Second
|
||||
|
||||
// defaultImagePullingStuckTimeout is the default timeout for image pulling stuck. If no progress
|
||||
// is made for defaultImagePullingStuckTimeout, the image pulling will be cancelled.
|
||||
// Docker reports image progress for every 512kB block, so normally there shouldn't be too long interval
|
||||
// between progress updates.
|
||||
// TODO(random-liu): Make this configurable
|
||||
defaultImagePullingStuckTimeout = 1 * time.Minute
|
||||
)
|
||||
|
||||
// newKubeDockerClient creates a kubeDockerClient from an existing docker client. If requestTimeout is 0,
|
||||
// defaultTimeout will be applied.
|
||||
func newKubeDockerClient(dockerClient *dockerapi.Client, requestTimeout time.Duration) DockerInterface {
|
||||
func newKubeDockerClient(dockerClient *dockerapi.Client, requestTimeout, imagePullProgressDeadline time.Duration) DockerInterface {
|
||||
if requestTimeout == 0 {
|
||||
requestTimeout = defaultTimeout
|
||||
}
|
||||
@@ -91,6 +88,7 @@ func newKubeDockerClient(dockerClient *dockerapi.Client, requestTimeout time.Dur
|
||||
k := &kubeDockerClient{
|
||||
client: dockerClient,
|
||||
timeout: requestTimeout,
|
||||
imagePullProgressDeadline: imagePullProgressDeadline,
|
||||
}
|
||||
// Notice that this assumes that docker is running before kubelet is started.
|
||||
v, err := k.Version()
|
||||
@@ -297,15 +295,17 @@ type progressReporter struct {
|
||||
image string
|
||||
cancel context.CancelFunc
|
||||
stopCh chan struct{}
|
||||
imagePullProgressDeadline time.Duration
|
||||
}
|
||||
|
||||
// newProgressReporter creates a new progressReporter for the specified image; it calls cancel if no pull progress is made for imagePullProgressDeadline.
|
||||
func newProgressReporter(image string, cancel context.CancelFunc) *progressReporter {
|
||||
func newProgressReporter(image string, cancel context.CancelFunc, imagePullProgressDeadline time.Duration) *progressReporter {
|
||||
return &progressReporter{
|
||||
progress: newProgress(),
|
||||
image: image,
|
||||
cancel: cancel,
|
||||
stopCh: make(chan struct{}),
|
||||
imagePullProgressDeadline: imagePullProgressDeadline,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -319,9 +319,9 @@ func (p *progressReporter) start() {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
progress, timestamp := p.progress.get()
|
||||
// If there is no progress for defaultImagePullingStuckTimeout, cancel the operation.
|
||||
if time.Now().Sub(timestamp) > defaultImagePullingStuckTimeout {
|
||||
glog.Errorf("Cancel pulling image %q because of no progress for %v, latest progress: %q", p.image, defaultImagePullingStuckTimeout, progress)
|
||||
// If there is no progress for p.imagePullProgressDeadline, cancel the operation.
|
||||
if time.Now().Sub(timestamp) > p.imagePullProgressDeadline {
|
||||
glog.Errorf("Cancel pulling image %q because of no progress for %v, latest progress: %q", p.image, p.imagePullProgressDeadline, progress)
|
||||
p.cancel()
|
||||
return
|
||||
}
|
||||
@@ -354,7 +354,7 @@ func (d *kubeDockerClient) PullImage(image string, auth dockertypes.AuthConfig,
|
||||
return err
|
||||
}
|
||||
defer resp.Close()
|
||||
reporter := newProgressReporter(image, cancel)
|
||||
reporter := newProgressReporter(image, cancel, d.imagePullProgressDeadline)
|
||||
reporter.start()
|
||||
defer reporter.stop()
|
||||
decoder := json.NewDecoder(resp)
|
||||
|
||||
@@ -38,6 +38,7 @@ const (
|
||||
maxTotalContainers = -1
|
||||
|
||||
defaultRuntimeRequestTimeoutDuration = 1 * time.Minute
|
||||
defaultImagePullProgressDeadline = 1 * time.Minute
|
||||
garbageCollectDuration = 3 * time.Minute
|
||||
setupDuration = 10 * time.Minute
|
||||
runtimePollInterval = 10 * time.Second
|
||||
@@ -258,7 +259,7 @@ func containerGCTest(f *framework.Framework, test testRun) {
|
||||
func dockerContainerGCTest(f *framework.Framework, test testRun) {
|
||||
var runtime docker.DockerInterface
|
||||
BeforeEach(func() {
|
||||
runtime = docker.ConnectToDockerOrDie(defaultDockerEndpoint, defaultRuntimeRequestTimeoutDuration)
|
||||
runtime = docker.ConnectToDockerOrDie(defaultDockerEndpoint, defaultRuntimeRequestTimeoutDuration, defaultImagePullProgressDeadline)
|
||||
})
|
||||
for _, pod := range test.testPods {
|
||||
// Initialize the getContainerNames function to use the dockertools api
|
||||
|
||||
Reference in New Issue
Block a user