Merge pull request #36887 from hex108/pullImage

Automatic merge from submit-queue (batch tested with PRs 38432, 36887, 38415)

Add --image-pull-stuck-timeout option  to kubelet

In this PR, add --image-pull-stuck-time option to specify the stuck timeout for pulling image.

When docker extracts image layer, there is no progress. The progress will exceed 1m if the layer is big or system is busy. It happend in our cluster, so I add above option to specify the timeout. 

Related error log:
<pre>
[... kube_docker_client.go:29] Cancel pulling image "our_registry/demo/test" because of no progress for 1m0s, latest progress "c914ad57d670": Extracting [==================>] 513.5 MB/513.5MB"
[... docker_manager.go:2254] container start failed: ErrImagePull: net/http: request canceled
</pre>
This commit is contained in:
Kubernetes Submit Queue
2016-12-08 20:37:55 -08:00
committed by GitHub
14 changed files with 3598 additions and 3494 deletions

View File

@@ -200,6 +200,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.CgroupRoot, "cgroup-root", s.CgroupRoot, "Optional root cgroup to use for pods. This is handled by the container runtime on a best effort basis. Default: '', which means use the container runtime default.")
fs.StringVar(&s.ContainerRuntime, "container-runtime", s.ContainerRuntime, "The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'.")
fs.DurationVar(&s.RuntimeRequestTimeout.Duration, "runtime-request-timeout", s.RuntimeRequestTimeout.Duration, "Timeout of all runtime requests except long running request - pull, logs, exec and attach. When timeout exceeded, kubelet will cancel the request, throw out an error and retry later. Default: 2m0s")
fs.DurationVar(&s.ImagePullProgressDeadline.Duration, "image-pull-progress-deadline", s.ImagePullProgressDeadline.Duration, "If no pulling progress is made before this deadline, the image pulling will be cancelled. Default: 1m0s.")
fs.StringVar(&s.LockFilePath, "lock-file", s.LockFilePath, "<Warning: Alpha feature> The path to file for kubelet to use as a lock file.")
fs.BoolVar(&s.ExitOnLockContention, "exit-on-lock-contention", s.ExitOnLockContention, "Whether kubelet should exit upon lock-file contention.")
fs.StringVar(&s.RktPath, "rkt-path", s.RktPath, "Path of rkt binary. Leave empty to use the first rkt in $PATH. Only used if --container-runtime='rkt'.")

View File

@@ -129,7 +129,8 @@ func UnsecuredKubeletDeps(s *options.KubeletServer) (*kubelet.KubeletDeps, error
var dockerClient dockertools.DockerInterface
if s.ContainerRuntime == "docker" {
dockerClient = dockertools.ConnectToDockerOrDie(s.DockerEndpoint, s.RuntimeRequestTimeout.Duration)
dockerClient = dockertools.ConnectToDockerOrDie(s.DockerEndpoint, s.RuntimeRequestTimeout.Duration,
s.ImagePullProgressDeadline.Duration)
} else {
dockerClient = nil
}

View File

@@ -273,6 +273,7 @@ image-gc-high-threshold
image-gc-low-threshold
image-project
image-pull-policy
image-pull-progress-deadline
image-service-endpoint
include-extended-apis
include-extended-apis

File diff suppressed because it is too large Load Diff

View File

@@ -321,6 +321,10 @@ type KubeletConfiguration struct {
// requests - pull, logs, exec and attach.
// +optional
RuntimeRequestTimeout metav1.Duration `json:"runtimeRequestTimeout,omitempty"`
// If no pulling progress is made before the deadline imagePullProgressDeadline,
// the image pulling will be cancelled. Defaults to 1m0s.
// +optional
ImagePullProgressDeadline metav1.Duration `json:"imagePullProgressDeadline,omitempty"`
// rktPath is the path of rkt binary. Leave empty to use the first rkt in
// $PATH.
// +optional

View File

@@ -213,6 +213,9 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.RuntimeRequestTimeout == zeroDuration {
obj.RuntimeRequestTimeout = metav1.Duration{Duration: 2 * time.Minute}
}
if obj.ImagePullProgressDeadline == zeroDuration {
obj.ImagePullProgressDeadline = metav1.Duration{Duration: 1 * time.Minute}
}
if obj.CPUCFSQuota == nil {
obj.CPUCFSQuota = boolVar(true)
}

View File

@@ -373,6 +373,9 @@ type KubeletConfiguration struct {
// runtimeRequestTimeout is the timeout for all runtime requests except long running
// requests - pull, logs, exec and attach.
RuntimeRequestTimeout metav1.Duration `json:"runtimeRequestTimeout"`
// If no pulling progress is made before the deadline imagePullProgressDeadline,
// the image pulling will be cancelled. Defaults to 1m0s.
ImagePullProgressDeadline metav1.Duration `json:"imagePullProgressDeadline,omitempty"`
// rktPath is the path of rkt binary. Leave empty to use the first rkt in
// $PATH.
RktPath string `json:"rktPath"`

View File

@@ -341,6 +341,7 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
out.RemoteRuntimeEndpoint = in.RemoteRuntimeEndpoint
out.RemoteImageEndpoint = in.RemoteImageEndpoint
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
out.ImagePullProgressDeadline = in.ImagePullProgressDeadline
out.RktPath = in.RktPath
out.ExperimentalMounterPath = in.ExperimentalMounterPath
out.RktAPIEndpoint = in.RktAPIEndpoint
@@ -514,6 +515,7 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
out.RemoteRuntimeEndpoint = in.RemoteRuntimeEndpoint
out.RemoteImageEndpoint = in.RemoteImageEndpoint
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
out.ImagePullProgressDeadline = in.ImagePullProgressDeadline
out.RktPath = in.RktPath
out.ExperimentalMounterPath = in.ExperimentalMounterPath
out.RktAPIEndpoint = in.RktAPIEndpoint

View File

@@ -316,6 +316,7 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
out.RemoteRuntimeEndpoint = in.RemoteRuntimeEndpoint
out.RemoteImageEndpoint = in.RemoteImageEndpoint
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
out.ImagePullProgressDeadline = in.ImagePullProgressDeadline
out.RktPath = in.RktPath
out.ExperimentalMounterPath = in.ExperimentalMounterPath
out.RktAPIEndpoint = in.RktAPIEndpoint

View File

@@ -319,6 +319,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
out.RemoteRuntimeEndpoint = in.RemoteRuntimeEndpoint
out.RemoteImageEndpoint = in.RemoteImageEndpoint
out.RuntimeRequestTimeout = in.RuntimeRequestTimeout
out.ImagePullProgressDeadline = in.ImagePullProgressDeadline
out.RktPath = in.RktPath
out.ExperimentalMounterPath = in.ExperimentalMounterPath
out.RktAPIEndpoint = in.RktAPIEndpoint

View File

@@ -2669,6 +2669,12 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
Ref: spec.MustCreateRef("#/definitions/v1.Duration"),
},
},
"imagePullProgressDeadline": {
SchemaProps: spec.SchemaProps{
Description: "If no pulling progress is made before the deadline imagePullProgressDeadline, the image pulling will be cancelled. Defaults to 1m0s.",
Ref: spec.MustCreateRef("#/definitions/v1.Duration"),
},
},
"rktPath": {
SchemaProps: spec.SchemaProps{
Description: "rktPath is the path of rkt binary. Leave empty to use the first rkt in $PATH.",
@@ -14564,6 +14570,12 @@ var OpenAPIDefinitions *common.OpenAPIDefinitions = &common.OpenAPIDefinitions{
Ref: spec.MustCreateRef("#/definitions/v1.Duration"),
},
},
"imagePullProgressDeadline": {
SchemaProps: spec.SchemaProps{
Description: "If no pulling progress is made before the deadline imagePullProgressDeadline, the image pulling will be cancelled. Defaults to 1m0s.",
Ref: spec.MustCreateRef("#/definitions/v1.Duration"),
},
},
"rktPath": {
SchemaProps: spec.SchemaProps{
Description: "rktPath is the path of rkt binary. Leave empty to use the first rkt in $PATH.",

View File

@@ -365,7 +365,7 @@ func getDockerClient(dockerEndpoint string) (*dockerapi.Client, error) {
// is the timeout for docker requests. If timeout is exceeded, the request
// will be cancelled and throw out an error. If requestTimeout is 0, a default
// value will be applied.
func ConnectToDockerOrDie(dockerEndpoint string, requestTimeout time.Duration) DockerInterface {
func ConnectToDockerOrDie(dockerEndpoint string, requestTimeout, imagePullProgressDeadline time.Duration) DockerInterface {
if dockerEndpoint == "fake://" {
return NewFakeDockerClient()
}
@@ -374,7 +374,7 @@ func ConnectToDockerOrDie(dockerEndpoint string, requestTimeout time.Duration) D
glog.Fatalf("Couldn't connect to docker: %v", err)
}
glog.Infof("Start docker client with request timeout=%v", requestTimeout)
return newKubeDockerClient(client, requestTimeout)
return newKubeDockerClient(client, requestTimeout, imagePullProgressDeadline)
}
// GetKubeletDockerContainers lists all container or just the running ones.

View File

@@ -51,7 +51,11 @@ import (
type kubeDockerClient struct {
// timeout is the timeout of short running docker operations.
timeout time.Duration
client *dockerapi.Client
// If no pulling progress is made before imagePullProgressDeadline, the image pulling will be cancelled.
// Docker reports image progress for every 512kB block, so normally there shouldn't be too long interval
// between progress updates.
imagePullProgressDeadline time.Duration
client *dockerapi.Client
}
// Make sure that kubeDockerClient implemented the DockerInterface.
@@ -72,25 +76,19 @@ const (
// defaultImagePullingProgressReportInterval is the default interval of image pulling progress reporting.
defaultImagePullingProgressReportInterval = 10 * time.Second
// defaultImagePullingStuckTimeout is the default timeout for image pulling stuck. If no progress
// is made for defaultImagePullingStuckTimeout, the image pulling will be cancelled.
// Docker reports image progress for every 512kB block, so normally there shouldn't be too long interval
// between progress updates.
// TODO(random-liu): Make this configurable
defaultImagePullingStuckTimeout = 1 * time.Minute
)
// newKubeDockerClient creates an kubeDockerClient from an existing docker client. If requestTimeout is 0,
// defaultTimeout will be applied.
func newKubeDockerClient(dockerClient *dockerapi.Client, requestTimeout time.Duration) DockerInterface {
func newKubeDockerClient(dockerClient *dockerapi.Client, requestTimeout, imagePullProgressDeadline time.Duration) DockerInterface {
if requestTimeout == 0 {
requestTimeout = defaultTimeout
}
k := &kubeDockerClient{
client: dockerClient,
timeout: requestTimeout,
client: dockerClient,
timeout: requestTimeout,
imagePullProgressDeadline: imagePullProgressDeadline,
}
// Notice that this assumes that docker is running before kubelet is started.
v, err := k.Version()
@@ -294,18 +292,20 @@ func (p *progress) get() (string, time.Time) {
// progressReporter keeps the newest image pulling progress and periodically report the newest progress.
type progressReporter struct {
*progress
image string
cancel context.CancelFunc
stopCh chan struct{}
image string
cancel context.CancelFunc
stopCh chan struct{}
imagePullProgressDeadline time.Duration
}
// newProgressReporter creates a new progressReporter for specific image with specified reporting interval
func newProgressReporter(image string, cancel context.CancelFunc) *progressReporter {
func newProgressReporter(image string, cancel context.CancelFunc, imagePullProgressDeadline time.Duration) *progressReporter {
return &progressReporter{
progress: newProgress(),
image: image,
cancel: cancel,
stopCh: make(chan struct{}),
imagePullProgressDeadline: imagePullProgressDeadline,
}
}
@@ -319,9 +319,9 @@ func (p *progressReporter) start() {
select {
case <-ticker.C:
progress, timestamp := p.progress.get()
// If there is no progress for defaultImagePullingStuckTimeout, cancel the operation.
if time.Now().Sub(timestamp) > defaultImagePullingStuckTimeout {
glog.Errorf("Cancel pulling image %q because of no progress for %v, latest progress: %q", p.image, defaultImagePullingStuckTimeout, progress)
// If there is no progress for p.imagePullProgressDeadline, cancel the operation.
if time.Now().Sub(timestamp) > p.imagePullProgressDeadline {
glog.Errorf("Cancel pulling image %q because of no progress for %v, latest progress: %q", p.image, p.imagePullProgressDeadline, progress)
p.cancel()
return
}
@@ -354,7 +354,7 @@ func (d *kubeDockerClient) PullImage(image string, auth dockertypes.AuthConfig,
return err
}
defer resp.Close()
reporter := newProgressReporter(image, cancel)
reporter := newProgressReporter(image, cancel, d.imagePullProgressDeadline)
reporter.start()
defer reporter.stop()
decoder := json.NewDecoder(resp)

View File

@@ -38,6 +38,7 @@ const (
maxTotalContainers = -1
defaultRuntimeRequestTimeoutDuration = 1 * time.Minute
defaultImagePullProgressDeadline = 1 * time.Minute
garbageCollectDuration = 3 * time.Minute
setupDuration = 10 * time.Minute
runtimePollInterval = 10 * time.Second
@@ -258,7 +259,7 @@ func containerGCTest(f *framework.Framework, test testRun) {
func dockerContainerGCTest(f *framework.Framework, test testRun) {
var runtime docker.DockerInterface
BeforeEach(func() {
runtime = docker.ConnectToDockerOrDie(defaultDockerEndpoint, defaultRuntimeRequestTimeoutDuration)
runtime = docker.ConnectToDockerOrDie(defaultDockerEndpoint, defaultRuntimeRequestTimeoutDuration, defaultImagePullProgressDeadline)
})
for _, pod := range test.testPods {
// Initialize the getContainerNames function to use the dockertools api