
Automatic merge from submit-queue (batch tested with PRs 45809, 46515, 46484, 46516, 45614) CRI: add methods for container stats **What this PR does / why we need it**: Define methods in CRI to get container stats. **Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: Part of https://github.com/kubernetes/features/issues/290; addresses #27097 **Special notes for your reviewer**: This PR defines the *minimum required* container metrics for the existing components to function, loosely based on the previous discussion on [core metrics](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/core-metrics-pipeline.md) as well as the existing cadvisor/summary APIs. Two new RPC calls are added to the RuntimeService: `ContainerStats` and `ListContainerStats`. The former retrieves stats for a given container, while the latter gets stats for all containers in one call. The stats gathering time of each subsystem can vary substantially (e.g., cpu vs. disk), so even though the on-demand model is preferred due to its simplicity, we’d rather give the container runtime more flexibility to determine the collection frequency for each subsystem\*. As a trade-off, each piece of stats for a subsystem must contain a timestamp to let kubelet know how fresh/recent the stats are. In the future, we should also recommend a guideline for how recent the stats should be in order to ensure the reliability (e.g., eviction) and the responsiveness (e.g., autoscaling) of the Kubernetes cluster. The next step is to plumb this through kubelet so that kubelet can choose to consume container stats from CRI or cadvisor. \*Alternatively, we can add calls to get stats of individual subsystems.
However, kubelet does not have complete knowledge of the runtime environment, so this would only lead to unnecessary complexity in kubelet. **Release note**: ```release-note Augment CRI to support retrieving container stats from the runtime. ```
141 lines
4.1 KiB
Go
141 lines
4.1 KiB
Go
/*
|
|
Copyright 2016 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package remote
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/golang/glog"
|
|
"google.golang.org/grpc"
|
|
|
|
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
|
|
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1"
|
|
"k8s.io/kubernetes/pkg/kubelet/util"
|
|
)
|
|
|
|
// RemoteImageService is a gRPC implementation of internalapi.ImageManagerService.
|
|
type RemoteImageService struct {
|
|
timeout time.Duration
|
|
imageClient runtimeapi.ImageServiceClient
|
|
}
|
|
|
|
// NewRemoteImageService creates a new internalapi.ImageManagerService.
|
|
func NewRemoteImageService(endpoint string, connectionTimeout time.Duration) (internalapi.ImageManagerService, error) {
|
|
glog.V(3).Infof("Connecting to image service %s", endpoint)
|
|
addr, dailer, err := util.GetAddressAndDialer(endpoint)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
conn, err := grpc.Dial(addr, grpc.WithInsecure(), grpc.WithTimeout(connectionTimeout), grpc.WithDialer(dailer))
|
|
if err != nil {
|
|
glog.Errorf("Connect remote image service %s failed: %v", addr, err)
|
|
return nil, err
|
|
}
|
|
|
|
return &RemoteImageService{
|
|
timeout: connectionTimeout,
|
|
imageClient: runtimeapi.NewImageServiceClient(conn),
|
|
}, nil
|
|
}
|
|
|
|
// ListImages lists available images.
|
|
func (r *RemoteImageService) ListImages(filter *runtimeapi.ImageFilter) ([]*runtimeapi.Image, error) {
|
|
ctx, cancel := getContextWithTimeout(r.timeout)
|
|
defer cancel()
|
|
|
|
resp, err := r.imageClient.ListImages(ctx, &runtimeapi.ListImagesRequest{
|
|
Filter: filter,
|
|
})
|
|
if err != nil {
|
|
glog.Errorf("ListImages with filter %q from image service failed: %v", filter, err)
|
|
return nil, err
|
|
}
|
|
|
|
return resp.Images, nil
|
|
}
|
|
|
|
// ImageStatus returns the status of the image.
|
|
func (r *RemoteImageService) ImageStatus(image *runtimeapi.ImageSpec) (*runtimeapi.Image, error) {
|
|
ctx, cancel := getContextWithTimeout(r.timeout)
|
|
defer cancel()
|
|
|
|
resp, err := r.imageClient.ImageStatus(ctx, &runtimeapi.ImageStatusRequest{
|
|
Image: image,
|
|
})
|
|
if err != nil {
|
|
glog.Errorf("ImageStatus %q from image service failed: %v", image.Image, err)
|
|
return nil, err
|
|
}
|
|
|
|
if resp.Image != nil {
|
|
if resp.Image.Id == "" || resp.Image.Size_ == 0 {
|
|
errorMessage := fmt.Sprintf("Id or size of image %q is not set", image.Image)
|
|
glog.Errorf("ImageStatus failed: %s", errorMessage)
|
|
return nil, errors.New(errorMessage)
|
|
}
|
|
}
|
|
|
|
return resp.Image, nil
|
|
}
|
|
|
|
// PullImage pulls an image with authentication config.
|
|
func (r *RemoteImageService) PullImage(image *runtimeapi.ImageSpec, auth *runtimeapi.AuthConfig) (string, error) {
|
|
ctx, cancel := getContextWithCancel()
|
|
defer cancel()
|
|
|
|
resp, err := r.imageClient.PullImage(ctx, &runtimeapi.PullImageRequest{
|
|
Image: image,
|
|
Auth: auth,
|
|
})
|
|
if err != nil {
|
|
glog.Errorf("PullImage %q from image service failed: %v", image.Image, err)
|
|
return "", err
|
|
}
|
|
|
|
if resp.ImageRef == "" {
|
|
errorMessage := fmt.Sprintf("imageRef of image %q is not set", image.Image)
|
|
glog.Errorf("PullImage failed: %s", errorMessage)
|
|
return "", errors.New(errorMessage)
|
|
}
|
|
|
|
return resp.ImageRef, nil
|
|
}
|
|
|
|
// RemoveImage removes the image.
|
|
func (r *RemoteImageService) RemoveImage(image *runtimeapi.ImageSpec) error {
|
|
ctx, cancel := getContextWithTimeout(r.timeout)
|
|
defer cancel()
|
|
|
|
_, err := r.imageClient.RemoveImage(ctx, &runtimeapi.RemoveImageRequest{
|
|
Image: image,
|
|
})
|
|
if err != nil {
|
|
glog.Errorf("RemoveImage %q from image service failed: %v", image.Image, err)
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ImageFsInfo returns information of the filesystem that is used to store images.
|
|
func (r *RemoteImageService) ImageFsInfo(req *runtimeapi.ImageFsInfoRequest) (*runtimeapi.ImageFsInfoResponse, error) {
|
|
return nil, fmt.Errorf("not implemented")
|
|
}
|