kubernetes/pkg/kubelet/container/helpers.go
Kubernetes Submit Queue 8d10a8f74f
Merge pull request #64006 from Random-Liu/streaming-auth
Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Add proxy for container streaming in kubelet for streaming auth.

For https://github.com/kubernetes/kubernetes/issues/36666, option 2 of https://github.com/kubernetes/kubernetes/issues/36666#issuecomment-378440458.

This PR:
1. Removed the `DirectStreamingRuntime`, and changed `IndirectStreamingRuntime` to `StreamingRuntime`. All `DirectStreamingRuntime`s, `dockertools` and `rkt`, were removed.
2. Proxy container streaming in kubelet instead of returning redirect to apiserver. This solves the container runtime authentication issue, which is what we agreed on in https://github.com/kubernetes/kubernetes/issues/36666.

Please note that, this PR replaced the redirect with proxy directly instead of adding a knob to switch between the 2 behaviors. For existing CRI runtimes like containerd and cri-o, they should change to serve container streaming on localhost, so as to make the whole container streaming connection secure.

 If a general authentication mechanism proposed in https://github.com/kubernetes/kubernetes/issues/62747 is ready, we can switch back to redirect, and all code can be found in github history.

Please also note that this added some overhead in kubelet when there are container streaming connections. However, the actual bottleneck is in the apiserver anyway, because it does proxy for all container streaming happens in the cluster. So it seems fine to get security and simplicity with this overhead. @derekwaynecarr @mrunalp Are you ok with this? Or do you prefer a knob?

@yujuhong @timstclair @dchen1107 @mikebrow @feiskyer 
/cc @kubernetes/sig-node-pr-reviews 
**Release note**:

```release-note
Kubelet now proxies container streaming between apiserver and container runtime. The connection between kubelet and apiserver is authenticated. Container runtime should change streaming server to serve on localhost, to make the connection between kubelet and container runtime local.

In this way, the whole container streaming connection is secure. To switch back to the old behavior, set `--redirect-container-streaming=true` flag.
```
2018-05-31 22:45:29 -07:00

322 lines
11 KiB
Go

/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package container
import (
"fmt"
"hash/fnv"
"strings"
"github.com/golang/glog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
"k8s.io/kubernetes/pkg/kubelet/util/format"
hashutil "k8s.io/kubernetes/pkg/util/hash"
"k8s.io/kubernetes/third_party/forked/golang/expansion"
)
// HandlerRunner runs a lifecycle handler for a container.
type HandlerRunner interface {
Run(containerID ContainerID, pod *v1.Pod, container *v1.Container, handler *v1.Handler) (string, error)
}
// RuntimeHelper wraps kubelet to make container runtime
// able to get necessary informations like the RunContainerOptions, DNS settings, Host IP.
type RuntimeHelper interface {
GenerateRunContainerOptions(pod *v1.Pod, container *v1.Container, podIP string) (contOpts *RunContainerOptions, cleanupAction func(), err error)
GetPodDNS(pod *v1.Pod) (dnsConfig *runtimeapi.DNSConfig, err error)
// GetPodCgroupParent returns the CgroupName identifier, and its literal cgroupfs form on the host
// of a pod.
GetPodCgroupParent(pod *v1.Pod) string
GetPodDir(podUID types.UID) string
GeneratePodHostNameAndDomain(pod *v1.Pod) (hostname string, hostDomain string, err error)
// GetExtraSupplementalGroupsForPod returns a list of the extra
// supplemental groups for the Pod. These extra supplemental groups come
// from annotations on persistent volumes that the pod depends on.
GetExtraSupplementalGroupsForPod(pod *v1.Pod) []int64
}
// ShouldContainerBeRestarted checks whether a container needs to be restarted.
// TODO(yifan): Think about how to refactor this.
func ShouldContainerBeRestarted(container *v1.Container, pod *v1.Pod, podStatus *PodStatus) bool {
// Get latest container status.
status := podStatus.FindContainerStatusByName(container.Name)
// If the container was never started before, we should start it.
// NOTE(random-liu): If all historical containers were GC'd, we'll also return true here.
if status == nil {
return true
}
// Check whether container is running
if status.State == ContainerStateRunning {
return false
}
// Always restart container in the unknown, or in the created state.
if status.State == ContainerStateUnknown || status.State == ContainerStateCreated {
return true
}
// Check RestartPolicy for dead container
if pod.Spec.RestartPolicy == v1.RestartPolicyNever {
glog.V(4).Infof("Already ran container %q of pod %q, do nothing", container.Name, format.Pod(pod))
return false
}
if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure {
// Check the exit code.
if status.ExitCode == 0 {
glog.V(4).Infof("Already successfully ran container %q of pod %q, do nothing", container.Name, format.Pod(pod))
return false
}
}
return true
}
// HashContainer returns the hash of the container. It is used to compare
// the running container with its desired spec.
func HashContainer(container *v1.Container) uint64 {
hash := fnv.New32a()
hashutil.DeepHashObject(hash, *container)
return uint64(hash.Sum32())
}
// EnvVarsToMap constructs a map of environment name to value from a slice
// of env vars.
func EnvVarsToMap(envs []EnvVar) map[string]string {
result := map[string]string{}
for _, env := range envs {
result[env.Name] = env.Value
}
return result
}
// V1EnvVarsToMap constructs a map of environment name to value from a slice
// of env vars.
func V1EnvVarsToMap(envs []v1.EnvVar) map[string]string {
result := map[string]string{}
for _, env := range envs {
result[env.Name] = env.Value
}
return result
}
// ExpandContainerCommandOnlyStatic substitutes only static environment variable values from the
// container environment definitions. This does *not* include valueFrom substitutions.
// TODO: callers should use ExpandContainerCommandAndArgs with a fully resolved list of environment.
func ExpandContainerCommandOnlyStatic(containerCommand []string, envs []v1.EnvVar) (command []string) {
mapping := expansion.MappingFuncFor(V1EnvVarsToMap(envs))
if len(containerCommand) != 0 {
for _, cmd := range containerCommand {
command = append(command, expansion.Expand(cmd, mapping))
}
}
return command
}
func ExpandContainerVolumeMounts(mount v1.VolumeMount, envs []EnvVar) (expandedSubpath string) {
mapping := expansion.MappingFuncFor(EnvVarsToMap(envs))
return expansion.Expand(mount.SubPath, mapping)
}
func ExpandContainerCommandAndArgs(container *v1.Container, envs []EnvVar) (command []string, args []string) {
mapping := expansion.MappingFuncFor(EnvVarsToMap(envs))
if len(container.Command) != 0 {
for _, cmd := range container.Command {
command = append(command, expansion.Expand(cmd, mapping))
}
}
if len(container.Args) != 0 {
for _, arg := range container.Args {
args = append(args, expansion.Expand(arg, mapping))
}
}
return command, args
}
// Create an event recorder to record object's event except implicitly required container's, like infra container.
func FilterEventRecorder(recorder record.EventRecorder) record.EventRecorder {
return &innerEventRecorder{
recorder: recorder,
}
}
type innerEventRecorder struct {
recorder record.EventRecorder
}
func (irecorder *innerEventRecorder) shouldRecordEvent(object runtime.Object) (*v1.ObjectReference, bool) {
if object == nil {
return nil, false
}
if ref, ok := object.(*v1.ObjectReference); ok {
if !strings.HasPrefix(ref.FieldPath, ImplicitContainerPrefix) {
return ref, true
}
}
return nil, false
}
func (irecorder *innerEventRecorder) Event(object runtime.Object, eventtype, reason, message string) {
if ref, ok := irecorder.shouldRecordEvent(object); ok {
irecorder.recorder.Event(ref, eventtype, reason, message)
}
}
func (irecorder *innerEventRecorder) Eventf(object runtime.Object, eventtype, reason, messageFmt string, args ...interface{}) {
if ref, ok := irecorder.shouldRecordEvent(object); ok {
irecorder.recorder.Eventf(ref, eventtype, reason, messageFmt, args...)
}
}
func (irecorder *innerEventRecorder) PastEventf(object runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{}) {
if ref, ok := irecorder.shouldRecordEvent(object); ok {
irecorder.recorder.PastEventf(ref, timestamp, eventtype, reason, messageFmt, args...)
}
}
func (irecorder *innerEventRecorder) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) {
if ref, ok := irecorder.shouldRecordEvent(object); ok {
irecorder.recorder.AnnotatedEventf(ref, annotations, eventtype, reason, messageFmt, args...)
}
}
// Pod must not be nil.
func IsHostNetworkPod(pod *v1.Pod) bool {
return pod.Spec.HostNetwork
}
// TODO(random-liu): Convert PodStatus to running Pod, should be deprecated soon
func ConvertPodStatusToRunningPod(runtimeName string, podStatus *PodStatus) Pod {
runningPod := Pod{
ID: podStatus.ID,
Name: podStatus.Name,
Namespace: podStatus.Namespace,
}
for _, containerStatus := range podStatus.ContainerStatuses {
if containerStatus.State != ContainerStateRunning {
continue
}
container := &Container{
ID: containerStatus.ID,
Name: containerStatus.Name,
Image: containerStatus.Image,
ImageID: containerStatus.ImageID,
Hash: containerStatus.Hash,
State: containerStatus.State,
}
runningPod.Containers = append(runningPod.Containers, container)
}
// Populate sandboxes in kubecontainer.Pod
for _, sandbox := range podStatus.SandboxStatuses {
runningPod.Sandboxes = append(runningPod.Sandboxes, &Container{
ID: ContainerID{Type: runtimeName, ID: sandbox.Id},
State: SandboxToContainerState(sandbox.State),
})
}
return runningPod
}
// SandboxToContainerState converts runtimeapi.PodSandboxState to
// kubecontainer.ContainerState.
// This is only needed because we need to return sandboxes as if they were
// kubecontainer.Containers to avoid substantial changes to PLEG.
// TODO: Remove this once it becomes obsolete.
func SandboxToContainerState(state runtimeapi.PodSandboxState) ContainerState {
switch state {
case runtimeapi.PodSandboxState_SANDBOX_READY:
return ContainerStateRunning
case runtimeapi.PodSandboxState_SANDBOX_NOTREADY:
return ContainerStateExited
}
return ContainerStateUnknown
}
// FormatPod returns a string representing a pod in a human readable format,
// with pod UID as part of the string.
func FormatPod(pod *Pod) string {
// Use underscore as the delimiter because it is not allowed in pod name
// (DNS subdomain format), while allowed in the container name format.
return fmt.Sprintf("%s_%s(%s)", pod.Name, pod.Namespace, pod.ID)
}
// GetContainerSpec gets the container spec by containerName.
func GetContainerSpec(pod *v1.Pod, containerName string) *v1.Container {
for i, c := range pod.Spec.Containers {
if containerName == c.Name {
return &pod.Spec.Containers[i]
}
}
for i, c := range pod.Spec.InitContainers {
if containerName == c.Name {
return &pod.Spec.InitContainers[i]
}
}
return nil
}
// HasPrivilegedContainer returns true if any of the containers in the pod are privileged.
func HasPrivilegedContainer(pod *v1.Pod) bool {
for _, c := range append(pod.Spec.Containers, pod.Spec.InitContainers...) {
if c.SecurityContext != nil &&
c.SecurityContext.Privileged != nil &&
*c.SecurityContext.Privileged {
return true
}
}
return false
}
// MakePortMappings creates internal port mapping from api port mapping.
func MakePortMappings(container *v1.Container) (ports []PortMapping) {
names := make(map[string]struct{})
for _, p := range container.Ports {
pm := PortMapping{
HostPort: int(p.HostPort),
ContainerPort: int(p.ContainerPort),
Protocol: p.Protocol,
HostIP: p.HostIP,
}
// We need to create some default port name if it's not specified, since
// this is necessary for rkt.
// http://issue.k8s.io/7710
if p.Name == "" {
pm.Name = fmt.Sprintf("%s-%s:%d", container.Name, p.Protocol, p.ContainerPort)
} else {
pm.Name = fmt.Sprintf("%s-%s", container.Name, p.Name)
}
// Protect against exposing the same protocol-port more than once in a container.
if _, ok := names[pm.Name]; ok {
glog.Warningf("Port name conflicted, %q is defined more than once", pm.Name)
continue
}
ports = append(ports, pm)
names[pm.Name] = struct{}{}
}
return
}