diff --git a/pkg/kubelet/kuberuntime/helpers.go b/pkg/kubelet/kuberuntime/helpers.go index f17853e0535..510096c47e6 100644 --- a/pkg/kubelet/kuberuntime/helpers.go +++ b/pkg/kubelet/kuberuntime/helpers.go @@ -146,6 +146,16 @@ func getContainerSpec(pod *api.Pod, containerName string) *api.Container { return nil } +// getImageUser returns the uid that the image service's ImageStatus reports for image, i.e. the uid that will run the command(s) from image; an ImageStatus error is returned unchanged. +func (m *kubeGenericRuntimeManager) getImageUser(image string) (int64, error) { + imageStatus, err := m.imageService.ImageStatus(&runtimeApi.ImageSpec{Image: &image}) + if err != nil { + return 0, err + } + + return imageStatus.GetUid(), nil +} + // isContainerFailed returns true if container has exited and exitcode is not zero. func isContainerFailed(status *kubecontainer.ContainerStatus) bool { if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 { diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go index 2ae10fedb01..a27243f287c 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_container.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go @@ -40,6 +40,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/util/format" kubetypes "k8s.io/kubernetes/pkg/types" utilruntime "k8s.io/kubernetes/pkg/util/runtime" + "k8s.io/kubernetes/pkg/util/selinux" "k8s.io/kubernetes/pkg/util/sets" "k8s.io/kubernetes/pkg/util/term" ) @@ -136,9 +137,17 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *api.Conta return nil, err } + // Verify RunAsNonRoot. 
+ imageUser, err := m.getImageUser(container.Image) + if err != nil { + return nil, err + } + if err := verifyRunAsNonRoot(pod, container, imageUser); err != nil { + return nil, err + } + command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs) containerLogsPath := buildContainerLogsPath(container.Name, restartCount) - podHasSELinuxLabel := pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.SELinuxOptions != nil restartCountUint32 := uint32(restartCount) config := &runtimeApi.ContainerConfig{ Metadata: &runtimeApi.ContainerMetadata{ @@ -151,24 +160,13 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *api.Conta WorkingDir: &container.WorkingDir, Labels: newContainerLabels(container, pod), Annotations: newContainerAnnotations(container, pod, restartCount), - Mounts: m.makeMounts(opts, container, podHasSELinuxLabel), Devices: makeDevices(opts), + Mounts: m.makeMounts(opts, container), LogPath: &containerLogsPath, Stdin: &container.Stdin, StdinOnce: &container.StdinOnce, Tty: &container.TTY, - Linux: m.generateLinuxContainerConfig(container, pod), - } - - // set privileged and readonlyRootfs - if container.SecurityContext != nil { - securityContext := container.SecurityContext - if securityContext.Privileged != nil { - config.Privileged = securityContext.Privileged - } - if securityContext.ReadOnlyRootFilesystem != nil { - config.ReadonlyRootfs = securityContext.ReadOnlyRootFilesystem - } + Linux: m.generateLinuxContainerConfig(container, pod, imageUser), } // set environment variables @@ -186,9 +184,10 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *api.Conta } // generateLinuxContainerConfig generates linux container config for kubelet runtime api. 
-func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api.Container, pod *api.Pod) *runtimeApi.LinuxContainerConfig { - linuxConfig := &runtimeApi.LinuxContainerConfig{ - Resources: &runtimeApi.LinuxContainerResources{}, +func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api.Container, pod *api.Pod, imageUser int64) *runtimeApi.LinuxContainerConfig { + lc := &runtimeApi.LinuxContainerConfig{ + Resources: &runtimeApi.LinuxContainerResources{}, + SecurityContext: m.determineEffectiveSecurityContext(pod, container, imageUser), } // set linux container resources @@ -208,49 +207,23 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api. // of CPU shares. cpuShares = milliCPUToShares(cpuRequest.MilliValue()) } - linuxConfig.Resources.CpuShares = &cpuShares + lc.Resources.CpuShares = &cpuShares if memoryLimit != 0 { - linuxConfig.Resources.MemoryLimitInBytes = &memoryLimit + lc.Resources.MemoryLimitInBytes = &memoryLimit } // Set OOM score of the container based on qos policy. Processes in lower-priority pods should // be killed first if the system runs out of memory. - linuxConfig.Resources.OomScoreAdj = &oomScoreAdj + lc.Resources.OomScoreAdj = &oomScoreAdj if m.cpuCFSQuota { // if cpuLimit.Amount is nil, then the appropriate default value is returned // to allow full usage of cpu resource. 
cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue()) - linuxConfig.Resources.CpuQuota = &cpuQuota - linuxConfig.Resources.CpuPeriod = &cpuPeriod + lc.Resources.CpuQuota = &cpuQuota + lc.Resources.CpuPeriod = &cpuPeriod } - // set security context options - if container.SecurityContext != nil { - securityContext := container.SecurityContext - if securityContext.Capabilities != nil { - linuxConfig.Capabilities = &runtimeApi.Capability{ - AddCapabilities: make([]string, len(securityContext.Capabilities.Add)), - DropCapabilities: make([]string, len(securityContext.Capabilities.Drop)), - } - for index, value := range securityContext.Capabilities.Add { - linuxConfig.Capabilities.AddCapabilities[index] = string(value) - } - for index, value := range securityContext.Capabilities.Drop { - linuxConfig.Capabilities.DropCapabilities[index] = string(value) - } - } - - if securityContext.SELinuxOptions != nil { - linuxConfig.SelinuxOptions = &runtimeApi.SELinuxOption{ - User: &securityContext.SELinuxOptions.User, - Role: &securityContext.SELinuxOptions.Role, - Type: &securityContext.SELinuxOptions.Type, - Level: &securityContext.SELinuxOptions.Level, - } - } - } - - return linuxConfig + return lc } // makeDevices generates container devices for kubelet runtime api. @@ -270,21 +243,20 @@ func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeApi.Device { } // makeMounts generates container volume mounts for kubelet runtime api. 
-func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerOptions, container *api.Container, podHasSELinuxLabel bool) []*runtimeApi.Mount { +func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerOptions, container *api.Container) []*runtimeApi.Mount { volumeMounts := []*runtimeApi.Mount{} for idx := range opts.Mounts { v := opts.Mounts[idx] - m := &runtimeApi.Mount{ - HostPath: &v.HostPath, - ContainerPath: &v.ContainerPath, - Readonly: &v.ReadOnly, - } - if podHasSELinuxLabel && v.SELinuxRelabel { - m.SelinuxRelabel = &v.SELinuxRelabel + selinuxRelabel := v.SELinuxRelabel && selinux.SELinuxEnabled() + mount := &runtimeApi.Mount{ + HostPath: &v.HostPath, + ContainerPath: &v.ContainerPath, + Readonly: &v.ReadOnly, + SelinuxRelabel: &selinuxRelabel, } - volumeMounts = append(volumeMounts, m) + volumeMounts = append(volumeMounts, mount) } // The reason we create and mount the log file in here (not in kubelet) is because @@ -301,9 +273,11 @@ func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerO glog.Errorf("Error on creating termination-log file %q: %v", containerLogPath, err) } else { fs.Close() + selinuxRelabel := selinux.SELinuxEnabled() volumeMounts = append(volumeMounts, &runtimeApi.Mount{ - HostPath: &containerLogPath, - ContainerPath: &container.TerminationMessagePath, + HostPath: &containerLogPath, + ContainerPath: &container.TerminationMessagePath, + SelinuxRelabel: &selinuxRelabel, }) } } diff --git a/pkg/kubelet/kuberuntime/kuberuntime_sandbox.go b/pkg/kubelet/kuberuntime/kuberuntime_sandbox.go index f7ac7e41cc7..3333e8e0aab 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_sandbox.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_sandbox.go @@ -120,7 +120,7 @@ func (m *kubeGenericRuntimeManager) generatePodSandboxConfig(pod *api.Pod, attem // TODO: refactor kubelet to get cgroup parent for pod instead of containers cgroupParent = opts.CgroupParent } - podSandboxConfig.Linux = 
generatePodSandboxLinuxConfig(pod, cgroupParent) + podSandboxConfig.Linux = m.generatePodSandboxLinuxConfig(pod, cgroupParent) if len(portMappings) > 0 { podSandboxConfig.PortMappings = portMappings } @@ -129,26 +129,43 @@ func (m *kubeGenericRuntimeManager) generatePodSandboxConfig(pod *api.Pod, attem } // generatePodSandboxLinuxConfig generates LinuxPodSandboxConfig from api.Pod. -func generatePodSandboxLinuxConfig(pod *api.Pod, cgroupParent string) *runtimeApi.LinuxPodSandboxConfig { +func (m *kubeGenericRuntimeManager) generatePodSandboxLinuxConfig(pod *api.Pod, cgroupParent string) *runtimeApi.LinuxPodSandboxConfig { if pod.Spec.SecurityContext == nil && cgroupParent == "" { return nil } - linuxPodSandboxConfig := &runtimeApi.LinuxPodSandboxConfig{} + lc := &runtimeApi.LinuxPodSandboxConfig{} + if cgroupParent != "" { + lc.CgroupParent = &cgroupParent + } if pod.Spec.SecurityContext != nil { - securityContext := pod.Spec.SecurityContext - linuxPodSandboxConfig.NamespaceOptions = &runtimeApi.NamespaceOption{ - HostNetwork: &securityContext.HostNetwork, - HostIpc: &securityContext.HostIPC, - HostPid: &securityContext.HostPID, + sc := pod.Spec.SecurityContext + lc.SecurityContext = &runtimeApi.LinuxSandboxSecurityContext{ + NamespaceOptions: &runtimeApi.NamespaceOption{ + HostNetwork: &sc.HostNetwork, + HostIpc: &sc.HostIPC, + HostPid: &sc.HostPID, + }, + RunAsUser: sc.RunAsUser, + } + + if groups := m.runtimeHelper.GetExtraSupplementalGroupsForPod(pod); len(groups) > 0 { + lc.SecurityContext.SupplementalGroups = append(lc.SecurityContext.SupplementalGroups, groups...) + } + if sc.SupplementalGroups != nil { + lc.SecurityContext.SupplementalGroups = append(lc.SecurityContext.SupplementalGroups, sc.SupplementalGroups...) 
+ } + if sc.SELinuxOptions != nil { + lc.SecurityContext.SelinuxOptions = &runtimeApi.SELinuxOption{ + User: &sc.SELinuxOptions.User, + Role: &sc.SELinuxOptions.Role, + Type: &sc.SELinuxOptions.Type, + Level: &sc.SELinuxOptions.Level, + } } } - if cgroupParent != "" { - linuxPodSandboxConfig.CgroupParent = &cgroupParent - } - - return linuxPodSandboxConfig + return lc } // getKubeletSandboxes lists all (or just the running) sandboxes managed by kubelet. diff --git a/pkg/kubelet/kuberuntime/security_context.go b/pkg/kubelet/kuberuntime/security_context.go new file mode 100644 index 00000000000..71ca4b54905 --- /dev/null +++ b/pkg/kubelet/kuberuntime/security_context.go @@ -0,0 +1,128 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kuberuntime + +import ( + "fmt" + + "k8s.io/kubernetes/pkg/api" + runtimeapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime" + "k8s.io/kubernetes/pkg/securitycontext" +) + +// determineEffectiveSecurityContext builds the container's runtime security context from api.Pod and api.Container: RunAsUser falls back to imageUser when the effective security context leaves it unset, and namespace options plus supplemental groups are taken from the pod-level security context when the pod has one. +func (m *kubeGenericRuntimeManager) determineEffectiveSecurityContext(pod *api.Pod, container *api.Container, imageUser int64) *runtimeapi.LinuxContainerSecurityContext { + effectiveSc := securitycontext.DetermineEffectiveSecurityContext(pod, container) + synthesized := convertToRuntimeSecurityContext(effectiveSc) + if synthesized == nil { + synthesized = &runtimeapi.LinuxContainerSecurityContext{} + } + + // set RunAsUser. 
+ if synthesized.RunAsUser == nil { + synthesized.RunAsUser = &imageUser + } + + // set namespace options and supplemental groups. + podSc := pod.Spec.SecurityContext + if podSc == nil { + return synthesized + } + synthesized.NamespaceOptions = &runtimeapi.NamespaceOption{ + HostNetwork: &podSc.HostNetwork, + HostIpc: &podSc.HostIPC, + HostPid: &podSc.HostPID, + } + if podSc.FSGroup != nil { + synthesized.SupplementalGroups = append(synthesized.SupplementalGroups, *podSc.FSGroup) + } + if groups := m.runtimeHelper.GetExtraSupplementalGroupsForPod(pod); len(groups) > 0 { + synthesized.SupplementalGroups = append(synthesized.SupplementalGroups, groups...) + } + if podSc.SupplementalGroups != nil { + synthesized.SupplementalGroups = append(synthesized.SupplementalGroups, podSc.SupplementalGroups...) + } + + return synthesized +} + +// verifyRunAsNonRoot returns an error when RunAsNonRoot is enabled and the container would run as uid 0, either through an explicit runAsUser of 0 or, when runAsUser is unset, through imageUser; it returns nil when the policy is unset or false. +func verifyRunAsNonRoot(pod *api.Pod, container *api.Container, imageUser int64) error { + effectiveSc := securitycontext.DetermineEffectiveSecurityContext(pod, container) + if effectiveSc == nil || effectiveSc.RunAsNonRoot == nil || !*effectiveSc.RunAsNonRoot { // policy unset or explicitly false + return nil + } + + if effectiveSc.RunAsUser != nil && *effectiveSc.RunAsUser == 0 { + return fmt.Errorf("container's runAsUser breaks non-root policy") + } + + if effectiveSc.RunAsUser == nil && imageUser == 0 { // the image user only applies when runAsUser is unset + return fmt.Errorf("container has runAsNonRoot and image will run as root") + } + + return nil +} + +// convertToRuntimeSecurityContext converts api.SecurityContext to runtimeapi.LinuxContainerSecurityContext. 
+func convertToRuntimeSecurityContext(securityContext *api.SecurityContext) *runtimeapi.LinuxContainerSecurityContext { + if securityContext == nil { + return nil + } + + return &runtimeapi.LinuxContainerSecurityContext{ + RunAsUser: securityContext.RunAsUser, + Privileged: securityContext.Privileged, + ReadonlyRootfs: securityContext.ReadOnlyRootFilesystem, + Capabilities: convertToRuntimeCapabilities(securityContext.Capabilities), + SelinuxOptions: convertToRuntimeSELinuxOption(securityContext.SELinuxOptions), + } +} + +// convertToRuntimeSELinuxOption converts api.SELinuxOptions to runtimeapi.SELinuxOption; a nil opts yields nil, and the result's fields point into opts rather than copying its strings. +func convertToRuntimeSELinuxOption(opts *api.SELinuxOptions) *runtimeapi.SELinuxOption { + if opts == nil { + return nil + } + + return &runtimeapi.SELinuxOption{ + User: &opts.User, + Role: &opts.Role, + Type: &opts.Type, + Level: &opts.Level, + } +} + +// convertToRuntimeCapabilities converts api.Capabilities to runtimeapi.Capability, copying the Add and Drop lists element by element as plain strings; a nil opts yields nil. +func convertToRuntimeCapabilities(opts *api.Capabilities) *runtimeapi.Capability { + if opts == nil { + return nil + } + + capabilities := &runtimeapi.Capability{ + AddCapabilities: make([]string, len(opts.Add)), + DropCapabilities: make([]string, len(opts.Drop)), + } + for index, value := range opts.Add { + capabilities.AddCapabilities[index] = string(value) + } + for index, value := range opts.Drop { + capabilities.DropCapabilities[index] = string(value) + } + + return capabilities +}