Implement security context in kuberuntime

2016-11-04 19:53:19 +08:00 · 2016-11-04 19:53:19 +08:00 · 3df60eb163
commit 3df60eb163
parent 476cd96098
4 changed files with 202 additions and 73 deletions
--- a/pkg/kubelet/kuberuntime/helpers.go
+++ b/pkg/kubelet/kuberuntime/helpers.go
@ -146,6 +146,16 @@ func getContainerSpec(pod *api.Pod, containerName string) *api.Container {
 	return nil
 }

+// getImageUser gets the uid that will run the command(s) from image.
+// It asks the image service for the status of image and returns the uid
+// reported by that status. If the status lookup fails, it returns 0 together
+// with the error, so callers must check the error before trusting the uid.
+func (m *kubeGenericRuntimeManager) getImageUser(image string) (int64, error) {
+	imageStatus, err := m.imageService.ImageStatus(&runtimeApi.ImageSpec{Image: &image})
+	if err != nil {
+		return 0, err
+	}
+
+	return imageStatus.GetUid(), nil
+}
+
 // isContainerFailed returns true if container has exited and exitcode is not zero.
 func isContainerFailed(status *kubecontainer.ContainerStatus) bool {
 	if status.State == kubecontainer.ContainerStateExited && status.ExitCode != 0 {
--- a/pkg/kubelet/kuberuntime/kuberuntime_container.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go
@ -40,6 +40,7 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/util/format"
 	kubetypes "k8s.io/kubernetes/pkg/types"
 	utilruntime "k8s.io/kubernetes/pkg/util/runtime"
+	"k8s.io/kubernetes/pkg/util/selinux"
 	"k8s.io/kubernetes/pkg/util/sets"
 	"k8s.io/kubernetes/pkg/util/term"
 )
@ -136,9 +137,17 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *api.Conta
 		return nil, err
 	}

+	// Verify RunAsNonRoot.
+	imageUser, err := m.getImageUser(container.Image)
+	if err != nil {
+		return nil, err
+	}
+	if err := verifyRunAsNonRoot(pod, container, imageUser); err != nil {
+		return nil, err
+	}
+
 	command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs)
 	containerLogsPath := buildContainerLogsPath(container.Name, restartCount)
-	podHasSELinuxLabel := pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.SELinuxOptions != nil
 	restartCountUint32 := uint32(restartCount)
 	config := &runtimeApi.ContainerConfig{
 		Metadata: &runtimeApi.ContainerMetadata{
@ -151,24 +160,13 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *api.Conta
 		WorkingDir:  &container.WorkingDir,
 		Labels:      newContainerLabels(container, pod),
 		Annotations: newContainerAnnotations(container, pod, restartCount),
-		Mounts:      m.makeMounts(opts, container, podHasSELinuxLabel),
 		Devices:     makeDevices(opts),
+		Mounts:      m.makeMounts(opts, container),
 		LogPath:     &containerLogsPath,
 		Stdin:       &container.Stdin,
 		StdinOnce:   &container.StdinOnce,
 		Tty:         &container.TTY,
-		Linux:       m.generateLinuxContainerConfig(container, pod),
-	}
-
-	// set privileged and readonlyRootfs
-	if container.SecurityContext != nil {
-		securityContext := container.SecurityContext
-		if securityContext.Privileged != nil {
-			config.Privileged = securityContext.Privileged
-		}
-		if securityContext.ReadOnlyRootFilesystem != nil {
-			config.ReadonlyRootfs = securityContext.ReadOnlyRootFilesystem
-		}
+		Linux:       m.generateLinuxContainerConfig(container, pod, imageUser),
 	}

 	// set environment variables
@ -186,9 +184,10 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(container *api.Conta
 }

 // generateLinuxContainerConfig generates linux container config for kubelet runtime api.
-func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api.Container, pod *api.Pod) *runtimeApi.LinuxContainerConfig {
-	linuxConfig := &runtimeApi.LinuxContainerConfig{
+func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api.Container, pod *api.Pod, imageUser int64) *runtimeApi.LinuxContainerConfig {
+	lc := &runtimeApi.LinuxContainerConfig{
 		Resources:       &runtimeApi.LinuxContainerResources{},
+		SecurityContext: m.determineEffectiveSecurityContext(pod, container, imageUser),
 	}

 	// set linux container resources
@ -208,49 +207,23 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *api.
 		// of CPU shares.
 		cpuShares = milliCPUToShares(cpuRequest.MilliValue())
 	}
-	linuxConfig.Resources.CpuShares = &cpuShares
+	lc.Resources.CpuShares = &cpuShares
 	if memoryLimit != 0 {
-		linuxConfig.Resources.MemoryLimitInBytes = &memoryLimit
+		lc.Resources.MemoryLimitInBytes = &memoryLimit
 	}
 	// Set OOM score of the container based on qos policy. Processes in lower-priority pods should
 	// be killed first if the system runs out of memory.
-	linuxConfig.Resources.OomScoreAdj = &oomScoreAdj
+	lc.Resources.OomScoreAdj = &oomScoreAdj

 	if m.cpuCFSQuota {
 		// if cpuLimit.Amount is nil, then the appropriate default value is returned
 		// to allow full usage of cpu resource.
 		cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue())
-		linuxConfig.Resources.CpuQuota = &cpuQuota
-		linuxConfig.Resources.CpuPeriod = &cpuPeriod
+		lc.Resources.CpuQuota = &cpuQuota
+		lc.Resources.CpuPeriod = &cpuPeriod
 	}

-	// set security context options
-	if container.SecurityContext != nil {
-		securityContext := container.SecurityContext
-		if securityContext.Capabilities != nil {
-			linuxConfig.Capabilities = &runtimeApi.Capability{
-				AddCapabilities:  make([]string, len(securityContext.Capabilities.Add)),
-				DropCapabilities: make([]string, len(securityContext.Capabilities.Drop)),
-			}
-			for index, value := range securityContext.Capabilities.Add {
-				linuxConfig.Capabilities.AddCapabilities[index] = string(value)
-			}
-			for index, value := range securityContext.Capabilities.Drop {
-				linuxConfig.Capabilities.DropCapabilities[index] = string(value)
-			}
-		}
-
-		if securityContext.SELinuxOptions != nil {
-			linuxConfig.SelinuxOptions = &runtimeApi.SELinuxOption{
-				User:  &securityContext.SELinuxOptions.User,
-				Role:  &securityContext.SELinuxOptions.Role,
-				Type:  &securityContext.SELinuxOptions.Type,
-				Level: &securityContext.SELinuxOptions.Level,
-			}
-		}
-	}
-
-	return linuxConfig
+	return lc
 }

 // makeDevices generates container devices for kubelet runtime api.
@ -270,21 +243,20 @@ func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeApi.Device {
 }

 // makeMounts generates container volume mounts for kubelet runtime api.
-func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerOptions, container *api.Container, podHasSELinuxLabel bool) []*runtimeApi.Mount {
+func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerOptions, container *api.Container) []*runtimeApi.Mount {
 	volumeMounts := []*runtimeApi.Mount{}

 	for idx := range opts.Mounts {
 		v := opts.Mounts[idx]
-		m := &runtimeApi.Mount{
+		selinuxRelabel := v.SELinuxRelabel && selinux.SELinuxEnabled()
+		mount := &runtimeApi.Mount{
 			HostPath:       &v.HostPath,
 			ContainerPath:  &v.ContainerPath,
 			Readonly:       &v.ReadOnly,
-		}
-		if podHasSELinuxLabel && v.SELinuxRelabel {
-			m.SelinuxRelabel = &v.SELinuxRelabel
+			SelinuxRelabel: &selinuxRelabel,
 		}

-		volumeMounts = append(volumeMounts, m)
+		volumeMounts = append(volumeMounts, mount)
 	}

 	// The reason we create and mount the log file in here (not in kubelet) is because
@ -301,9 +273,11 @@ func (m *kubeGenericRuntimeManager) makeMounts(opts *kubecontainer.RunContainerO
 			glog.Errorf("Error on creating termination-log file %q: %v", containerLogPath, err)
 		} else {
 			fs.Close()
+			selinuxRelabel := selinux.SELinuxEnabled()
 			volumeMounts = append(volumeMounts, &runtimeApi.Mount{
 				HostPath:       &containerLogPath,
 				ContainerPath:  &container.TerminationMessagePath,
+				SelinuxRelabel: &selinuxRelabel,
 			})
 		}
 	}
--- a/pkg/kubelet/kuberuntime/kuberuntime_sandbox.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_sandbox.go
@ -120,7 +120,7 @@ func (m *kubeGenericRuntimeManager) generatePodSandboxConfig(pod *api.Pod, attem
 		// TODO: refactor kubelet to get cgroup parent for pod instead of containers
 		cgroupParent = opts.CgroupParent
 	}
-	podSandboxConfig.Linux = generatePodSandboxLinuxConfig(pod, cgroupParent)
+	podSandboxConfig.Linux = m.generatePodSandboxLinuxConfig(pod, cgroupParent)
 	if len(portMappings) > 0 {
 		podSandboxConfig.PortMappings = portMappings
 	}
@ -129,26 +129,43 @@ func (m *kubeGenericRuntimeManager) generatePodSandboxConfig(pod *api.Pod, attem
 }

 // generatePodSandboxLinuxConfig generates LinuxPodSandboxConfig from api.Pod.
-func generatePodSandboxLinuxConfig(pod *api.Pod, cgroupParent string) *runtimeApi.LinuxPodSandboxConfig {
+func (m *kubeGenericRuntimeManager) generatePodSandboxLinuxConfig(pod *api.Pod, cgroupParent string) *runtimeApi.LinuxPodSandboxConfig {
 	if pod.Spec.SecurityContext == nil && cgroupParent == "" {
 		return nil
 	}

-	linuxPodSandboxConfig := &runtimeApi.LinuxPodSandboxConfig{}
-	if pod.Spec.SecurityContext != nil {
-		securityContext := pod.Spec.SecurityContext
-		linuxPodSandboxConfig.NamespaceOptions = &runtimeApi.NamespaceOption{
-			HostNetwork: &securityContext.HostNetwork,
-			HostIpc:     &securityContext.HostIPC,
-			HostPid:     &securityContext.HostPID,
-		}
-	}
-
+	lc := &runtimeApi.LinuxPodSandboxConfig{}
 	if cgroupParent != "" {
-		linuxPodSandboxConfig.CgroupParent = &cgroupParent
+		lc.CgroupParent = &cgroupParent
+	}
+	if pod.Spec.SecurityContext != nil {
+		sc := pod.Spec.SecurityContext
+		lc.SecurityContext = &runtimeApi.LinuxSandboxSecurityContext{
+			NamespaceOptions: &runtimeApi.NamespaceOption{
+				HostNetwork: &sc.HostNetwork,
+				HostIpc:     &sc.HostIPC,
+				HostPid:     &sc.HostPID,
+			},
+			RunAsUser: sc.RunAsUser,
 		}

-	return linuxPodSandboxConfig
+		if groups := m.runtimeHelper.GetExtraSupplementalGroupsForPod(pod); len(groups) > 0 {
+			lc.SecurityContext.SupplementalGroups = append(lc.SecurityContext.SupplementalGroups, groups...)
+		}
+		if sc.SupplementalGroups != nil {
+			lc.SecurityContext.SupplementalGroups = append(lc.SecurityContext.SupplementalGroups, sc.SupplementalGroups...)
+		}
+		if sc.SELinuxOptions != nil {
+			lc.SecurityContext.SelinuxOptions = &runtimeApi.SELinuxOption{
+				User:  &sc.SELinuxOptions.User,
+				Role:  &sc.SELinuxOptions.Role,
+				Type:  &sc.SELinuxOptions.Type,
+				Level: &sc.SELinuxOptions.Level,
+			}
+		}
+	}
+
+	return lc
 }

 // getKubeletSandboxes lists all (or just the running) sandboxes managed by kubelet.
--- a/pkg/kubelet/kuberuntime/security_context.go
+++ b/pkg/kubelet/kuberuntime/security_context.go
@ -0,0 +1,128 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kuberuntime
+
+import (
+	"fmt"
+
+	"k8s.io/kubernetes/pkg/api"
+	runtimeapi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
+	"k8s.io/kubernetes/pkg/securitycontext"
+)
+
+// determineEffectiveSecurityContext builds the runtime API security context for
+// container. It starts from the effective (pod + container) security context,
+// falls back to imageUser when no RunAsUser is set, and then layers on the
+// pod-level namespace options and supplemental groups when the pod declares a
+// security context. The result is never nil.
+func (m *kubeGenericRuntimeManager) determineEffectiveSecurityContext(pod *api.Pod, container *api.Container, imageUser int64) *runtimeapi.LinuxContainerSecurityContext {
+	sc := convertToRuntimeSecurityContext(securitycontext.DetermineEffectiveSecurityContext(pod, container))
+	if sc == nil {
+		sc = &runtimeapi.LinuxContainerSecurityContext{}
+	}
+
+	// No user requested in the spec: run as the user recorded for the image.
+	if sc.RunAsUser == nil {
+		sc.RunAsUser = &imageUser
+	}
+
+	// Namespace options and supplemental groups only come from the pod-level
+	// security context; without one there is nothing more to add.
+	if podSc := pod.Spec.SecurityContext; podSc != nil {
+		sc.NamespaceOptions = &runtimeapi.NamespaceOption{
+			HostNetwork: &podSc.HostNetwork,
+			HostIpc:     &podSc.HostIPC,
+			HostPid:     &podSc.HostPID,
+		}
+
+		// Group order: FSGroup, then helper-provided extras, then the pod's own list.
+		if podSc.FSGroup != nil {
+			sc.SupplementalGroups = append(sc.SupplementalGroups, *podSc.FSGroup)
+		}
+		extraGroups := m.runtimeHelper.GetExtraSupplementalGroupsForPod(pod)
+		if len(extraGroups) > 0 {
+			sc.SupplementalGroups = append(sc.SupplementalGroups, extraGroups...)
+		}
+		if podSc.SupplementalGroups != nil {
+			sc.SupplementalGroups = append(sc.SupplementalGroups, podSc.SupplementalGroups...)
+		}
+	}
+
+	return sc
+}
+
+// verifyRunAsNonRoot checks that the container will not run as root when its
+// effective security context requests RunAsNonRoot.
+//
+// It returns nil when there is no effective security context, when
+// RunAsNonRoot is unset, or when RunAsNonRoot is explicitly false. Otherwise
+// the user that will actually run the container is checked: an explicit
+// RunAsUser takes precedence, and imageUser (the uid recorded for the image)
+// is only consulted when RunAsUser is not set. An error is returned if that
+// user is uid 0.
+func verifyRunAsNonRoot(pod *api.Pod, container *api.Container, imageUser int64) error {
+	effectiveSc := securitycontext.DetermineEffectiveSecurityContext(pod, container)
+	// The policy applies only when RunAsNonRoot is present AND true; an
+	// explicit "false" must not trigger the check.
+	if effectiveSc == nil || effectiveSc.RunAsNonRoot == nil || !*effectiveSc.RunAsNonRoot {
+		return nil
+	}
+
+	// An explicit RunAsUser overrides the image's default user, so the image
+	// user is irrelevant once RunAsUser is set.
+	if effectiveSc.RunAsUser != nil {
+		if *effectiveSc.RunAsUser == 0 {
+			return fmt.Errorf("container's runAsUser breaks non-root policy")
+		}
+		return nil
+	}
+
+	if imageUser == 0 {
+		return fmt.Errorf("container has runAsNonRoot and image will run as root")
+	}
+
+	return nil
+}
+
+// convertToRuntimeSecurityContext translates an api.SecurityContext into a
+// runtimeapi.LinuxContainerSecurityContext. A nil input yields a nil result.
+// Only RunAsUser, Privileged, ReadOnlyRootFilesystem, Capabilities and
+// SELinuxOptions are carried over.
+func convertToRuntimeSecurityContext(securityContext *api.SecurityContext) *runtimeapi.LinuxContainerSecurityContext {
+	if securityContext == nil {
+		return nil
+	}
+
+	sc := &runtimeapi.LinuxContainerSecurityContext{}
+	sc.RunAsUser = securityContext.RunAsUser
+	sc.Privileged = securityContext.Privileged
+	sc.ReadonlyRootfs = securityContext.ReadOnlyRootFilesystem
+	sc.Capabilities = convertToRuntimeCapabilities(securityContext.Capabilities)
+	sc.SelinuxOptions = convertToRuntimeSELinuxOption(securityContext.SELinuxOptions)
+	return sc
+}
+
+// convertToRuntimeSELinuxOption translates api.SELinuxOptions into a
+// runtimeapi.SELinuxOption. A nil input yields a nil result; otherwise the
+// result's fields point at the corresponding fields of opts.
+func convertToRuntimeSELinuxOption(opts *api.SELinuxOptions) *runtimeapi.SELinuxOption {
+	if opts == nil {
+		return nil
+	}
+
+	selinuxOpt := &runtimeapi.SELinuxOption{}
+	selinuxOpt.User = &opts.User
+	selinuxOpt.Role = &opts.Role
+	selinuxOpt.Type = &opts.Type
+	selinuxOpt.Level = &opts.Level
+	return selinuxOpt
+}
+
+// convertToRuntimeCapabilities translates api.Capabilities into a
+// runtimeapi.Capability. A nil input yields a nil result; otherwise every
+// added and dropped capability is copied over as a plain string, in order.
+func convertToRuntimeCapabilities(opts *api.Capabilities) *runtimeapi.Capability {
+	if opts == nil {
+		return nil
+	}
+
+	added := make([]string, 0, len(opts.Add))
+	for _, c := range opts.Add {
+		added = append(added, string(c))
+	}
+
+	dropped := make([]string, 0, len(opts.Drop))
+	for _, c := range opts.Drop {
+		dropped = append(dropped, string(c))
+	}
+
+	return &runtimeapi.Capability{
+		AddCapabilities:  added,
+		DropCapabilities: dropped,
+	}
+}