Merge pull request #111090 from kinvolk/rata/userns-support-2022

Add support for user namespaces phase 1 (KEP 127)
This commit is contained in:
Kubernetes Prow Robot
2022-08-03 13:05:47 -07:00
committed by GitHub
104 changed files with 2763 additions and 947 deletions

View File

@@ -2976,6 +2976,7 @@ type PodSpec struct {
// If the OS field is set to windows, following fields must be unset:
// - spec.hostPID
// - spec.hostIPC
// - spec.hostUsers
// - spec.securityContext.seLinuxOptions
// - spec.securityContext.seccompProfile
// - spec.securityContext.fsGroup
@@ -3078,6 +3079,18 @@ type PodSecurityContext struct {
// +k8s:conversion-gen=false
// +optional
ShareProcessNamespace *bool
// Use the host's user namespace.
// Optional: Default to true.
// If set to true or not present, the pod will be run in the host user namespace, useful
// for when the pod needs a feature only available to the host user namespace, such as
// loading a kernel module with CAP_SYS_MODULE.
// When set to false, a new user namespace is created for the pod. Setting it to false is
// useful for mitigating container breakout vulnerabilities, and even allows users to run
// their containers as root without actually having root privileges on the host.
// Note that this field cannot be set when spec.os.name is windows.
// +k8s:conversion-gen=false
// +optional
HostUsers *bool
// The SELinux context to be applied to all containers.
// If unspecified, the container runtime will allocate a random SELinux context for each
// container. May also be set in SecurityContext. If set in

View File

@@ -303,6 +303,7 @@ func Convert_core_PodSpec_To_v1_PodSpec(in *core.PodSpec, out *v1.PodSpec, s con
out.HostNetwork = in.SecurityContext.HostNetwork
out.HostIPC = in.SecurityContext.HostIPC
out.ShareProcessNamespace = in.SecurityContext.ShareProcessNamespace
out.HostUsers = in.SecurityContext.HostUsers
}
return nil
@@ -358,6 +359,7 @@ func Convert_v1_PodSpec_To_core_PodSpec(in *v1.PodSpec, out *core.PodSpec, s con
out.SecurityContext.HostPID = in.HostPID
out.SecurityContext.HostIPC = in.HostIPC
out.SecurityContext.ShareProcessNamespace = in.ShareProcessNamespace
out.SecurityContext.HostUsers = in.HostUsers
return nil
}

View File

@@ -6093,6 +6093,7 @@ func autoConvert_core_PodSecurityContext_To_v1_PodSecurityContext(in *core.PodSe
// INFO: in.HostPID opted out of conversion generation
// INFO: in.HostIPC opted out of conversion generation
// INFO: in.ShareProcessNamespace opted out of conversion generation
// INFO: in.HostUsers opted out of conversion generation
out.SELinuxOptions = (*v1.SELinuxOptions)(unsafe.Pointer(in.SELinuxOptions))
out.WindowsOptions = (*v1.WindowsSecurityContextOptions)(unsafe.Pointer(in.WindowsOptions))
out.RunAsUser = (*int64)(unsafe.Pointer(in.RunAsUser))
@@ -6186,6 +6187,7 @@ func autoConvert_v1_PodSpec_To_core_PodSpec(in *v1.PodSpec, out *core.PodSpec, s
out.TopologySpreadConstraints = *(*[]core.TopologySpreadConstraint)(unsafe.Pointer(&in.TopologySpreadConstraints))
out.SetHostnameAsFQDN = (*bool)(unsafe.Pointer(in.SetHostnameAsFQDN))
out.OS = (*core.PodOS)(unsafe.Pointer(in.OS))
// INFO: in.HostUsers opted out of conversion generation
return nil
}

View File

@@ -3099,6 +3099,52 @@ func validateContainerCommon(ctr *core.Container, volumes map[string]core.Volume
allErrs = append(allErrs, validatePullPolicy(ctr.ImagePullPolicy, path.Child("imagePullPolicy"))...)
allErrs = append(allErrs, ValidateResourceRequirements(&ctr.Resources, path.Child("resources"), opts)...)
allErrs = append(allErrs, ValidateSecurityContext(ctr.SecurityContext, path.Child("securityContext"))...)
return allErrs
}
// validateHostUsers checks the restrictions that apply when a pod opts out of
// the host user namespace, i.e. when spec.SecurityContext.HostUsers is
// explicitly set to false.
//
// It returns an empty list when spec.SecurityContext is nil, when HostUsers is
// nil, or when HostUsers is true. Otherwise it appends a Forbidden error for:
//   - every volume whose source is not one of emptyDir, secret, downwardAPI,
//     configMap or projected;
//   - each of hostNetwork, hostPID and hostIPC that is enabled.
//
// fldPath is the path of the pod spec; all errors are reported on children of
// it, using the lowerCamel field names (hostNetwork, hostPID, hostIPC).
func validateHostUsers(spec *core.PodSpec, fldPath *field.Path) field.ErrorList {
	allErrs := field.ErrorList{}

	// Only make the following checks if hostUsers is false (otherwise, the container uses the
	// same userns as the host, and so there isn't anything to check).
	if spec.SecurityContext == nil || spec.SecurityContext.HostUsers == nil || *spec.SecurityContext.HostUsers {
		return allErrs
	}

	// For now only these volumes are supported:
	// - configmap
	// - secret
	// - downwardAPI
	// - emptyDir
	// - projected
	// So reject anything else.
	for i := range spec.Volumes {
		// Index instead of ranging by value to avoid copying each Volume.
		vol := &spec.Volumes[i]
		switch {
		case vol.EmptyDir != nil:
		case vol.Secret != nil:
		case vol.DownwardAPI != nil:
		case vol.ConfigMap != nil:
		case vol.Projected != nil:
		default:
			allErrs = append(allErrs, field.Forbidden(fldPath.Child("volumes").Index(i), "volume type not supported when `pod.Spec.HostUsers` is false"))
		}
	}

	// We decided to restrict the usage of userns with other host namespaces:
	// 	https://github.com/kubernetes/kubernetes/pull/111090#discussion_r935994282
	// The tl;dr is: you can easily run into permission issues that seem unexpected, we don't
	// know of any good use case and we can always enable them later.

	// Note we already validated above spec.SecurityContext is not nil.
	if spec.SecurityContext.HostNetwork {
		allErrs = append(allErrs, field.Forbidden(fldPath.Child("hostNetwork"), "when `pod.Spec.HostUsers` is false"))
	}
	if spec.SecurityContext.HostPID {
		// Path uses the lowerCamel field name, matching hostNetwork above.
		allErrs = append(allErrs, field.Forbidden(fldPath.Child("hostPID"), "when `pod.Spec.HostUsers` is false"))
	}
	if spec.SecurityContext.HostIPC {
		allErrs = append(allErrs, field.Forbidden(fldPath.Child("hostIPC"), "when `pod.Spec.HostUsers` is false"))
	}

	return allErrs
}
@@ -3569,6 +3615,7 @@ func ValidatePodSpec(spec *core.PodSpec, podMeta *metav1.ObjectMeta, fldPath *fi
allErrs = append(allErrs, validateReadinessGates(spec.ReadinessGates, fldPath.Child("readinessGates"))...)
allErrs = append(allErrs, validateTopologySpreadConstraints(spec.TopologySpreadConstraints, fldPath.Child("topologySpreadConstraints"))...)
allErrs = append(allErrs, validateWindowsHostProcessPod(spec, fldPath, opts)...)
allErrs = append(allErrs, validateHostUsers(spec, fldPath)...)
if len(spec.ServiceAccountName) > 0 {
for _, msg := range ValidateServiceAccountName(spec.ServiceAccountName, false) {
allErrs = append(allErrs, field.Invalid(fldPath.Child("serviceAccountName"), spec.ServiceAccountName, msg))
@@ -3661,6 +3708,9 @@ func validateWindows(spec *core.PodSpec, fldPath *field.Path) field.ErrorList {
if securityContext.SELinuxOptions != nil {
allErrs = append(allErrs, field.Forbidden(fldPath.Child("securityContext").Child("seLinuxOptions"), "cannot be set for a windows pod"))
}
if securityContext.HostUsers != nil {
allErrs = append(allErrs, field.Forbidden(fldPath.Child("hostUsers"), "cannot be set for a windows pod"))
}
if securityContext.HostPID {
allErrs = append(allErrs, field.Forbidden(fldPath.Child("hostPID"), "cannot be set for a windows pod"))
}

View File

@@ -18399,6 +18399,7 @@ func TestValidateOSFields(t *testing.T) {
"SecurityContext.HostIPC",
"SecurityContext.HostNetwork",
"SecurityContext.HostPID",
"SecurityContext.HostUsers",
"SecurityContext.RunAsGroup",
"SecurityContext.RunAsUser",
"SecurityContext.SELinuxOptions",
@@ -20694,6 +20695,172 @@ func TestValidateNonSpecialIP(t *testing.T) {
}
}
// TestValidateHostUsers runs validateHostUsers against a table of pod specs
// and checks only whether validation produced any error at all: specs marked
// wantValid must yield no errors, all others must yield at least one.
func TestValidateHostUsers(t *testing.T) {
	hostUsersOff, hostUsersOn := false, true

	// securityContext builds a pod security context with the given hostUsers
	// pointer; the remaining fields keep their zero values.
	securityContext := func(hostUsers *bool) *core.PodSecurityContext {
		return &core.PodSecurityContext{HostUsers: hostUsers}
	}

	// hostPathVolumes returns a fresh single-element volume list using a
	// hostPath source, which is not in the list accepted with hostUsers=false.
	hostPathVolumes := func() []core.Volume {
		return []core.Volume{
			{
				Name: "host-path",
				VolumeSource: core.VolumeSource{
					HostPath: &core.HostPathVolumeSource{},
				},
			},
		}
	}

	// One volume of each source type accepted with hostUsers=false.
	supportedVolumes := []core.Volume{
		{
			Name: "configmap",
			VolumeSource: core.VolumeSource{
				ConfigMap: &core.ConfigMapVolumeSource{
					LocalObjectReference: core.LocalObjectReference{Name: "configmap"},
				},
			},
		},
		{
			Name: "secret",
			VolumeSource: core.VolumeSource{
				Secret: &core.SecretVolumeSource{
					SecretName: "secret",
				},
			},
		},
		{
			Name: "downward-api",
			VolumeSource: core.VolumeSource{
				DownwardAPI: &core.DownwardAPIVolumeSource{},
			},
		},
		{
			Name: "proj",
			VolumeSource: core.VolumeSource{
				Projected: &core.ProjectedVolumeSource{},
			},
		},
		{
			Name: "empty-dir",
			VolumeSource: core.VolumeSource{
				EmptyDir: &core.EmptyDirVolumeSource{},
			},
		},
	}

	tests := []struct {
		name      string
		wantValid bool
		spec      *core.PodSpec
	}{
		{
			name:      "empty",
			wantValid: true,
			spec:      &core.PodSpec{},
		},
		{
			name:      "hostUsers unset",
			wantValid: true,
			spec:      &core.PodSpec{SecurityContext: &core.PodSecurityContext{}},
		},
		{
			name:      "hostUsers=false",
			wantValid: true,
			spec:      &core.PodSpec{SecurityContext: securityContext(&hostUsersOff)},
		},
		{
			name:      "hostUsers=true",
			wantValid: true,
			spec:      &core.PodSpec{SecurityContext: securityContext(&hostUsersOn)},
		},
		{
			name:      "hostUsers=false & volumes",
			wantValid: true,
			spec: &core.PodSpec{
				SecurityContext: securityContext(&hostUsersOff),
				Volumes:         supportedVolumes,
			},
		},
		{
			name:      "hostUsers=false - unsupported volume",
			wantValid: false,
			spec: &core.PodSpec{
				SecurityContext: securityContext(&hostUsersOff),
				Volumes:         hostPathVolumes(),
			},
		},
		{
			// It should ignore unsupported volumes with hostUsers=true.
			name:      "hostUsers=true - unsupported volume",
			wantValid: true,
			spec: &core.PodSpec{
				SecurityContext: securityContext(&hostUsersOn),
				Volumes:         hostPathVolumes(),
			},
		},
		{
			name:      "hostUsers=false & HostNetwork",
			wantValid: false,
			spec: &core.PodSpec{
				SecurityContext: &core.PodSecurityContext{
					HostUsers:   &hostUsersOff,
					HostNetwork: true,
				},
			},
		},
		{
			name:      "hostUsers=false & HostPID",
			wantValid: false,
			spec: &core.PodSpec{
				SecurityContext: &core.PodSecurityContext{
					HostUsers: &hostUsersOff,
					HostPID:   true,
				},
			},
		},
		{
			name:      "hostUsers=false & HostIPC",
			wantValid: false,
			spec: &core.PodSpec{
				SecurityContext: &core.PodSecurityContext{
					HostUsers: &hostUsersOff,
					HostIPC:   true,
				},
			},
		},
	}

	for i := range tests {
		tc := tests[i]
		t.Run(tc.name, func(t *testing.T) {
			errs := validateHostUsers(tc.spec, field.NewPath("spec"))

			// The two failure modes are mutually exclusive, so at most one
			// branch fires per case.
			switch {
			case tc.wantValid && len(errs) != 0:
				t.Errorf("Unexpected error(s): %v", errs)
			case !tc.wantValid && len(errs) == 0:
				t.Errorf("Unexpected success")
			}
		})
	}
}
func TestValidateWindowsHostProcessPod(t *testing.T) {
const containerName = "container"
falseVar := false

View File

@@ -3736,6 +3736,11 @@ func (in *PodSecurityContext) DeepCopyInto(out *PodSecurityContext) {
*out = new(bool)
**out = **in
}
if in.HostUsers != nil {
in, out := &in.HostUsers, &out.HostUsers
*out = new(bool)
**out = **in
}
if in.SELinuxOptions != nil {
in, out := &in.SELinuxOptions, &out.SELinuxOptions
*out = new(SELinuxOptions)