Add sysctl whitelist on the node

This commit is contained in:
Dr. Stefan Schimanski
2016-08-19 10:53:25 +02:00
parent ed36baed20
commit e356e52247
13 changed files with 489 additions and 4 deletions

View File

@@ -0,0 +1,60 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"strings"
)
// Namespace represents a kernel namespace name.
type Namespace string
const (
// the Linux IPC namespace
IpcNamespace = Namespace("ipc")
// the network namespace
NetNamespace = Namespace("net")
// the zero value if no namespace is known
UnknownNamespace = Namespace("")
)
var namespaces = map[string]Namespace{
"kernel.sem": IpcNamespace,
}
var prefixNamespaces = map[string]Namespace{
"kernel.shm": IpcNamespace,
"kernel.msg": IpcNamespace,
"fs.mqueue.": IpcNamespace,
"net.": NetNamespace,
}
// NamespacedBy returns the namespace of the Linux kernel for a sysctl, or
// UnknownNamespace if the sysctl is not known to be namespaced.
func NamespacedBy(val string) Namespace {
if ns, found := namespaces[val]; found {
return ns
}
for p, ns := range prefixNamespaces {
if strings.HasPrefix(val, p) {
return ns
}
}
return UnknownNamespace
}

View File

@@ -0,0 +1,36 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"testing"
)
func TestNamespacedBy(t *testing.T) {
tests := map[string]Namespace{
"kernel.shm_rmid_forced": IpcNamespace,
"net.a.b.c": NetNamespace,
"fs.mqueue.a.b.c": IpcNamespace,
"foo": UnknownNamespace,
}
for sysctl, ns := range tests {
if got := NamespacedBy(sysctl); got != ns {
t.Errorf("wrong namespace for %q: got=%s want=%s", sysctl, got, ns)
}
}
}

View File

@@ -0,0 +1,96 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"fmt"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/dockertools"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
)
const (
UnsupportedReason = "SysctlUnsupported"
)
type runtimeAdmitHandler struct {
result lifecycle.PodAdmitResult
}
var _ lifecycle.PodAdmitHandler = &runtimeAdmitHandler{}
// NewRuntimeAdmitHandler returns a sysctlRuntimeAdmitHandler which checks whether
// the given runtime support sysctls.
func NewRuntimeAdmitHandler(runtime container.Runtime) (*runtimeAdmitHandler, error) {
if runtime.Type() == dockertools.DockerType {
v, err := runtime.APIVersion()
if err != nil {
return nil, fmt.Errorf("failed to get runtime version: %v", err)
}
// only Docker >= 1.12 supports sysctls
c, err := v.Compare(dockertools.DockerV112APIVersion)
if err != nil {
return nil, fmt.Errorf("failed to compare Docker version for sysctl support: %v", err)
}
if c >= 0 {
return &runtimeAdmitHandler{
result: lifecycle.PodAdmitResult{
Admit: true,
},
}, nil
}
return &runtimeAdmitHandler{
result: lifecycle.PodAdmitResult{
Admit: false,
Reason: UnsupportedReason,
Message: "Docker before 1.12 does not support sysctls",
},
}, nil
}
// for other runtimes like rkt sysctls are not supported
return &runtimeAdmitHandler{
result: lifecycle.PodAdmitResult{
Admit: false,
Reason: UnsupportedReason,
Message: fmt.Sprintf("runtime %v does not support sysctls", runtime.Type()),
},
}, nil
}
// Admit checks whether the runtime supports sysctls.
func (w *runtimeAdmitHandler) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
sysctls, unsafeSysctls, err := api.SysctlsFromPodAnnotations(attrs.Pod.Annotations)
if err != nil {
return lifecycle.PodAdmitResult{
Admit: false,
Reason: AnnotationInvalidReason,
Message: fmt.Sprintf("invalid sysctl annotation: %v", err),
}
}
if len(sysctls)+len(unsafeSysctls) > 0 {
return w.result
}
return lifecycle.PodAdmitResult{
Admit: true,
}
}

View File

@@ -0,0 +1,171 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"fmt"
"strings"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/validation"
extvalidation "k8s.io/kubernetes/pkg/apis/extensions/validation"
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
)
const (
AnnotationInvalidReason = "InvalidSysctlAnnotation"
ForbiddenReason = "SysctlForbidden"
)
// SafeSysctlWhitelist returns the whitelist of safe sysctls and safe sysctl patterns (ending in *).
//
// A sysctl is called safe iff
// - it is namespaced in the container or the pod
// - it is isolated, i.e. has no influence on any other pod on the same node.
func SafeSysctlWhitelist() []string {
return []string{
"kernel.shm_rmid_forced",
"net.ipv4.ip_local_port_range",
"net.ipv4.tcp_max_syn_backlog",
"net.ipv4.tcp_syncookies",
}
}
// Whitelist provides a list of allowed sysctls and sysctl patterns (ending in *)
// and a function to check whether a given sysctl matches this list.
type Whitelist interface {
// Validate checks that all sysctls given in a api.SysctlsPodAnnotationKey annotation
// are valid according to the whitelist.
Validate(pod *api.Pod) error
}
// patternWhitelist takes a list of sysctls or sysctl patterns (ending in *) and
// checks validity via a sysctl and prefix map, rejecting those which are not known
// to be namespaced.
type patternWhitelist struct {
sysctls map[string]Namespace
prefixes map[string]Namespace
annotationKey string
}
var _ lifecycle.PodAdmitHandler = &patternWhitelist{}
// NewWhitelist creates a new Whitelist from a list of sysctls and sysctl pattern (ending in *).
func NewWhitelist(patterns []string, annotationKey string) (*patternWhitelist, error) {
w := &patternWhitelist{
sysctls: map[string]Namespace{},
prefixes: map[string]Namespace{},
annotationKey: annotationKey,
}
for _, s := range patterns {
if !extvalidation.IsValidSysctlPattern(s) {
return nil, fmt.Errorf("sysctl %q must have at most %d characters and match regex %s",
s,
validation.SysctlMaxLength,
extvalidation.SysctlPatternFmt,
)
}
if strings.HasSuffix(s, "*") {
prefix := s[:len(s)-1]
ns := NamespacedBy(prefix)
if ns == UnknownNamespace {
return nil, fmt.Errorf("the sysctls %q are not known to be namespaced", s)
}
w.prefixes[prefix] = ns
} else {
ns := NamespacedBy(s)
if ns == UnknownNamespace {
return nil, fmt.Errorf("the sysctl %q are not known to be namespaced", s)
}
w.sysctls[s] = ns
}
}
return w, nil
}
// validateSysctl checks that a sysctl is whitelisted because it is known
// to be namespaced by the Linux kernel. Note that being whitelisted is required, but not
// sufficient: the container runtime might have a stricter check and refuse to launch a pod.
//
// The parameters hostNet and hostIPC are used to forbid sysctls for pod sharing the
// respective namespaces with the host. This check is only possible for sysctls on
// the static default whitelist, not those on the custom whitelist provided by the admin.
func (w *patternWhitelist) validateSysctl(sysctl string, hostNet, hostIPC bool) error {
nsErrorFmt := "%q not allowed with host %s enabled"
if ns, found := w.sysctls[sysctl]; found {
if ns == IpcNamespace && hostIPC {
return fmt.Errorf(nsErrorFmt, sysctl, ns)
}
if ns == NetNamespace && hostNet {
return fmt.Errorf(nsErrorFmt, sysctl, ns)
}
return nil
}
for p, ns := range w.prefixes {
if strings.HasPrefix(sysctl, p) {
if ns == IpcNamespace && hostIPC {
return fmt.Errorf(nsErrorFmt, sysctl, ns)
}
if ns == NetNamespace && hostNet {
return fmt.Errorf(nsErrorFmt, sysctl, ns)
}
return nil
}
}
return fmt.Errorf("%q not whitelisted", sysctl)
}
// Admit checks that all sysctls given in a api.SysctlsPodAnnotationKey annotation
// are valid according to the whitelist.
func (w *patternWhitelist) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
pod := attrs.Pod
a := pod.Annotations[w.annotationKey]
if a == "" {
return lifecycle.PodAdmitResult{
Admit: true,
}
}
sysctls, err := api.SysctlsFromPodAnnotation(a)
if err != nil {
return lifecycle.PodAdmitResult{
Admit: false,
Reason: AnnotationInvalidReason,
Message: fmt.Sprintf("invalid %s annotation: %v", w.annotationKey, err),
}
}
var hostNet, hostIPC bool
if pod.Spec.SecurityContext != nil {
hostNet = pod.Spec.SecurityContext.HostNetwork
hostIPC = pod.Spec.SecurityContext.HostIPC
}
for _, s := range sysctls {
if err := w.validateSysctl(s.Name, hostNet, hostIPC); err != nil {
return lifecycle.PodAdmitResult{
Admit: false,
Reason: ForbiddenReason,
Message: fmt.Sprintf("forbidden sysctl: %v", err),
}
}
}
return lifecycle.PodAdmitResult{
Admit: true,
}
}

View File

@@ -0,0 +1,84 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package sysctl
import (
"testing"
"k8s.io/kubernetes/pkg/api"
)
func TestNewWhitelist(t *testing.T) {
type Test struct {
sysctls []string
err bool
}
for _, test := range []Test{
{sysctls: []string{"kernel.msg*", "kernel.sem"}},
{sysctls: []string{" kernel.msg*"}, err: true},
{sysctls: []string{"kernel.msg* "}, err: true},
{sysctls: []string{"net.-"}, err: true},
{sysctls: []string{"net.*.foo"}, err: true},
{sysctls: []string{"foo"}, err: true},
} {
_, err := NewWhitelist(append(SafeSysctlWhitelist(), test.sysctls...), api.SysctlsPodAnnotationKey)
if test.err && err == nil {
t.Errorf("expected an error creating a whitelist for %v", test.sysctls)
} else if !test.err && err != nil {
t.Errorf("got unexpected error creating a whitelist for %v: %v", test.sysctls, err)
}
}
}
func TestWhitelist(t *testing.T) {
type Test struct {
sysctl string
hostNet, hostIPC bool
}
valid := []Test{
{sysctl: "kernel.shm_rmid_forced"},
{sysctl: "net.ipv4.ip_local_port_range"},
{sysctl: "kernel.msgmax"},
{sysctl: "kernel.sem"},
}
invalid := []Test{
{sysctl: "kernel.shm_rmid_forced", hostIPC: true},
{sysctl: "net.ipv4.ip_local_port_range", hostNet: true},
{sysctl: "foo"},
{sysctl: "net.a.b.c", hostNet: false},
{sysctl: "net.ipv4.ip_local_port_range.a.b.c", hostNet: false},
{sysctl: "kernel.msgmax", hostIPC: true},
{sysctl: "kernel.sem", hostIPC: true},
}
w, err := NewWhitelist(append(SafeSysctlWhitelist(), "kernel.msg*", "kernel.sem"), api.SysctlsPodAnnotationKey)
if err != nil {
t.Fatalf("failed to create whitelist: %v", err)
}
for _, test := range valid {
if err := w.validateSysctl(test.sysctl, test.hostNet, test.hostIPC); err != nil {
t.Errorf("expected to be whitelisted: %+v, got: %v", test, err)
}
}
for _, test := range invalid {
if err := w.validateSysctl(test.sysctl, test.hostNet, test.hostIPC); err == nil {
t.Errorf("expected to be rejected: %+v", test)
}
}
}