rkt: Support alternate stage1's via annotation
This provides a basic implementation for setting a stage1 on a per-pod basis via an annotation. It's possible this feature should be gated behind additional knobs, such as a kubelet flag to filter allowed stage1s, or a check akin to what priviliged gets in the apiserver. Currently, it checks `AllowPrivileged`, as a means to let people disable this feature, though overloading it as stage1 and privileged isn't ideal.
This commit is contained in:
		@@ -14,7 +14,6 @@ See the License for the specific language governing permissions and
 | 
			
		||||
limitations under the License.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Generated via `mockgen k8s.io/kubernetes/pkg/kubelet/rkt VolumeGetter > mock_rkt/mock_volume_getter.go`
 | 
			
		||||
// Edited to include required boilerplate
 | 
			
		||||
// Source: k8s.io/kubernetes/pkg/kubelet/rkt (interfaces: VolumeGetter)
 | 
			
		||||
 
 | 
			
		||||
@@ -41,6 +41,7 @@ import (
 | 
			
		||||
	"golang.org/x/net/context"
 | 
			
		||||
	"google.golang.org/grpc"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/api"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/capabilities"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/client/record"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/credentialprovider"
 | 
			
		||||
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
 | 
			
		||||
@@ -89,6 +90,17 @@ const (
 | 
			
		||||
	k8sRktContainerHashAnno          = "rkt.kubernetes.io/container-hash"
 | 
			
		||||
	k8sRktRestartCountAnno           = "rkt.kubernetes.io/restart-count"
 | 
			
		||||
	k8sRktTerminationMessagePathAnno = "rkt.kubernetes.io/termination-message-path"
 | 
			
		||||
 | 
			
		||||
	// TODO(euank): This has significant security concerns as a stage1 image is
 | 
			
		||||
	// effectively root.
 | 
			
		||||
	// Furthermore, this (using an annotation) is a hack to pass an extra
 | 
			
		||||
	// non-portable argument in. It should not be relied on to be stable.
 | 
			
		||||
	// In the future, this might be subsumed by a first-class api object, or by a
 | 
			
		||||
	// kitchen-sink params object (#17064).
 | 
			
		||||
	// See discussion in #23944
 | 
			
		||||
	// Also, do we want more granularity than path-at-the-kubelet-level and
 | 
			
		||||
	// image/name-at-the-pod-level?
 | 
			
		||||
	k8sRktStage1NameAnno = "rkt.alpha.kubernetes.io/stage1-name-override"
 | 
			
		||||
	dockerPrefix         = "docker://"
 | 
			
		||||
 | 
			
		||||
	authDir            = "auth.d"
 | 
			
		||||
@@ -130,7 +142,7 @@ type Runtime struct {
 | 
			
		||||
	runtimeHelper       kubecontainer.RuntimeHelper
 | 
			
		||||
	recorder            record.EventRecorder
 | 
			
		||||
	livenessManager     proberesults.Manager
 | 
			
		||||
	volumeGetter        volumeGetter
 | 
			
		||||
	volumeGetter        VolumeGetter
 | 
			
		||||
	imagePuller         kubecontainer.ImagePuller
 | 
			
		||||
	runner              kubecontainer.HandlerRunner
 | 
			
		||||
	execer              utilexec.Interface
 | 
			
		||||
@@ -154,7 +166,7 @@ type Runtime struct {
 | 
			
		||||
var _ kubecontainer.Runtime = &Runtime{}
 | 
			
		||||
 | 
			
		||||
// TODO(yifan): Remove this when volumeManager is moved to separate package.
 | 
			
		||||
type volumeGetter interface {
 | 
			
		||||
type VolumeGetter interface {
 | 
			
		||||
	GetVolumes(podUID types.UID) (kubecontainer.VolumeMap, bool)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -181,7 +193,7 @@ func New(
 | 
			
		||||
	containerRefManager *kubecontainer.RefManager,
 | 
			
		||||
	podGetter podGetter,
 | 
			
		||||
	livenessManager proberesults.Manager,
 | 
			
		||||
	volumeGetter volumeGetter,
 | 
			
		||||
	volumeGetter VolumeGetter,
 | 
			
		||||
	httpClient kubetypes.HttpGetter,
 | 
			
		||||
	networkPlugin network.NetworkPlugin,
 | 
			
		||||
	hairpinMode bool,
 | 
			
		||||
@@ -264,10 +276,8 @@ func New(
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (r *Runtime) buildCommand(args ...string) *exec.Cmd {
 | 
			
		||||
	cmd := exec.Command(r.config.Path)
 | 
			
		||||
	cmd.Args = append(cmd.Args, r.config.buildGlobalOptions()...)
 | 
			
		||||
	cmd.Args = append(cmd.Args, args...)
 | 
			
		||||
	return cmd
 | 
			
		||||
	allArgs := append(r.config.buildGlobalOptions(), args...)
 | 
			
		||||
	return exec.Command(r.config.Path, allArgs...)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// convertToACName converts a string into ACName.
 | 
			
		||||
@@ -285,7 +295,8 @@ func (r *Runtime) RunCommand(args ...string) ([]string, error) {
 | 
			
		||||
 | 
			
		||||
	var stdout, stderr bytes.Buffer
 | 
			
		||||
	cmd := r.buildCommand(args...)
 | 
			
		||||
	cmd.Stdout, cmd.Stderr = &stdout, &stderr
 | 
			
		||||
	cmd.Stdout = &stdout
 | 
			
		||||
	cmd.Stderr = &stderr
 | 
			
		||||
	if err := cmd.Run(); err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("failed to run %v: %v\nstdout: %v\nstderr: %v", args, err, stdout.String(), stderr.String())
 | 
			
		||||
	}
 | 
			
		||||
@@ -595,14 +606,19 @@ func (r *Runtime) makePodManifest(pod *api.Pod, pullSecrets []api.Secret) (*appc
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	requiresPrivileged := false
 | 
			
		||||
	manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktKubeletAnno), k8sRktKubeletAnnoValue)
 | 
			
		||||
	manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktUIDAnno), string(pod.UID))
 | 
			
		||||
	manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNameAnno), pod.Name)
 | 
			
		||||
	manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktNamespaceAnno), pod.Namespace)
 | 
			
		||||
	manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktRestartCountAnno), strconv.Itoa(restartCount))
 | 
			
		||||
	if stage1Name, ok := pod.Annotations[k8sRktStage1NameAnno]; ok {
 | 
			
		||||
		requiresPrivileged = true
 | 
			
		||||
		manifest.Annotations.Set(*appctypes.MustACIdentifier(k8sRktStage1NameAnno), stage1Name)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, c := range pod.Spec.Containers {
 | 
			
		||||
		err := r.newAppcRuntimeApp(pod, c, pullSecrets, manifest)
 | 
			
		||||
		err := r.newAppcRuntimeApp(pod, c, requiresPrivileged, pullSecrets, manifest)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, err
 | 
			
		||||
		}
 | 
			
		||||
@@ -707,7 +723,11 @@ func (r *Runtime) makeContainerLogMount(opts *kubecontainer.RunContainerOptions,
 | 
			
		||||
	return &mnt, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, pullSecrets []api.Secret, manifest *appcschema.PodManifest) error {
 | 
			
		||||
func (r *Runtime) newAppcRuntimeApp(pod *api.Pod, c api.Container, requiresPrivileged bool, pullSecrets []api.Secret, manifest *appcschema.PodManifest) error {
 | 
			
		||||
	if requiresPrivileged && !capabilities.Get().AllowPrivileged {
 | 
			
		||||
		return fmt.Errorf("cannot make %q: running a custom stage1 requires a privileged security context", format.Pod(pod))
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if err, _ := r.imagePuller.PullImage(pod, &c, pullSecrets); err != nil {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
@@ -950,6 +970,27 @@ func (r *Runtime) cleanupPodNetwork(pod *api.Pod) error {
 | 
			
		||||
	return teardownErr
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (r *Runtime) preparePodArgs(manifest *appcschema.PodManifest, manifestFileName string) []string {
 | 
			
		||||
	// Order of precedence for the stage1:
 | 
			
		||||
	// 1) pod annotation (stage1 name)
 | 
			
		||||
	// 2) kubelet configured stage1 (stage1 path)
 | 
			
		||||
	// 3) empty; whatever rkt's compiled to default to
 | 
			
		||||
	stage1ImageCmd := ""
 | 
			
		||||
	if r.config.Stage1Image != "" {
 | 
			
		||||
		stage1ImageCmd = "--stage1-path=" + r.config.Stage1Image
 | 
			
		||||
	}
 | 
			
		||||
	if stage1Name, ok := manifest.Annotations.Get(k8sRktStage1NameAnno); ok {
 | 
			
		||||
		stage1ImageCmd = "--stage1-name=" + stage1Name
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Run 'rkt prepare' to get the rkt UUID.
 | 
			
		||||
	cmds := []string{"prepare", "--quiet", "--pod-manifest", manifestFileName}
 | 
			
		||||
	if stage1ImageCmd != "" {
 | 
			
		||||
		cmds = append(cmds, stage1ImageCmd)
 | 
			
		||||
	}
 | 
			
		||||
	return cmds
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// preparePod will:
 | 
			
		||||
//
 | 
			
		||||
// 1. Invoke 'rkt prepare' to prepare the pod, and get the rkt pod uuid.
 | 
			
		||||
@@ -958,7 +999,7 @@ func (r *Runtime) cleanupPodNetwork(pod *api.Pod) error {
 | 
			
		||||
// On success, it will return a string that represents name of the unit file
 | 
			
		||||
// and the runtime pod.
 | 
			
		||||
func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret, netnsName string) (string, *kubecontainer.Pod, error) {
 | 
			
		||||
	// Generate the pod manifest from the pod spec.
 | 
			
		||||
	// Generate the appc pod manifest from the k8s pod spec.
 | 
			
		||||
	manifest, err := r.makePodManifest(pod, pullSecrets)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", nil, err
 | 
			
		||||
@@ -986,12 +1027,8 @@ func (r *Runtime) preparePod(pod *api.Pod, pullSecrets []api.Secret, netnsName s
 | 
			
		||||
		return "", nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Run 'rkt prepare' to get the rkt UUID.
 | 
			
		||||
	cmds := []string{"prepare", "--quiet", "--pod-manifest", manifestFile.Name()}
 | 
			
		||||
	if r.config.Stage1Image != "" {
 | 
			
		||||
		cmds = append(cmds, "--stage1-path", r.config.Stage1Image)
 | 
			
		||||
	}
 | 
			
		||||
	output, err := r.cli.RunCommand(cmds...)
 | 
			
		||||
	prepareCmd := r.preparePodArgs(manifest, manifestFile.Name())
 | 
			
		||||
	output, err := r.RunCommand(prepareCmd...)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return "", nil, err
 | 
			
		||||
	}
 | 
			
		||||
@@ -1809,7 +1846,7 @@ func (r *Runtime) ExecInContainer(containerID kubecontainer.ContainerID, cmd []s
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	args := append([]string{}, "enter", fmt.Sprintf("--app=%s", id.appName), id.uuid)
 | 
			
		||||
	args := []string{"enter", fmt.Sprintf("--app=%s", id.appName), id.uuid}
 | 
			
		||||
	args = append(args, cmd...)
 | 
			
		||||
	command := r.buildCommand(args...)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -36,8 +36,10 @@ import (
 | 
			
		||||
	kubetesting "k8s.io/kubernetes/pkg/kubelet/container/testing"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/rkt/mock_os"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/kubelet/rkt/mock_rkt"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/types"
 | 
			
		||||
	"k8s.io/kubernetes/pkg/util/errors"
 | 
			
		||||
	utilexec "k8s.io/kubernetes/pkg/util/exec"
 | 
			
		||||
	utiltesting "k8s.io/kubernetes/pkg/util/testing"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -1194,6 +1196,11 @@ func TestGenerateRunCommand(t *testing.T) {
 | 
			
		||||
	for i, tt := range tests {
 | 
			
		||||
		testCaseHint := fmt.Sprintf("test case #%d", i)
 | 
			
		||||
		rkt.runtimeHelper = &fakeRuntimeHelper{tt.dnsServers, tt.dnsSearches, tt.hostName, "", tt.err}
 | 
			
		||||
		rkt.execer = &utilexec.FakeExec{CommandScript: []utilexec.FakeCommandAction{func(cmd string, args ...string) utilexec.Cmd {
 | 
			
		||||
			return utilexec.InitFakeCmd(&utilexec.FakeCmd{}, cmd, args...)
 | 
			
		||||
		}}}
 | 
			
		||||
 | 
			
		||||
		// a command should be created of this form, but the returned command shouldn't be called (asserted by having no expectations on it)
 | 
			
		||||
 | 
			
		||||
		result, err := rkt.generateRunCommand(tt.pod, tt.uuid, tt.netnsName)
 | 
			
		||||
		assert.Equal(t, tt.err, err, testCaseHint)
 | 
			
		||||
@@ -1617,3 +1624,137 @@ func TestGarbageCollect(t *testing.T) {
 | 
			
		||||
		getter.pods = make(map[types.UID]*api.Pod)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type annotationsByName []appctypes.Annotation
 | 
			
		||||
 | 
			
		||||
func (a annotationsByName) Len() int           { return len(a) }
 | 
			
		||||
func (a annotationsByName) Less(x, y int) bool { return a[x].Name < a[y].Name }
 | 
			
		||||
func (a annotationsByName) Swap(x, y int)      { a[x], a[y] = a[y], a[x] }
 | 
			
		||||
 | 
			
		||||
func TestMakePodManifestAnnotations(t *testing.T) {
 | 
			
		||||
	ctrl := gomock.NewController(t)
 | 
			
		||||
	defer ctrl.Finish()
 | 
			
		||||
 | 
			
		||||
	mockVolumeGetter := mock_rkt.NewMockVolumeGetter(ctrl)
 | 
			
		||||
 | 
			
		||||
	fr := newFakeRktInterface()
 | 
			
		||||
	fs := newFakeSystemd()
 | 
			
		||||
	r := &Runtime{apisvc: fr, systemd: fs, volumeGetter: mockVolumeGetter}
 | 
			
		||||
 | 
			
		||||
	testCases := []struct {
 | 
			
		||||
		in     *api.Pod
 | 
			
		||||
		out    *appcschema.PodManifest
 | 
			
		||||
		outerr error
 | 
			
		||||
	}{
 | 
			
		||||
		{
 | 
			
		||||
			in: &api.Pod{
 | 
			
		||||
				ObjectMeta: api.ObjectMeta{
 | 
			
		||||
					UID:       "uid-1",
 | 
			
		||||
					Name:      "name-1",
 | 
			
		||||
					Namespace: "namespace-1",
 | 
			
		||||
					Annotations: map[string]string{
 | 
			
		||||
						k8sRktStage1NameAnno: "stage1-override-img",
 | 
			
		||||
					},
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
			out: &appcschema.PodManifest{
 | 
			
		||||
				Annotations: []appctypes.Annotation{
 | 
			
		||||
					{
 | 
			
		||||
						Name:  appctypes.ACIdentifier(k8sRktStage1NameAnno),
 | 
			
		||||
						Value: "stage1-override-img",
 | 
			
		||||
					},
 | 
			
		||||
					{
 | 
			
		||||
						Name:  appctypes.ACIdentifier(k8sRktUIDAnno),
 | 
			
		||||
						Value: "uid-1",
 | 
			
		||||
					},
 | 
			
		||||
					{
 | 
			
		||||
						Name:  appctypes.ACIdentifier(k8sRktNameAnno),
 | 
			
		||||
						Value: "name-1",
 | 
			
		||||
					},
 | 
			
		||||
					{
 | 
			
		||||
						Name:  appctypes.ACIdentifier(k8sRktKubeletAnno),
 | 
			
		||||
						Value: "true",
 | 
			
		||||
					},
 | 
			
		||||
					{
 | 
			
		||||
						Name:  appctypes.ACIdentifier(k8sRktNamespaceAnno),
 | 
			
		||||
						Value: "namespace-1",
 | 
			
		||||
					},
 | 
			
		||||
					{
 | 
			
		||||
						Name:  appctypes.ACIdentifier(k8sRktRestartCountAnno),
 | 
			
		||||
						Value: "0",
 | 
			
		||||
					},
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for i, testCase := range testCases {
 | 
			
		||||
		hint := fmt.Sprintf("case #%d", i)
 | 
			
		||||
		mockVolumeGetter.EXPECT().GetVolumes(gomock.Any()).Return(kubecontainer.VolumeMap{}, true)
 | 
			
		||||
 | 
			
		||||
		result, err := r.makePodManifest(testCase.in, []api.Secret{})
 | 
			
		||||
		assert.Equal(t, err, testCase.outerr, hint)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			sort.Sort(annotationsByName(result.Annotations))
 | 
			
		||||
			sort.Sort(annotationsByName(testCase.out.Annotations))
 | 
			
		||||
			assert.Equal(t, result.Annotations, testCase.out.Annotations, hint)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestPreparePodArgs(t *testing.T) {
 | 
			
		||||
	r := &Runtime{
 | 
			
		||||
		config: &Config{},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	testCases := []struct {
 | 
			
		||||
		manifest     appcschema.PodManifest
 | 
			
		||||
		stage1Config string
 | 
			
		||||
		cmd          []string
 | 
			
		||||
	}{
 | 
			
		||||
		{
 | 
			
		||||
			appcschema.PodManifest{
 | 
			
		||||
				Annotations: appctypes.Annotations{
 | 
			
		||||
					{
 | 
			
		||||
						Name:  k8sRktStage1NameAnno,
 | 
			
		||||
						Value: "stage1-image",
 | 
			
		||||
					},
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
			"",
 | 
			
		||||
			[]string{"prepare", "--quiet", "--pod-manifest", "file", "--stage1-name=stage1-image"},
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			appcschema.PodManifest{
 | 
			
		||||
				Annotations: appctypes.Annotations{
 | 
			
		||||
					{
 | 
			
		||||
						Name:  k8sRktStage1NameAnno,
 | 
			
		||||
						Value: "stage1-image",
 | 
			
		||||
					},
 | 
			
		||||
				},
 | 
			
		||||
			},
 | 
			
		||||
			"stage1-path",
 | 
			
		||||
			[]string{"prepare", "--quiet", "--pod-manifest", "file", "--stage1-name=stage1-image"},
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			appcschema.PodManifest{
 | 
			
		||||
				Annotations: appctypes.Annotations{},
 | 
			
		||||
			},
 | 
			
		||||
			"stage1-path",
 | 
			
		||||
			[]string{"prepare", "--quiet", "--pod-manifest", "file", "--stage1-path=stage1-path"},
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			appcschema.PodManifest{
 | 
			
		||||
				Annotations: appctypes.Annotations{},
 | 
			
		||||
			},
 | 
			
		||||
			"",
 | 
			
		||||
			[]string{"prepare", "--quiet", "--pod-manifest", "file"},
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for i, testCase := range testCases {
 | 
			
		||||
		r.config.Stage1Image = testCase.stage1Config
 | 
			
		||||
		cmd := r.preparePodArgs(&testCase.manifest, "file")
 | 
			
		||||
		assert.Equal(t, testCase.cmd, cmd, fmt.Sprintf("Test case #%d", i))
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user