 26158609b5
			
		
	
	26158609b5
	
	
	
		
			
			This package is only used internally in the cri package, which is an internal package, so we can make the utility internal as well. Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
		
			
				
	
	
		
			210 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			210 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| /*
 | |
|    Copyright The containerd Authors.
 | |
| 
 | |
|    Licensed under the Apache License, Version 2.0 (the "License");
 | |
|    you may not use this file except in compliance with the License.
 | |
|    You may obtain a copy of the License at
 | |
| 
 | |
|        http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
|    Unless required by applicable law or agreed to in writing, software
 | |
|    distributed under the License is distributed on an "AS IS" BASIS,
 | |
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|    See the License for the specific language governing permissions and
 | |
|    limitations under the License.
 | |
| */
 | |
| 
 | |
| package server
 | |
| 
 | |
| import (
 | |
| 	"context"
 | |
| 	"fmt"
 | |
| 	"os"
 | |
| 	"path/filepath"
 | |
| 	"sort"
 | |
| 	"strings"
 | |
| 	"syscall"
 | |
| 	"time"
 | |
| 
 | |
| 	"github.com/containerd/cgroups/v3"
 | |
| 	"github.com/containerd/log"
 | |
| 	"github.com/moby/sys/mountinfo"
 | |
| 	"github.com/opencontainers/runtime-spec/specs-go"
 | |
| 	"golang.org/x/sys/unix"
 | |
| 	runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
 | |
| 
 | |
| 	containerd "github.com/containerd/containerd/v2/client"
 | |
| 	"github.com/containerd/containerd/v2/core/mount"
 | |
| 	"github.com/containerd/containerd/v2/core/snapshots"
 | |
| 	"github.com/containerd/containerd/v2/internal/cri/seutil"
 | |
| 	"github.com/containerd/containerd/v2/pkg/apparmor"
 | |
| 	"github.com/containerd/containerd/v2/pkg/seccomp"
 | |
| )
 | |
| 
 | |
| // apparmorEnabled returns true if apparmor is enabled, supported by the host,
 | |
| // if apparmor_parser is installed, and if we are not running docker-in-docker.
 | |
| func (c *criService) apparmorEnabled() bool {
 | |
| 	if c.config.DisableApparmor {
 | |
| 		return false
 | |
| 	}
 | |
| 	return apparmor.HostSupports()
 | |
| }
 | |
| 
 | |
| func (c *criService) seccompEnabled() bool {
 | |
| 	return seccomp.IsEnabled()
 | |
| }
 | |
| 
 | |
// openLogFile returns a write-only, append-mode handle to the container log
// file at path. The file is created (mode 0640) if it does not exist, and any
// missing parent directories are created first (mode 0755).
func openLogFile(path string) (*os.File, error) {
	const (
		dirPerm  os.FileMode = 0755
		filePerm os.FileMode = 0640
		flags                = os.O_CREATE | os.O_APPEND | os.O_WRONLY
	)

	parent := filepath.Dir(path)
	if mkErr := os.MkdirAll(parent, dirPerm); mkErr != nil {
		return nil, mkErr
	}
	return os.OpenFile(path, flags, filePerm)
}
 | |
| 
 | |
| // unmountRecursive unmounts the target and all mounts underneath, starting with
 | |
| // the deepest mount first.
 | |
| func unmountRecursive(ctx context.Context, target string) error {
 | |
| 	target, err := mount.CanonicalizePath(target)
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	toUnmount, err := mountinfo.GetMounts(mountinfo.PrefixFilter(target))
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	// Make the deepest mount be first
 | |
| 	sort.Slice(toUnmount, func(i, j int) bool {
 | |
| 		return len(toUnmount[i].Mountpoint) > len(toUnmount[j].Mountpoint)
 | |
| 	})
 | |
| 
 | |
| 	for i, m := range toUnmount {
 | |
| 		if err := mount.UnmountAll(m.Mountpoint, unix.MNT_DETACH); err != nil {
 | |
| 			if i == len(toUnmount)-1 { // last mount
 | |
| 				return err
 | |
| 			}
 | |
| 			// This is some submount, we can ignore this error for now, the final unmount will fail if this is a real problem
 | |
| 			log.G(ctx).WithError(err).Debugf("failed to unmount submount %s", m.Mountpoint)
 | |
| 		}
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
 | |
| // often be remedied.
 | |
| // Only use `ensureRemoveAll` if you really want to make every effort to remove
 | |
| // a directory.
 | |
| //
 | |
| // Because of the way `os.Remove` (and by extension `os.RemoveAll`) works, there
 | |
| // can be a race between reading directory entries and then actually attempting
 | |
| // to remove everything in the directory.
 | |
| // These types of errors do not need to be returned since it's ok for the dir to
 | |
| // be gone we can just retry the remove operation.
 | |
| //
 | |
| // This should not return a `os.ErrNotExist` kind of error under any circumstances
 | |
| func ensureRemoveAll(ctx context.Context, dir string) error {
 | |
| 	notExistErr := make(map[string]bool)
 | |
| 
 | |
| 	// track retries
 | |
| 	exitOnErr := make(map[string]int)
 | |
| 	maxRetry := 50
 | |
| 
 | |
| 	// Attempt to unmount anything beneath this dir first.
 | |
| 	if err := unmountRecursive(ctx, dir); err != nil {
 | |
| 		log.G(ctx).WithError(err).Debugf("failed to do initial unmount of %s", dir)
 | |
| 	}
 | |
| 
 | |
| 	for {
 | |
| 		err := os.RemoveAll(dir)
 | |
| 		if err == nil {
 | |
| 			return nil
 | |
| 		}
 | |
| 
 | |
| 		pe, ok := err.(*os.PathError)
 | |
| 		if !ok {
 | |
| 			return err
 | |
| 		}
 | |
| 
 | |
| 		if os.IsNotExist(err) {
 | |
| 			if notExistErr[pe.Path] {
 | |
| 				return err
 | |
| 			}
 | |
| 			notExistErr[pe.Path] = true
 | |
| 
 | |
| 			// There is a race where some subdir can be removed but after the
 | |
| 			// parent dir entries have been read.
 | |
| 			// So the path could be from `os.Remove(subdir)`
 | |
| 			// If the reported non-existent path is not the passed in `dir` we
 | |
| 			// should just retry, but otherwise return with no error.
 | |
| 			if pe.Path == dir {
 | |
| 				return nil
 | |
| 			}
 | |
| 			continue
 | |
| 		}
 | |
| 
 | |
| 		if pe.Err != syscall.EBUSY {
 | |
| 			return err
 | |
| 		}
 | |
| 		if e := mount.Unmount(pe.Path, unix.MNT_DETACH); e != nil {
 | |
| 			return fmt.Errorf("error while removing %s: %w", dir, e)
 | |
| 		}
 | |
| 
 | |
| 		if exitOnErr[pe.Path] == maxRetry {
 | |
| 			return err
 | |
| 		}
 | |
| 		exitOnErr[pe.Path]++
 | |
| 		time.Sleep(100 * time.Millisecond)
 | |
| 	}
 | |
| }
 | |
| 
 | |
// vmbasedRuntimes lists the runtime type substrings that isVMBasedRuntime
// treats as VM-based.
var vmbasedRuntimes = []string{
	"io.containerd.kata",
}

// isVMBasedRuntime reports whether runtimeType contains any entry of
// vmbasedRuntimes as a substring.
func isVMBasedRuntime(runtimeType string) bool {
	matched := false
	for i := 0; i < len(vmbasedRuntimes) && !matched; i++ {
		matched = strings.Contains(runtimeType, vmbasedRuntimes[i])
	}
	return matched
}
 | |
| 
 | |
| func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
 | |
| 	if !isVMBasedRuntime(runtimeType) {
 | |
| 		return nil
 | |
| 	}
 | |
| 	l, err := seutil.ChangeToKVM(spec.Process.SelinuxLabel)
 | |
| 	if err != nil {
 | |
| 		return fmt.Errorf("failed to get selinux kvm label: %w", err)
 | |
| 	}
 | |
| 	spec.Process.SelinuxLabel = l
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| // getCgroupsMode returns cgropu mode.
 | |
| // TODO: add build constraints to cgroups package and remove this helper
 | |
| func isUnifiedCgroupsMode() bool {
 | |
| 	return cgroups.Mode() == cgroups.Unified
 | |
| }
 | |
| 
 | |
| func snapshotterRemapOpts(nsOpts *runtime.NamespaceOption) ([]snapshots.Opt, error) {
 | |
| 	snapshotOpt := []snapshots.Opt{}
 | |
| 	usernsOpts := nsOpts.GetUsernsOptions()
 | |
| 	if usernsOpts == nil {
 | |
| 		return snapshotOpt, nil
 | |
| 	}
 | |
| 
 | |
| 	uids, gids, err := parseUsernsIDs(usernsOpts)
 | |
| 	if err != nil {
 | |
| 		return nil, fmt.Errorf("user namespace configuration: %w", err)
 | |
| 	}
 | |
| 
 | |
| 	if usernsOpts.GetMode() == runtime.NamespaceMode_POD {
 | |
| 		snapshotOpt = append(snapshotOpt, containerd.WithRemapperLabels(0, uids[0].HostID, 0, gids[0].HostID, uids[0].Size))
 | |
| 	}
 | |
| 	return snapshotOpt, nil
 | |
| }
 |