Revert "Update runc to 1.0.0"
This commit is contained in:
119
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
generated
vendored
119
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
generated
vendored
@@ -11,7 +11,6 @@ import (
|
||||
"strconv"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
@@ -23,44 +22,11 @@ const (
|
||||
)
|
||||
|
||||
// DeviceFilter returns eBPF device filter program and its license string
|
||||
func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
|
||||
// Generate the minimum ruleset for the device rules we are given. While we
|
||||
// don't care about minimum transitions in cgroupv2, using the emulator
|
||||
// gives us a guarantee that the behaviour of devices filtering is the same
|
||||
// as cgroupv1, including security hardenings to avoid misconfiguration
|
||||
// (such as punching holes in wildcard rules).
|
||||
emu := new(devicesemulator.Emulator)
|
||||
for _, rule := range rules {
|
||||
if err := emu.Apply(*rule); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
cleanRules, err := emu.Rules()
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
p := &program{
|
||||
defaultAllow: emu.IsBlacklist(),
|
||||
}
|
||||
func DeviceFilter(devices []*devices.Rule) (asm.Instructions, string, error) {
|
||||
p := &program{}
|
||||
p.init()
|
||||
|
||||
for idx, rule := range cleanRules {
|
||||
if rule.Type == devices.WildcardDevice {
|
||||
// We can safely skip over wildcard entries because there should
|
||||
// only be one (at most) at the very start to instruct cgroupv1 to
|
||||
// go into allow-list mode. However we do double-check this here.
|
||||
if idx != 0 || rule.Allow != emu.IsBlacklist() {
|
||||
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
|
||||
}
|
||||
continue
|
||||
}
|
||||
if rule.Allow == p.defaultAllow {
|
||||
// There should be no rules which have an action equal to the
|
||||
// default action, the emulator removes those.
|
||||
return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
|
||||
}
|
||||
if err := p.appendRule(rule); err != nil {
|
||||
for i := len(devices) - 1; i >= 0; i-- {
|
||||
if err := p.appendDevice(devices[i]); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
@@ -69,9 +35,9 @@ func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
|
||||
}
|
||||
|
||||
type program struct {
|
||||
insts asm.Instructions
|
||||
defaultAllow bool
|
||||
blockID int
|
||||
insts asm.Instructions
|
||||
hasWildCard bool
|
||||
blockID int
|
||||
}
|
||||
|
||||
func (p *program) init() {
|
||||
@@ -101,35 +67,39 @@ func (p *program) init() {
|
||||
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
||||
}
|
||||
|
||||
// appendRule rule converts an OCI rule to the relevant eBPF block and adds it
|
||||
// to the in-progress filter program. In order to operate properly, it must be
|
||||
// called with a "clean" rule list (generated by devices.Emulator.Rules() --
|
||||
// with any "a" rules removed).
|
||||
func (p *program) appendRule(rule *devices.Rule) error {
|
||||
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
|
||||
func (p *program) appendDevice(dev *devices.Rule) error {
|
||||
if p.blockID < 0 {
|
||||
return errors.New("the program is finalized")
|
||||
}
|
||||
if p.hasWildCard {
|
||||
// All entries after wildcard entry are ignored
|
||||
return nil
|
||||
}
|
||||
|
||||
var bpfType int32
|
||||
switch rule.Type {
|
||||
case devices.CharDevice:
|
||||
bpfType := int32(-1)
|
||||
hasType := true
|
||||
switch dev.Type {
|
||||
case 'c':
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
||||
case devices.BlockDevice:
|
||||
case 'b':
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||
case 'a':
|
||||
hasType = false
|
||||
default:
|
||||
// We do not permit 'a', nor any other types we don't know about.
|
||||
return errors.Errorf("invalid type %q", string(rule.Type))
|
||||
// if not specified in OCI json, typ is set to DeviceTypeAll
|
||||
return errors.Errorf("invalid Type %q", string(dev.Type))
|
||||
}
|
||||
if rule.Major > math.MaxUint32 {
|
||||
return errors.Errorf("invalid major %d", rule.Major)
|
||||
if dev.Major > math.MaxUint32 {
|
||||
return errors.Errorf("invalid major %d", dev.Major)
|
||||
}
|
||||
if rule.Minor > math.MaxUint32 {
|
||||
return errors.Errorf("invalid minor %d", rule.Major)
|
||||
if dev.Minor > math.MaxUint32 {
|
||||
return errors.Errorf("invalid minor %d", dev.Major)
|
||||
}
|
||||
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := rule.Minor >= 0
|
||||
hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := dev.Minor >= 0
|
||||
bpfAccess := int32(0)
|
||||
for _, r := range rule.Permissions {
|
||||
for _, r := range dev.Permissions {
|
||||
switch r {
|
||||
case 'r':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
||||
@@ -149,10 +119,12 @@ func (p *program) appendRule(rule *devices.Rule) error {
|
||||
nextBlockSym = "block-" + strconv.Itoa(p.blockID+1)
|
||||
prevBlockLastIdx = len(p.insts) - 1
|
||||
)
|
||||
p.insts = append(p.insts,
|
||||
// if (R2 != bpfType) goto next
|
||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||
)
|
||||
if hasType {
|
||||
p.insts = append(p.insts,
|
||||
// if (R2 != bpfType) goto next
|
||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasAccess {
|
||||
p.insts = append(p.insts,
|
||||
// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
|
||||
@@ -164,16 +136,19 @@ func (p *program) appendRule(rule *devices.Rule) error {
|
||||
if hasMajor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R4 != major) goto next
|
||||
asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
|
||||
asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMinor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R5 != minor) goto next
|
||||
asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
|
||||
asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
|
||||
)
|
||||
}
|
||||
p.insts = append(p.insts, acceptBlock(rule.Allow)...)
|
||||
if !hasType && !hasAccess && !hasMajor && !hasMinor {
|
||||
p.hasWildCard = true
|
||||
}
|
||||
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
|
||||
// set blockSym to the first instruction we added in this iteration
|
||||
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
|
||||
p.blockID++
|
||||
@@ -181,14 +156,14 @@ func (p *program) appendRule(rule *devices.Rule) error {
|
||||
}
|
||||
|
||||
func (p *program) finalize() (asm.Instructions, error) {
|
||||
var v int32
|
||||
if p.defaultAllow {
|
||||
v = 1
|
||||
if p.hasWildCard {
|
||||
// acceptBlock with asm.Return() is already inserted
|
||||
return p.insts, nil
|
||||
}
|
||||
blockSym := "block-" + strconv.Itoa(p.blockID)
|
||||
p.insts = append(p.insts,
|
||||
// R0 <- v
|
||||
asm.Mov.Imm32(asm.R0, v).Sym(blockSym),
|
||||
// R0 <- 0
|
||||
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
|
||||
asm.Return(),
|
||||
)
|
||||
p.blockID = -1
|
||||
@@ -196,7 +171,7 @@ func (p *program) finalize() (asm.Instructions, error) {
|
||||
}
|
||||
|
||||
func acceptBlock(accept bool) asm.Instructions {
|
||||
var v int32
|
||||
v := int32(0)
|
||||
if accept {
|
||||
v = 1
|
||||
}
|
||||
|
57
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf.go
generated
vendored
Normal file
57
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf.go
generated
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
package ebpf
|
||||
|
||||
import (
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/link"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||
//
|
||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||
//
|
||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
|
||||
nilCloser := func() error {
|
||||
return nil
|
||||
}
|
||||
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
||||
// This limit is not inherited into the container.
|
||||
memlockLimit := &unix.Rlimit{
|
||||
Cur: unix.RLIM_INFINITY,
|
||||
Max: unix.RLIM_INFINITY,
|
||||
}
|
||||
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
||||
spec := &ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
Instructions: insts,
|
||||
License: license,
|
||||
}
|
||||
prog, err := ebpf.NewProgram(spec)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
Target: dirFD,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
Flags: unix.BPF_F_ALLOW_MULTI,
|
||||
})
|
||||
if err != nil {
|
||||
return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
}
|
||||
closer := func() error {
|
||||
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||
Target: dirFD,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
})
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE)")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return closer, nil
|
||||
}
|
240
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
generated
vendored
240
vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go
generated
vendored
@@ -1,240 +0,0 @@
|
||||
package ebpf
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/link"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func nilCloser() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) {
|
||||
type bpfAttrQuery struct {
|
||||
TargetFd uint32
|
||||
AttachType uint32
|
||||
QueryType uint32
|
||||
AttachFlags uint32
|
||||
ProgIds uint64 // __aligned_u64
|
||||
ProgCnt uint32
|
||||
}
|
||||
|
||||
// Currently you can only have 64 eBPF programs attached to a cgroup.
|
||||
size := 64
|
||||
retries := 0
|
||||
for retries < 10 {
|
||||
progIds := make([]uint32, size)
|
||||
query := bpfAttrQuery{
|
||||
TargetFd: uint32(dirFd),
|
||||
AttachType: uint32(unix.BPF_CGROUP_DEVICE),
|
||||
ProgIds: uint64(uintptr(unsafe.Pointer(&progIds[0]))),
|
||||
ProgCnt: uint32(len(progIds)),
|
||||
}
|
||||
|
||||
// Fetch the list of program ids.
|
||||
_, _, errno := unix.Syscall(unix.SYS_BPF,
|
||||
uintptr(unix.BPF_PROG_QUERY),
|
||||
uintptr(unsafe.Pointer(&query)),
|
||||
unsafe.Sizeof(query))
|
||||
size = int(query.ProgCnt)
|
||||
runtime.KeepAlive(query)
|
||||
if errno != 0 {
|
||||
// On ENOSPC we get the correct number of programs.
|
||||
if errno == unix.ENOSPC {
|
||||
retries++
|
||||
continue
|
||||
}
|
||||
return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno)
|
||||
}
|
||||
|
||||
// Convert the ids to program handles.
|
||||
progIds = progIds[:size]
|
||||
programs := make([]*ebpf.Program, len(progIds))
|
||||
for idx, progId := range progIds {
|
||||
program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot fetch program from id: %w", err)
|
||||
}
|
||||
programs[idx] = program
|
||||
}
|
||||
runtime.KeepAlive(progIds)
|
||||
return programs, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("could not get complete list of CGROUP_DEVICE programs")
|
||||
}
|
||||
|
||||
var (
|
||||
haveBpfProgReplaceBool bool
|
||||
haveBpfProgReplaceOnce sync.Once
|
||||
)
|
||||
|
||||
// Loosely based on the BPF_F_REPLACE support check in
|
||||
// <https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go>.
|
||||
//
|
||||
// TODO: move this logic to cilium/ebpf
|
||||
func haveBpfProgReplace() bool {
|
||||
haveBpfProgReplaceOnce.Do(func() {
|
||||
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
License: "MIT",
|
||||
Instructions: asm.Instructions{
|
||||
asm.Mov.Imm(asm.R0, 0),
|
||||
asm.Return(),
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err)
|
||||
return
|
||||
}
|
||||
defer prog.Close()
|
||||
|
||||
devnull, err := os.Open("/dev/null")
|
||||
if err != nil {
|
||||
logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err)
|
||||
return
|
||||
}
|
||||
defer devnull.Close()
|
||||
|
||||
// We know that we have BPF_PROG_ATTACH since we can load
|
||||
// BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL
|
||||
// we know that the feature isn't present.
|
||||
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
// We rely on this fd being checked after attachFlags.
|
||||
Target: int(devnull.Fd()),
|
||||
// Attempt to "replace" bad fds with this program.
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE,
|
||||
})
|
||||
if errors.Is(err, unix.EINVAL) {
|
||||
// not supported
|
||||
return
|
||||
}
|
||||
// attach_flags test succeded.
|
||||
if !errors.Is(err, unix.EBADF) {
|
||||
logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
|
||||
}
|
||||
haveBpfProgReplaceBool = true
|
||||
})
|
||||
return haveBpfProgReplaceBool
|
||||
}
|
||||
|
||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||
//
|
||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||
//
|
||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) {
|
||||
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
||||
// This limit is not inherited into the container.
|
||||
memlockLimit := &unix.Rlimit{
|
||||
Cur: unix.RLIM_INFINITY,
|
||||
Max: unix.RLIM_INFINITY,
|
||||
}
|
||||
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
||||
|
||||
// Get the list of existing programs.
|
||||
oldProgs, err := findAttachedCgroupDeviceFilters(dirFd)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1
|
||||
|
||||
// Generate new program.
|
||||
spec := &ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
Instructions: insts,
|
||||
License: license,
|
||||
}
|
||||
prog, err := ebpf.NewProgram(spec)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
|
||||
// If there is only one old program, we can just replace it directly.
|
||||
var (
|
||||
replaceProg *ebpf.Program
|
||||
attachFlags uint32 = unix.BPF_F_ALLOW_MULTI
|
||||
)
|
||||
if useReplaceProg {
|
||||
replaceProg = oldProgs[0]
|
||||
attachFlags |= unix.BPF_F_REPLACE
|
||||
}
|
||||
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
Target: dirFd,
|
||||
Program: prog,
|
||||
Replace: replaceProg,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
Flags: attachFlags,
|
||||
})
|
||||
if err != nil {
|
||||
return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
|
||||
}
|
||||
closer := func() error {
|
||||
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||
Target: dirFd,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
|
||||
}
|
||||
// TODO: Should we attach the old filters back in this case? Otherwise
|
||||
// we fail-open on a security feature, which is a bit scary.
|
||||
return nil
|
||||
}
|
||||
if !useReplaceProg {
|
||||
logLevel := logrus.DebugLevel
|
||||
// If there was more than one old program, give a warning (since this
|
||||
// really shouldn't happen with runc-managed cgroups) and then detach
|
||||
// all the old programs.
|
||||
if len(oldProgs) > 1 {
|
||||
// NOTE: Ideally this should be a warning but it turns out that
|
||||
// systemd-managed cgroups trigger this warning (apparently
|
||||
// systemd doesn't delete old non-systemd programs when
|
||||
// setting properties).
|
||||
logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs))
|
||||
logLevel = logrus.InfoLevel
|
||||
}
|
||||
for idx, oldProg := range oldProgs {
|
||||
// Output some extra debug info.
|
||||
if info, err := oldProg.Info(); err == nil {
|
||||
fields := logrus.Fields{
|
||||
"type": info.Type.String(),
|
||||
"tag": info.Tag,
|
||||
"name": info.Name,
|
||||
}
|
||||
if id, ok := info.ID(); ok {
|
||||
fields["id"] = id
|
||||
}
|
||||
if runCount, ok := info.RunCount(); ok {
|
||||
fields["run_count"] = runCount
|
||||
}
|
||||
if runtime, ok := info.Runtime(); ok {
|
||||
fields["runtime"] = runtime.String()
|
||||
}
|
||||
logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx)
|
||||
}
|
||||
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||
Target: dirFd,
|
||||
Program: oldProg,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
})
|
||||
if err != nil {
|
||||
return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return closer, nil
|
||||
}
|
Reference in New Issue
Block a user