build(deps): bump github.com/containerd/cgroups/v3 from 3.0.2 to 3.0.3

Bumps [github.com/containerd/cgroups/v3](https://github.com/containerd/cgroups) from 3.0.2 to 3.0.3.
- [Release notes](https://github.com/containerd/cgroups/releases)
- [Commits](https://github.com/containerd/cgroups/compare/v3.0.2...v3.0.3)

---
updated-dependencies:
- dependency-name: github.com/containerd/cgroups/v3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
This commit is contained in:
dependabot[bot]
2023-12-29 11:45:53 +00:00
committed by GitHub
parent 1f76ca4081
commit 5387747e92
119 changed files with 8743 additions and 3476 deletions

View File

@@ -10,10 +10,15 @@ import (
type cgroupAttachFlags uint32
// cgroup attach flags
const (
// Allow programs attached to sub-cgroups to override the verdict of this
// program.
flagAllowOverride cgroupAttachFlags = 1 << iota
// Allow attaching multiple programs to the cgroup. Only works if the cgroup
// has zero or more programs attached using the Multi flag. Implies override.
flagAllowMulti
// Set automatically by progAttachCgroup.Update(). Used for updating a
// specific given program attached in multi-mode.
flagReplace
)
@@ -27,29 +32,39 @@ type CgroupOptions struct {
}
// AttachCgroup links a BPF program to a cgroup.
func AttachCgroup(opts CgroupOptions) (Link, error) {
//
// If the running kernel doesn't support bpf_link, attempts to emulate its
// semantics using the legacy PROG_ATTACH mechanism. If bpf_link is not
// available, the returned [Link] will not support pinning to bpffs.
//
// If you need more control over attachment flags or the attachment mechanism
// used, look at [RawAttachProgram] and [AttachRawLink] instead.
func AttachCgroup(opts CgroupOptions) (cg Link, err error) {
cgroup, err := os.Open(opts.Path)
if err != nil {
return nil, fmt.Errorf("can't open cgroup: %s", err)
}
clone, err := opts.Program.Clone()
if err != nil {
defer func() {
if _, ok := cg.(*progAttachCgroup); ok {
// Skip closing the cgroup handle if we return a valid progAttachCgroup,
// where the handle is retained to implement Update().
return
}
cgroup.Close()
return nil, err
}()
cg, err = newLinkCgroup(cgroup, opts.Attach, opts.Program)
if err == nil {
return cg, nil
}
var cg Link
cg, err = newLinkCgroup(cgroup, opts.Attach, clone)
if errors.Is(err, ErrNotSupported) {
cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowMulti)
cg, err = newProgAttachCgroup(cgroup, opts.Attach, opts.Program, flagAllowMulti)
}
if errors.Is(err, ErrNotSupported) {
cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowOverride)
cg, err = newProgAttachCgroup(cgroup, opts.Attach, opts.Program, flagAllowOverride)
}
if err != nil {
cgroup.Close()
clone.Close()
return nil, err
}
@@ -67,6 +82,8 @@ var _ Link = (*progAttachCgroup)(nil)
func (cg *progAttachCgroup) isLink() {}
// newProgAttachCgroup attaches prog to cgroup using BPF_PROG_ATTACH.
// cgroup and prog are retained by [progAttachCgroup].
func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program, flags cgroupAttachFlags) (*progAttachCgroup, error) {
if flags&flagAllowMulti > 0 {
if err := haveProgAttachReplace(); err != nil {
@@ -74,17 +91,24 @@ func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Pro
}
}
err := RawAttachProgram(RawAttachProgramOptions{
// Use a program handle that cannot be closed by the caller.
clone, err := prog.Clone()
if err != nil {
return nil, err
}
err = RawAttachProgram(RawAttachProgramOptions{
Target: int(cgroup.Fd()),
Program: prog,
Program: clone,
Flags: uint32(flags),
Attach: attach,
})
if err != nil {
clone.Close()
return nil, fmt.Errorf("cgroup: %w", err)
}
return &progAttachCgroup{cgroup, prog, attach, flags}, nil
return &progAttachCgroup{cgroup, clone, attach, flags}, nil
}
func (cg *progAttachCgroup) Close() error {
@@ -138,7 +162,7 @@ func (cg *progAttachCgroup) Pin(string) error {
}
func (cg *progAttachCgroup) Unpin() error {
return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported)
return fmt.Errorf("can't unpin cgroup: %w", ErrNotSupported)
}
func (cg *progAttachCgroup) Info() (*Info, error) {
@@ -151,6 +175,7 @@ type linkCgroup struct {
var _ Link = (*linkCgroup)(nil)
// newLinkCgroup attaches prog to cgroup using BPF_LINK_CREATE.
func newLinkCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program) (*linkCgroup, error) {
link, err := AttachRawLink(RawLinkOptions{
Target: int(cgroup.Fd()),

View File

@@ -1,42 +1,20 @@
package link
import (
"bytes"
"crypto/rand"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"syscall"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/tracefs"
"github.com/cilium/ebpf/internal/unix"
)
var (
kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events")
kprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
)
type probeType uint8
type probeArgs struct {
symbol, group, path string
offset, refCtrOffset, cookie uint64
pid int
ret bool
}
// KprobeOptions defines additional parameters that will be used
// when loading Kprobes.
type KprobeOptions struct {
@@ -49,45 +27,23 @@ type KprobeOptions struct {
// Can be used to insert kprobes at arbitrary offsets in kernel functions,
// e.g. in places where functions have been inlined.
Offset uint64
// Increase the maximum number of concurrent invocations of a kretprobe.
// Required when tracing some long running functions in the kernel.
//
// Deprecated: this setting forces the use of an outdated kernel API and is not portable
// across kernel versions.
RetprobeMaxActive int
// Prefix used for the event name if the kprobe must be attached using tracefs.
// The group name will be formatted as `<prefix>_<randomstr>`.
// The default empty string is equivalent to "ebpf" as the prefix.
TraceFSPrefix string
}
const (
kprobeType probeType = iota
uprobeType
)
func (pt probeType) String() string {
if pt == kprobeType {
return "kprobe"
func (ko *KprobeOptions) cookie() uint64 {
if ko == nil {
return 0
}
return "uprobe"
}
func (pt probeType) EventsPath() string {
if pt == kprobeType {
return kprobeEventsPath
}
return uprobeEventsPath
}
func (pt probeType) PerfEventType(ret bool) perfEventType {
if pt == kprobeType {
if ret {
return kretprobeEvent
}
return kprobeEvent
}
if ret {
return uretprobeEvent
}
return uprobeEvent
}
func (pt probeType) RetprobeBit() (uint64, error) {
if pt == kprobeType {
return kretprobeBit()
}
return uretprobeBit()
return ko.Cookie
}
// Kprobe attaches the given eBPF program to a perf event that fires when the
@@ -99,13 +55,17 @@ func (pt probeType) RetprobeBit() (uint64, error) {
// Losing the reference to the resulting Link (kp) will close the Kprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// If attaching to symbol fails, automatically retries with the running
// platform's syscall prefix (e.g. __x64_) to support attaching to syscalls
// in a portable fashion.
func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
k, err := kprobe(symbol, prog, opts, false)
if err != nil {
return nil, err
}
lnk, err := attachPerfEvent(k, prog)
lnk, err := attachPerfEvent(k, prog, opts.cookie())
if err != nil {
k.Close()
return nil, err
@@ -123,13 +83,20 @@ func Kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error
// Losing the reference to the resulting Link (kp) will close the Kretprobe
// and prevent further execution of prog. The Link must be Closed during
// program shutdown to avoid leaking system resources.
//
// If attaching to symbol fails, automatically retries with the running
// platform's syscall prefix (e.g. __x64_) to support attaching to syscalls
// in a portable fashion.
//
// On kernels 5.10 and earlier, setting a kretprobe on a nonexistent symbol
// incorrectly returns unix.EINVAL instead of os.ErrNotExist.
func Kretprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions) (Link, error) {
k, err := kprobe(symbol, prog, opts, true)
if err != nil {
return nil, err
}
lnk, err := attachPerfEvent(k, prog)
lnk, err := attachPerfEvent(k, prog, opts.cookie())
if err != nil {
k.Close()
return nil, err
@@ -181,50 +148,51 @@ func kprobe(symbol string, prog *ebpf.Program, opts *KprobeOptions, ret bool) (*
return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput)
}
args := probeArgs{
pid: perfAllThreads,
symbol: symbol,
ret: ret,
args := tracefs.ProbeArgs{
Type: tracefs.Kprobe,
Pid: perfAllThreads,
Symbol: symbol,
Ret: ret,
}
if opts != nil {
args.cookie = opts.Cookie
args.offset = opts.Offset
args.RetprobeMaxActive = opts.RetprobeMaxActive
args.Cookie = opts.Cookie
args.Offset = opts.Offset
args.Group = opts.TraceFSPrefix
}
// Use kprobe PMU if the kernel has it available.
tp, err := pmuKprobe(args)
if errors.Is(err, os.ErrNotExist) {
args.symbol = platformPrefix(symbol)
tp, err = pmuKprobe(args)
tp, err := pmuProbe(args)
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
if prefix := internal.PlatformPrefix(); prefix != "" {
args.Symbol = prefix + symbol
tp, err = pmuProbe(args)
}
}
if err == nil {
return tp, nil
}
if err != nil && !errors.Is(err, ErrNotSupported) {
return nil, fmt.Errorf("creating perf_kprobe PMU: %w", err)
return nil, fmt.Errorf("creating perf_kprobe PMU (arch-specific fallback for %q): %w", symbol, err)
}
// Use tracefs if kprobe PMU is missing.
args.symbol = symbol
tp, err = tracefsKprobe(args)
if errors.Is(err, os.ErrNotExist) {
args.symbol = platformPrefix(symbol)
tp, err = tracefsKprobe(args)
args.Symbol = symbol
tp, err = tracefsProbe(args)
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
if prefix := internal.PlatformPrefix(); prefix != "" {
args.Symbol = prefix + symbol
tp, err = tracefsProbe(args)
}
}
if err != nil {
return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err)
return nil, fmt.Errorf("creating tracefs event (arch-specific fallback for %q): %w", symbol, err)
}
return tp, nil
}
// pmuKprobe opens a perf event based on the kprobe PMU.
// Returns os.ErrNotExist if the given symbol does not exist in the kernel.
func pmuKprobe(args probeArgs) (*perfEvent, error) {
return pmuProbe(kprobeType, args)
}
// pmuProbe opens a perf event based on a Performance Monitoring Unit.
//
// Requires at least a 4.17 kernel.
@@ -232,17 +200,25 @@ func pmuKprobe(args probeArgs) (*perfEvent, error) {
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
//
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU
func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) {
func pmuProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
// Getting the PMU type will fail if the kernel doesn't support
// the perf_[k,u]probe PMU.
et, err := getPMUEventType(typ)
eventType, err := internal.ReadUint64FromFileOnce("%d\n", "/sys/bus/event_source/devices", args.Type.String(), "type")
if errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("%s: %w", args.Type, ErrNotSupported)
}
if err != nil {
return nil, err
}
// Use tracefs if we want to set kretprobe's retprobeMaxActive.
if args.RetprobeMaxActive != 0 {
return nil, fmt.Errorf("pmu probe: non-zero retprobeMaxActive: %w", ErrNotSupported)
}
var config uint64
if args.ret {
bit, err := typ.RetprobeBit()
if args.Ret {
bit, err := internal.ReadUint64FromFileOnce("config:%d\n", "/sys/bus/event_source/devices", args.Type.String(), "/format/retprobe")
if err != nil {
return nil, err
}
@@ -250,75 +226,81 @@ func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) {
}
var (
attr unix.PerfEventAttr
sp unsafe.Pointer
attr unix.PerfEventAttr
sp unsafe.Pointer
token string
)
switch typ {
case kprobeType:
switch args.Type {
case tracefs.Kprobe:
// Create a pointer to a NUL-terminated string for the kernel.
sp, err = unsafeStringPtr(args.symbol)
sp, err = unsafeStringPtr(args.Symbol)
if err != nil {
return nil, err
}
token = tracefs.KprobeToken(args)
attr = unix.PerfEventAttr{
// The minimum size required for PMU kprobes is PERF_ATTR_SIZE_VER1,
// since it added the config2 (Ext2) field. Use Ext2 as probe_offset.
Size: unix.PERF_ATTR_SIZE_VER1,
Type: uint32(et), // PMU event type read from sysfs
Type: uint32(eventType), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Kernel symbol to trace
Ext2: args.offset, // Kernel symbol offset
Ext2: args.Offset, // Kernel symbol offset
Config: config, // Retprobe flag
}
case uprobeType:
sp, err = unsafeStringPtr(args.path)
case tracefs.Uprobe:
sp, err = unsafeStringPtr(args.Path)
if err != nil {
return nil, err
}
if args.refCtrOffset != 0 {
config |= args.refCtrOffset << uprobeRefCtrOffsetShift
if args.RefCtrOffset != 0 {
config |= args.RefCtrOffset << uprobeRefCtrOffsetShift
}
token = tracefs.UprobeToken(args)
attr = unix.PerfEventAttr{
// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
// since it added the config2 (Ext2) field. The Size field controls the
// size of the internal buffer the kernel allocates for reading the
// perf_event_attr argument from userspace.
Size: unix.PERF_ATTR_SIZE_VER1,
Type: uint32(et), // PMU event type read from sysfs
Type: uint32(eventType), // PMU event type read from sysfs
Ext1: uint64(uintptr(sp)), // Uprobe path
Ext2: args.offset, // Uprobe offset
Ext2: args.Offset, // Uprobe offset
Config: config, // RefCtrOffset, Retprobe flag
}
}
rawFd, err := unix.PerfEventOpen(&attr, args.pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
rawFd, err := unix.PerfEventOpen(&attr, args.Pid, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
// On some old kernels, kprobe PMU doesn't allow `.` in symbol names and
// return -EINVAL. Return ErrNotSupported to allow falling back to tracefs.
// https://github.com/torvalds/linux/blob/94710cac0ef4/kernel/trace/trace_kprobe.c#L340-L343
if errors.Is(err, unix.EINVAL) && strings.Contains(args.symbol, ".") {
return nil, fmt.Errorf("symbol '%s+%#x': older kernels don't accept dots: %w", args.symbol, args.offset, ErrNotSupported)
if errors.Is(err, unix.EINVAL) && strings.Contains(args.Symbol, ".") {
return nil, fmt.Errorf("token %s: older kernels don't accept dots: %w", token, ErrNotSupported)
}
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
// is returned to the caller.
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
return nil, fmt.Errorf("symbol '%s+%#x' not found: %w", args.symbol, args.offset, os.ErrNotExist)
// when trying to create a retprobe for a missing symbol.
if errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("token %s: not found: %w", token, err)
}
// Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary.
if errors.Is(err, syscall.EILSEQ) {
return nil, fmt.Errorf("symbol '%s+%#x' not found (bad insn boundary): %w", args.symbol, args.offset, os.ErrNotExist)
// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary. The exact conditions that trigger this error are
// arch specific however.
if errors.Is(err, unix.EILSEQ) {
return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
}
// Since at least commit cb9a19fe4aa51, ENOTSUPP is returned
// when attempting to set a uprobe on a trap instruction.
if errors.Is(err, unix.ENOTSUPP) {
return nil, fmt.Errorf("failed setting uprobe on offset %#x (possible trap insn): %w", args.offset, err)
if errors.Is(err, sys.ENOTSUPP) {
return nil, fmt.Errorf("token %s: failed setting uprobe on offset %#x (possible trap insn): %w", token, args.Offset, err)
}
if err != nil {
return nil, fmt.Errorf("opening perf event: %w", err)
return nil, fmt.Errorf("token %s: opening perf event: %w", token, err)
}
// Ensure the string pointer is not collected before PerfEventOpen returns.
@@ -330,18 +312,7 @@ func pmuProbe(typ probeType, args probeArgs) (*perfEvent, error) {
}
// Kernel has perf_[k,u]probe PMU available, initialize perf event.
return &perfEvent{
typ: typ.PerfEventType(args.ret),
name: args.symbol,
pmuID: et,
cookie: args.cookie,
fd: fd,
}, nil
}
// tracefsKprobe creates a Kprobe tracefs entry.
func tracefsKprobe(args probeArgs) (*perfEvent, error) {
return tracefsProbe(kprobeType, args)
return newPerfEvent(fd, nil), nil
}
// tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events.
@@ -350,219 +321,37 @@ func tracefsKprobe(args probeArgs) (*perfEvent, error) {
// Path and offset are only set in the case of uprobe(s) and are used to set
// the executable/library path on the filesystem and the offset where the probe is inserted.
// A perf event is then opened on the newly-created trace event and returned to the caller.
func tracefsProbe(typ probeType, args probeArgs) (_ *perfEvent, err error) {
func tracefsProbe(args tracefs.ProbeArgs) (*perfEvent, error) {
groupPrefix := "ebpf"
if args.Group != "" {
groupPrefix = args.Group
}
// Generate a random string for each trace event we attempt to create.
// This value is used as the 'group' token in tracefs to allow creating
// multiple kprobe trace events with the same name.
group, err := randomGroup("ebpf")
group, err := tracefs.RandomGroup(groupPrefix)
if err != nil {
return nil, fmt.Errorf("randomizing group name: %w", err)
}
args.group = group
// Before attempting to create a trace event through tracefs,
// check if an event with the same group and name already exists.
// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
// entry, so we need to rely on reads for detecting uniqueness.
_, err = getTraceEventID(group, args.symbol)
if err == nil {
return nil, fmt.Errorf("trace event already exists: %s/%s", group, args.symbol)
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("checking trace event %s/%s: %w", group, args.symbol, err)
}
args.Group = group
// Create the [k,u]probe trace event using tracefs.
if err := createTraceFSProbeEvent(typ, args); err != nil {
return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
}
defer func() {
if err != nil {
// Make sure we clean up the created tracefs event when we return error.
// If a livepatch handler is already active on the symbol, the write to
// tracefs will succeed, a trace event will show up, but creating the
// perf event will fail with EBUSY.
_ = closeTraceFSProbeEvent(typ, args.group, args.symbol)
}
}()
// Get the newly-created trace event's id.
tid, err := getTraceEventID(group, args.symbol)
evt, err := tracefs.NewEvent(args)
if err != nil {
return nil, fmt.Errorf("getting trace event id: %w", err)
return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
}
// Kprobes are ephemeral tracepoints and share the same perf event type.
fd, err := openTracepointPerfEvent(tid, args.pid)
fd, err := openTracepointPerfEvent(evt.ID(), args.Pid)
if err != nil {
// Make sure we clean up the created tracefs event when we return error.
// If a livepatch handler is already active on the symbol, the write to
// tracefs will succeed, a trace event will show up, but creating the
// perf event will fail with EBUSY.
_ = evt.Close()
return nil, err
}
return &perfEvent{
typ: typ.PerfEventType(args.ret),
group: group,
name: args.symbol,
tracefsID: tid,
cookie: args.cookie,
fd: fd,
}, nil
}
// createTraceFSProbeEvent creates a new ephemeral trace event by writing to
// <tracefs>/[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
// if a probe with the same group and symbol already exists.
func createTraceFSProbeEvent(typ probeType, args probeArgs) error {
// Open the kprobe_events file in tracefs.
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
if err != nil {
return fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err)
}
defer f.Close()
var pe, token string
switch typ {
case kprobeType:
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
// p:ebpf_5678/p_my_kprobe __x64_sys_execve
//
// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
// kernel default to NR_CPUS. This is desired in most eBPF cases since
// subsampling or rate limiting logic can be more accurately implemented in
// the eBPF program itself.
// See Documentation/kprobes.txt for more details.
token = kprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, sanitizeSymbol(args.symbol), token)
case uprobeType:
// The uprobe_events syntax is as follows:
// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/readline /bin/bash:0x12345
// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123)
//
// See Documentation/trace/uprobetracer.txt for more details.
token = uprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.ret), args.group, args.symbol, token)
}
_, err = f.WriteString(pe)
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
// is returned to the caller.
// EINVAL is also returned on pre-5.2 kernels when the `SYM[+offs]` token
// is resolved to an invalid insn boundary.
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
return fmt.Errorf("token %s: %w", token, os.ErrNotExist)
}
// Since commit ab105a4fb894, -EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary.
if errors.Is(err, syscall.EILSEQ) {
return fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
}
// ERANGE is returned when the `SYM[+offs]` token is too big and cannot
// be resolved.
if errors.Is(err, syscall.ERANGE) {
return fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist)
}
if err != nil {
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err)
}
return nil
}
// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol
// from <tracefs>/[k,u]probe_events.
func closeTraceFSProbeEvent(typ probeType, group, symbol string) error {
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
if err != nil {
return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err)
}
defer f.Close()
// See [k,u]probe_events syntax above. The probe type does not need to be specified
// for removals.
pe := fmt.Sprintf("-:%s/%s", group, sanitizeSymbol(symbol))
if _, err = f.WriteString(pe); err != nil {
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err)
}
return nil
}
// randomGroup generates a pseudorandom string for use as a tracefs group name.
// Returns an error when the output string would exceed 63 characters (kernel
// limitation), when rand.Read() fails or when prefix contains characters not
// allowed by isValidTraceID.
func randomGroup(prefix string) (string, error) {
if !isValidTraceID(prefix) {
return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, errInvalidInput)
}
b := make([]byte, 8)
if _, err := rand.Read(b); err != nil {
return "", fmt.Errorf("reading random bytes: %w", err)
}
group := fmt.Sprintf("%s_%x", prefix, b)
if len(group) > 63 {
return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, errInvalidInput)
}
return group, nil
}
func probePrefix(ret bool) string {
if ret {
return "r"
}
return "p"
}
// determineRetprobeBit reads a Performance Monitoring Unit's retprobe bit
// from /sys/bus/event_source/devices/<pmu>/format/retprobe.
func determineRetprobeBit(typ probeType) (uint64, error) {
p := filepath.Join("/sys/bus/event_source/devices/", typ.String(), "/format/retprobe")
data, err := os.ReadFile(p)
if err != nil {
return 0, err
}
var rp uint64
n, err := fmt.Sscanf(string(bytes.TrimSpace(data)), "config:%d", &rp)
if err != nil {
return 0, fmt.Errorf("parse retprobe bit: %w", err)
}
if n != 1 {
return 0, fmt.Errorf("parse retprobe bit: expected 1 item, got %d", n)
}
return rp, nil
}
func kretprobeBit() (uint64, error) {
kprobeRetprobeBit.once.Do(func() {
kprobeRetprobeBit.value, kprobeRetprobeBit.err = determineRetprobeBit(kprobeType)
})
return kprobeRetprobeBit.value, kprobeRetprobeBit.err
}
// kprobeToken creates the SYM[+offs] token for the tracefs api.
func kprobeToken(args probeArgs) string {
po := args.symbol
if args.offset != 0 {
po += fmt.Sprintf("+%#x", args.offset)
}
return po
return newPerfEvent(fd, evt), nil
}

180
vendor/github.com/cilium/ebpf/link/kprobe_multi.go generated vendored Normal file
View File

@@ -0,0 +1,180 @@
package link
import (
"errors"
"fmt"
"os"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
// KprobeMultiOptions defines additional parameters that will be used
// when opening a KprobeMulti Link.
//
// Exactly one of Symbols or Addresses must be non-empty; attaching fails
// with an invalid-input error if both are empty or both are set.
type KprobeMultiOptions struct {
// Symbols takes a list of kernel symbol names to attach an ebpf program to.
//
// Mutually exclusive with Addresses.
Symbols []string
// Addresses takes a list of kernel symbol addresses in case they cannot
// be referred to by name.
//
// Note that only start addresses can be specified, since the fprobe API
// limits the attach point to the function entry or return.
//
// Mutually exclusive with Symbols.
Addresses []uintptr
// Cookies specifies arbitrary values that can be fetched from an eBPF
// program via `bpf_get_attach_cookie()`.
//
// If set, its length must be equal to the length of Symbols or Addresses
// (whichever is in use); a mismatch is rejected as invalid input.
// Each Cookie is assigned to the Symbol or Address specified at the
// corresponding slice index.
Cookies []uint64
}
// KprobeMulti attaches the given eBPF program to the entry point of a given set
// of kernel symbols.
//
// The difference with Kprobe() is that multi-kprobe accomplishes this in a
// single system call, making it significantly faster than attaching many
// probes one at a time.
//
// An error is returned if prog is nil or if opts fails validation (see
// KprobeMultiOptions). If the kernel reports that one or more symbols could
// not be found, the returned error wraps os.ErrNotExist.
//
// Requires at least Linux 5.18.
func KprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions) (Link, error) {
// A flags value of 0 selects the function entry point.
return kprobeMulti(prog, opts, 0)
}
// KretprobeMulti attaches the given eBPF program to the return point of a given
// set of kernel symbols.
//
// The difference with Kretprobe() is that multi-kprobe accomplishes this in a
// single system call, making it significantly faster than attaching many
// probes one at a time.
//
// An error is returned if prog is nil or if opts fails validation (see
// KprobeMultiOptions). If the kernel reports that one or more symbols could
// not be found, the returned error wraps os.ErrNotExist.
//
// Requires at least Linux 5.18.
func KretprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions) (Link, error) {
// BPF_F_KPROBE_MULTI_RETURN selects the function return point.
return kprobeMulti(prog, opts, unix.BPF_F_KPROBE_MULTI_RETURN)
}
// kprobeMulti creates a kprobe_multi link for prog using the symbols or
// addresses given in opts. flags is passed through to the kernel as the
// link's KprobeMultiFlags.
//
// The options are validated before the kernel is contacted:
//   - prog must not be nil,
//   - exactly one of opts.Symbols and opts.Addresses must be non-empty,
//   - opts.Cookies, if non-empty, must match the length of the list in use.
//
// ESRCH from the kernel is reported as os.ErrNotExist. EINVAL is wrapped
// with a hint about its likely cause.
func kprobeMulti(prog *ebpf.Program, opts KprobeMultiOptions, flags uint32) (Link, error) {
	if prog == nil {
		return nil, errors.New("cannot attach a nil program")
	}

	var (
		numSyms    = uint32(len(opts.Symbols))
		numAddrs   = uint32(len(opts.Addresses))
		numCookies = uint32(len(opts.Cookies))
	)

	// Reject inconsistent option combinations up front.
	switch {
	case numSyms == 0 && numAddrs == 0:
		return nil, fmt.Errorf("one of Symbols or Addresses is required: %w", errInvalidInput)
	case numSyms != 0 && numAddrs != 0:
		return nil, fmt.Errorf("Symbols and Addresses are mutually exclusive: %w", errInvalidInput)
	case numCookies > 0 && numCookies != numSyms && numCookies != numAddrs:
		return nil, fmt.Errorf("Cookies must be exactly Symbols or Addresses in length: %w", errInvalidInput)
	}

	if err := haveBPFLinkKprobeMulti(); err != nil {
		return nil, err
	}

	attr := sys.LinkCreateKprobeMultiAttr{
		ProgFd:           uint32(prog.FD()),
		AttachType:       sys.BPF_TRACE_KPROBE_MULTI,
		KprobeMultiFlags: flags,
	}

	// The checks above guarantee that exactly one of the two lists is in use.
	if numSyms != 0 {
		attr.Count = numSyms
		attr.Syms = sys.NewStringSlicePointer(opts.Symbols)
	} else {
		attr.Count = numAddrs
		attr.Addrs = sys.NewPointer(unsafe.Pointer(&opts.Addresses[0]))
	}

	if numCookies != 0 {
		attr.Cookies = sys.NewPointer(unsafe.Pointer(&opts.Cookies[0]))
	}

	fd, err := sys.LinkCreateKprobeMulti(&attr)
	switch {
	case errors.Is(err, unix.ESRCH):
		return nil, fmt.Errorf("couldn't find one or more symbols: %w", os.ErrNotExist)
	case errors.Is(err, unix.EINVAL):
		return nil, fmt.Errorf("%w (missing kernel symbol or prog's AttachType not AttachTraceKprobeMulti?)", err)
	case err != nil:
		return nil, err
	}

	return &kprobeMultiLink{RawLink{fd: fd, pinnedPath: ""}}, nil
}
// kprobeMultiLink is the Link returned for kprobe_multi attachments. It
// embeds RawLink and overrides Update, Pin and Unpin, all of which return
// ErrNotSupported.
type kprobeMultiLink struct {
RawLink
}
// Compile-time assertion that *kprobeMultiLink implements Link.
var _ Link = (*kprobeMultiLink)(nil)
// Update always fails with an error wrapping ErrNotSupported; prog is
// ignored.
func (kml *kprobeMultiLink) Update(prog *ebpf.Program) error {
return fmt.Errorf("update kprobe_multi: %w", ErrNotSupported)
}
// Pin always fails with an error wrapping ErrNotSupported; the path
// argument is ignored.
func (kml *kprobeMultiLink) Pin(string) error {
return fmt.Errorf("pin kprobe_multi: %w", ErrNotSupported)
}
// Unpin always fails with an error wrapping ErrNotSupported.
func (kml *kprobeMultiLink) Unpin() error {
return fmt.Errorf("unpin kprobe_multi: %w", ErrNotSupported)
}
// haveBPFLinkKprobeMulti probes whether the running kernel can create
// kprobe_multi links. It loads a minimal Kprobe program with the
// AttachTraceKprobeMulti attach type and tries to attach it to the
// "vprintk" symbol, closing the resulting link again on success.
//
// internal.ErrNotSupported is returned when loading the program fails with
// E2BIG, or when creating the link fails with EINVAL or EOPNOTSUPP. Any
// other error is returned as is.
var haveBPFLinkKprobeMulti = internal.NewFeatureTest("bpf_link_kprobe_multi", "5.18", func() error {
	spec := &ebpf.ProgramSpec{
		Name: "probe_kpm_link",
		Type: ebpf.Kprobe,
		Instructions: asm.Instructions{
			asm.Mov.Imm(asm.R0, 0),
			asm.Return(),
		},
		AttachType: ebpf.AttachTraceKprobeMulti,
		License:    "MIT",
	}

	prog, err := ebpf.NewProgram(spec)
	if err != nil {
		if errors.Is(err, unix.E2BIG) {
			// Kernel doesn't support AttachType field.
			return internal.ErrNotSupported
		}
		return err
	}
	defer prog.Close()

	attr := sys.LinkCreateKprobeMultiAttr{
		ProgFd:     uint32(prog.FD()),
		AttachType: sys.BPF_TRACE_KPROBE_MULTI,
		Count:      1,
		Syms:       sys.NewStringSlicePointer([]string{"vprintk"}),
	}

	fd, err := sys.LinkCreateKprobeMulti(&attr)
	// EOPNOTSUPP is returned if CONFIG_FPROBE isn't set.
	if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EOPNOTSUPP) {
		return internal.ErrNotSupported
	}
	if err != nil {
		return err
	}

	// The link was only needed to prove the feature exists.
	fd.Close()
	return nil
})

View File

@@ -46,6 +46,18 @@ type Link interface {
isLink()
}
// NewLinkFromFD creates a link from a raw fd.
//
// The fd is wrapped using sys.NewFD and the resulting RawLink is handed to
// wrapRawLink, which returns a more specific Link type if possible and
// closes the link on error.
//
// You should not use fd after calling this function.
func NewLinkFromFD(fd int) (Link, error) {
sysFD, err := sys.NewFD(fd)
if err != nil {
return nil, err
}
// wrapRawLink takes ownership of the RawLink.
return wrapRawLink(&RawLink{fd: sysFD})
}
// LoadPinnedLink loads a link that was persisted into a bpffs.
func LoadPinnedLink(fileName string, opts *ebpf.LoadPinOptions) (Link, error) {
raw, err := loadPinnedRawLink(fileName, opts)
@@ -59,10 +71,15 @@ func LoadPinnedLink(fileName string, opts *ebpf.LoadPinOptions) (Link, error) {
// wrap a RawLink in a more specific type if possible.
//
// The function takes ownership of raw and closes it on error.
func wrapRawLink(raw *RawLink) (Link, error) {
func wrapRawLink(raw *RawLink) (_ Link, err error) {
defer func() {
if err != nil {
raw.Close()
}
}()
info, err := raw.Info()
if err != nil {
raw.Close()
return nil, err
}
@@ -77,6 +94,10 @@ func wrapRawLink(raw *RawLink) (Link, error) {
return &Iter{*raw}, nil
case NetNsType:
return &NetNsLink{*raw}, nil
case KprobeMultiType:
return &kprobeMultiLink{*raw}, nil
case PerfEventType:
return nil, fmt.Errorf("recovering perf event fd: %w", ErrNotSupported)
default:
return raw, nil
}
@@ -172,12 +193,12 @@ func AttachRawLink(opts RawLinkOptions) (*RawLink, error) {
TargetFd: uint32(opts.Target),
ProgFd: uint32(progFd),
AttachType: sys.AttachType(opts.Attach),
TargetBtfId: uint32(opts.BTF),
TargetBtfId: opts.BTF,
Flags: opts.Flags,
}
fd, err := sys.LinkCreate(&attr)
if err != nil {
return nil, fmt.Errorf("can't create link: %s", err)
return nil, fmt.Errorf("create link: %w", err)
}
return &RawLink{fd, ""}, nil
@@ -230,6 +251,11 @@ func (l *RawLink) Unpin() error {
return nil
}
// IsPinned reports whether the Link has a non-empty pinned path.
//
// Only the pinnedPath field is checked; the filesystem is not consulted.
func (l *RawLink) IsPinned() bool {
return l.pinnedPath != ""
}
// Update implements the Link interface.
func (l *RawLink) Update(new *ebpf.Program) error {
return l.UpdateArgs(RawLinkUpdateOptions{
@@ -280,27 +306,24 @@ func (l *RawLink) Info() (*Info, error) {
switch info.Type {
case CgroupType:
extra = &CgroupInfo{}
case IterType:
// not supported
case NetNsType:
extra = &NetNsInfo{}
case RawTracepointType:
// not supported
case TracingType:
extra = &TracingInfo{}
case XDPType:
extra = &XDPInfo{}
case PerfEventType:
// no extra
case RawTracepointType, IterType,
PerfEventType, KprobeMultiType:
// Extra metadata not supported.
default:
return nil, fmt.Errorf("unknown link info type: %d", info.Type)
}
if info.Type != RawTracepointType && info.Type != IterType && info.Type != PerfEventType {
if extra != nil {
buf := bytes.NewReader(info.Extra[:])
err := binary.Read(buf, internal.NativeEndian, extra)
if err != nil {
return nil, fmt.Errorf("can not read extra link info: %w", err)
return nil, fmt.Errorf("cannot read extra link info: %w", err)
}
}

View File

@@ -1,20 +1,16 @@
package link
import (
"bytes"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/tracefs"
"github.com/cilium/ebpf/internal/unix"
)
@@ -42,67 +38,41 @@ import (
// stops any further invocations of the attached eBPF program.
var (
tracefsPath = "/sys/kernel/debug/tracing"
errInvalidInput = errors.New("invalid input")
errInvalidInput = tracefs.ErrInvalidInput
)
const (
perfAllThreads = -1
)
type perfEventType uint8
const (
tracepointEvent perfEventType = iota
kprobeEvent
kretprobeEvent
uprobeEvent
uretprobeEvent
)
// A perfEvent represents a perf event kernel object. Exactly one eBPF program
// can be attached to it. It is created based on a tracefs trace event or a
// Performance Monitoring Unit (PMU).
type perfEvent struct {
// The event type determines the types of programs that can be attached.
typ perfEventType
// Group and name of the tracepoint/kprobe/uprobe.
group string
name string
// PMU event ID read from sysfs. Valid IDs are non-zero.
pmuID uint64
// ID of the trace event read from tracefs. Valid IDs are non-zero.
tracefsID uint64
// User provided arbitrary value.
cookie uint64
// Trace event backing this perfEvent. May be nil.
tracefsEvent *tracefs.Event
// This is the perf event FD.
fd *sys.FD
}
// newPerfEvent bundles a perf event fd with the (optional) trace event that
// backs it.
//
// A finalizer is installed on the returned value so that Close runs when the
// perfEvent becomes unreachable.
func newPerfEvent(fd *sys.FD, event *tracefs.Event) *perfEvent {
	result := &perfEvent{
		tracefsEvent: event,
		fd:           fd,
	}

	// Both event and fd have their own finalizer, but we want to
	// guarantee that they are closed in a certain order.
	runtime.SetFinalizer(result, (*perfEvent).Close)

	return result
}
func (pe *perfEvent) Close() error {
runtime.SetFinalizer(pe, nil)
if err := pe.fd.Close(); err != nil {
return fmt.Errorf("closing perf event fd: %w", err)
}
switch pe.typ {
case kprobeEvent, kretprobeEvent:
// Clean up kprobe tracefs entry.
if pe.tracefsID != 0 {
return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name)
}
case uprobeEvent, uretprobeEvent:
// Clean up uprobe tracefs entry.
if pe.tracefsID != 0 {
return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name)
}
case tracepointEvent:
// Tracepoint trace events don't hold any extra resources.
return nil
if pe.tracefsEvent != nil {
return pe.tracefsEvent.Close()
}
return nil
@@ -136,10 +106,14 @@ func (pl *perfEventLink) Unpin() error {
}
func (pl *perfEventLink) Close() error {
if err := pl.pe.Close(); err != nil {
return fmt.Errorf("perf event link close: %w", err)
if err := pl.fd.Close(); err != nil {
return fmt.Errorf("perf link close: %w", err)
}
return pl.fd.Close()
if err := pl.pe.Close(); err != nil {
return fmt.Errorf("perf event close: %w", err)
}
return nil
}
func (pl *perfEventLink) Update(prog *ebpf.Program) error {
@@ -183,7 +157,7 @@ func (pi *perfEventIoctl) Info() (*Info, error) {
// attach the given eBPF prog to the perf event stored in pe.
// pe must contain a valid perf event fd.
// prog's type must match the program type stored in pe.
func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) {
func attachPerfEvent(pe *perfEvent, prog *ebpf.Program, cookie uint64) (Link, error) {
if prog == nil {
return nil, errors.New("cannot attach a nil program")
}
@@ -191,30 +165,18 @@ func attachPerfEvent(pe *perfEvent, prog *ebpf.Program) (Link, error) {
return nil, fmt.Errorf("invalid program: %w", sys.ErrClosedFd)
}
switch pe.typ {
case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent:
if t := prog.Type(); t != ebpf.Kprobe {
return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t)
}
case tracepointEvent:
if t := prog.Type(); t != ebpf.TracePoint {
return nil, fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t)
}
default:
return nil, fmt.Errorf("unknown perf event type: %d", pe.typ)
if err := haveBPFLinkPerfEvent(); err == nil {
return attachPerfEventLink(pe, prog, cookie)
}
if err := haveBPFLinkPerfEvent(); err == nil {
return attachPerfEventLink(pe, prog)
if cookie != 0 {
return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
}
return attachPerfEventIoctl(pe, prog)
}
func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, error) {
if pe.cookie != 0 {
return nil, fmt.Errorf("cookies are not supported: %w", ErrNotSupported)
}
// Assign the eBPF program to the perf event.
err := unix.IoctlSetInt(pe.fd.Int(), unix.PERF_EVENT_IOC_SET_BPF, prog.FD())
if err != nil {
@@ -226,32 +188,24 @@ func attachPerfEventIoctl(pe *perfEvent, prog *ebpf.Program) (*perfEventIoctl, e
return nil, fmt.Errorf("enable perf event: %s", err)
}
pi := &perfEventIoctl{pe}
// Close the perf event when its reference is lost to avoid leaking system resources.
runtime.SetFinalizer(pi, (*perfEventIoctl).Close)
return pi, nil
return &perfEventIoctl{pe}, nil
}
// Use the bpf api to attach the perf event (BPF_LINK_TYPE_PERF_EVENT, 5.15+).
//
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program) (*perfEventLink, error) {
func attachPerfEventLink(pe *perfEvent, prog *ebpf.Program, cookie uint64) (*perfEventLink, error) {
fd, err := sys.LinkCreatePerfEvent(&sys.LinkCreatePerfEventAttr{
ProgFd: uint32(prog.FD()),
TargetFd: pe.fd.Uint(),
AttachType: sys.BPF_PERF_EVENT,
BpfCookie: pe.cookie,
BpfCookie: cookie,
})
if err != nil {
return nil, fmt.Errorf("cannot create bpf perf link: %v", err)
}
pl := &perfEventLink{RawLink{fd: fd}, pe}
// Close the perf event when its reference is lost to avoid leaking system resources.
runtime.SetFinalizer(pl, (*perfEventLink).Close)
return pl, nil
return &perfEventLink{RawLink{fd: fd}, pe}, nil
}
// unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str.
@@ -263,40 +217,6 @@ func unsafeStringPtr(str string) (unsafe.Pointer, error) {
return unsafe.Pointer(p), nil
}
// getTraceEventID looks up the numeric ID of the trace event group/name by
// reading <tracefsPath>/events/<group>/<name>/id.
// The kernel requires group and name to be alphanumeric or underscore.
//
// name is passed through sanitizeSymbol first, so the caller can pass a raw
// symbol name, e.g. a kernel symbol containing dots.
//
// A missing event is reported as an error wrapping os.ErrNotExist.
func getTraceEventID(group, name string) (uint64, error) {
	name = sanitizeSymbol(name)

	id, err := uint64FromFile(tracefsPath, "events", group, name, "id")
	switch {
	case err == nil:
		return id, nil
	case errors.Is(err, os.ErrNotExist):
		return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist)
	default:
		return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
	}
}
// getPMUEventType looks up the numeric type identifier of a Performance
// Monitoring Unit by reading /sys/bus/event_source/devices/<pmu>/type.
//
// Returns ErrNotSupported if the file for the pmu type does not exist.
func getPMUEventType(typ probeType) (uint64, error) {
	pmuType, err := uint64FromFile("/sys/bus/event_source/devices", typ.String(), "type")
	switch {
	case err == nil:
		return pmuType, nil
	case errors.Is(err, os.ErrNotExist):
		return 0, fmt.Errorf("pmu type %s: %w", typ, ErrNotSupported)
	default:
		return 0, fmt.Errorf("reading pmu type %s: %w", typ, err)
	}
}
// openTracepointPerfEvent opens a tracepoint-type perf event. System-wide
// [k,u]probes created by writing to <tracefs>/[k,u]probe_events are tracepoints
// behind the scenes, and can be attached to using these perf events.
@@ -317,30 +237,11 @@ func openTracepointPerfEvent(tid uint64, pid int) (*sys.FD, error) {
return sys.NewFD(fd)
}
// uint64FromFile reads a uint64 from a file. All elements of path are joined
// (and thereby cleaned) onto base.
//
// Returns an error wrapping errInvalidInput if the joined path no longer lies
// within base. Containment is checked on path components rather than with a
// plain string prefix, so a sibling such as "<base>_other" reached through
// ".." is rejected as well.
//
// Surrounding whitespace in the file contents is trimmed before the value is
// parsed as a base-10 unsigned integer.
func uint64FromFile(base string, path ...string) (uint64, error) {
	l := filepath.Join(path...)
	p := filepath.Join(base, l)

	// A relative path from base to p that starts with ".." means p is outside
	// of base. A failure to compute the relative path is treated the same way.
	rel, err := filepath.Rel(base, p)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput)
	}

	data, err := os.ReadFile(p)
	if err != nil {
		return 0, fmt.Errorf("reading file %s: %w", p, err)
	}

	et := bytes.TrimSpace(data)
	return strconv.ParseUint(string(et), 10, 64)
}
// Probe BPF perf link.
//
// https://elixir.bootlin.com/linux/v5.16.8/source/kernel/bpf/syscall.c#L4307
// https://github.com/torvalds/linux/commit/b89fbfbb854c9afc3047e8273cc3a694650b802e
var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", func() error {
var haveBPFLinkPerfEvent = internal.NewFeatureTest("bpf_link_perf_event", "5.15", func() error {
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Name: "probe_bpf_perf_link",
Type: ebpf.Kprobe,
@@ -367,28 +268,3 @@ var haveBPFLinkPerfEvent = internal.FeatureTest("bpf_link_perf_event", "5.15", f
}
return err
})
// isValidTraceID reports whether s matches "^[a-zA-Z_][0-9a-zA-Z_]*$"
// without using a regular expression.
//
// Trace event groups, names and kernel symbols must adhere to this set
// of characters: the string is non-empty, the first character is not a
// digit, and every character is alphanumeric or an underscore.
func isValidTraceID(s string) bool {
	if s == "" {
		return false
	}

	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]

		letter := (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
		if letter || ch == '_' {
			continue
		}

		// Digits are fine anywhere except in the leading position.
		digit := ch >= '0' && ch <= '9'
		if digit && pos > 0 {
			continue
		}

		return false
	}

	return true
}

View File

@@ -1,25 +0,0 @@
package link
import (
"fmt"
"runtime"
)
// platformPrefix prepends an architecture-specific "__<arch>_" prefix to
// symbol, chosen from runtime.GOARCH. For architectures without a known
// mapping, symbol is returned unchanged.
func platformPrefix(symbol string) string {
	// per https://github.com/golang/go/blob/master/src/go/build/syslist.go
	var arch string
	switch runtime.GOARCH {
	case "386":
		arch = "ia32"
	case "amd64", "amd64p32":
		arch = "x64"
	case "arm64", "arm64be":
		arch = "arm64"
	}

	if arch == "" {
		return symbol
	}
	return fmt.Sprintf("__%s_%s", arch, symbol)
}

63
vendor/github.com/cilium/ebpf/link/query.go generated vendored Normal file
View File

@@ -0,0 +1,63 @@
package link
import (
"fmt"
"os"
"unsafe"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/sys"
)
// QueryOptions defines additional parameters when querying for programs.
type QueryOptions struct {
// Path can be a path to a cgroup, netns or LIRC2 device.
// QueryPrograms opens it and uses the resulting file descriptor as the
// target of the query.
Path string
// Attach specifies the AttachType of the programs queried for.
Attach ebpf.AttachType
// QueryFlags are flags for BPF_PROG_QUERY, e.g. BPF_F_QUERY_EFFECTIVE.
// QueryPrograms passes them through unmodified.
QueryFlags uint32
}
// QueryPrograms retrieves ProgramIDs associated with the AttachType.
//
// The target at opts.Path is queried twice: once to learn how many programs
// are attached, and a second time with a buffer of that size to fetch the IDs.
//
// Returns (nil, nil) if there are no programs attached to the queried kernel
// resource. Calling QueryPrograms on a kernel missing PROG_QUERY will result in
// ErrNotSupported. Failure to open opts.Path is returned as an error wrapping
// the underlying os error.
func QueryPrograms(opts QueryOptions) ([]ebpf.ProgramID, error) {
	if haveProgQuery() != nil {
		return nil, fmt.Errorf("can't query program IDs: %w", ErrNotSupported)
	}

	f, err := os.Open(opts.Path)
	if err != nil {
		// Wrap so callers can match e.g. os.ErrNotExist with errors.Is.
		return nil, fmt.Errorf("can't open file: %w", err)
	}
	defer f.Close()

	// query the number of programs to allocate correct slice size
	attr := sys.ProgQueryAttr{
		TargetFd:   uint32(f.Fd()),
		AttachType: sys.AttachType(opts.Attach),
		QueryFlags: opts.QueryFlags,
	}
	if err := sys.ProgQuery(&attr); err != nil {
		return nil, fmt.Errorf("can't query program count: %w", err)
	}

	// return nil if no progs are attached
	if attr.ProgCount == 0 {
		return nil, nil
	}

	// we have at least one prog, so we query again
	progIds := make([]ebpf.ProgramID, attr.ProgCount)
	attr.ProgIds = sys.NewPointer(unsafe.Pointer(&progIds[0]))
	attr.ProgCount = uint32(len(progIds))
	if err := sys.ProgQuery(&attr); err != nil {
		return nil, fmt.Errorf("can't query program IDs: %w", err)
	}

	// Programs may have been detached between the two queries. The kernel
	// writes the current count back into attr.ProgCount, so drop any unused
	// (zero) trailing entries instead of returning them as IDs.
	if n := int(attr.ProgCount); n < len(progIds) {
		progIds = progIds[:n]
	}
	if len(progIds) == 0 {
		return nil, nil
	}

	return progIds, nil
}

View File

@@ -15,7 +15,7 @@ func AttachSocketFilter(conn syscall.Conn, program *ebpf.Program) error {
}
var ssoErr error
err = rawConn.Control(func(fd uintptr) {
ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_ATTACH_BPF, program.FD())
ssoErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_ATTACH_BPF, program.FD())
})
if ssoErr != nil {
return ssoErr
@@ -31,7 +31,7 @@ func DetachSocketFilter(conn syscall.Conn) error {
}
var ssoErr error
err = rawConn.Control(func(fd uintptr) {
ssoErr = syscall.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0)
ssoErr = unix.SetsockoptInt(int(fd), unix.SOL_SOCKET, unix.SO_DETACH_BPF, 0)
})
if ssoErr != nil {
return ssoErr

View File

@@ -23,9 +23,10 @@ const (
NetNsType = sys.BPF_LINK_TYPE_NETNS
XDPType = sys.BPF_LINK_TYPE_XDP
PerfEventType = sys.BPF_LINK_TYPE_PERF_EVENT
KprobeMultiType = sys.BPF_LINK_TYPE_KPROBE_MULTI
)
var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() error {
var haveProgAttach = internal.NewFeatureTest("BPF_PROG_ATTACH", "4.10", func() error {
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
Type: ebpf.CGroupSKB,
License: "MIT",
@@ -45,7 +46,7 @@ var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() erro
return nil
})
var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replacement", "5.5", func() error {
var haveProgAttachReplace = internal.NewFeatureTest("BPF_PROG_ATTACH atomic replacement of MULTI progs", "5.5", func() error {
if err := haveProgAttach(); err != nil {
return err
}
@@ -85,7 +86,7 @@ var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replace
return err
})
var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error {
var haveBPFLink = internal.NewFeatureTest("bpf_link", "5.7", func() error {
attr := sys.LinkCreateAttr{
// This is a hopefully invalid file descriptor, which triggers EBADF.
TargetFd: ^uint32(0),
@@ -101,3 +102,22 @@ var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error {
}
return err
})
// haveProgQuery probes for BPF_PROG_QUERY by issuing a query against an
// invalid target fd and interpreting the resulting errno.
var haveProgQuery = internal.NewFeatureTest("BPF_PROG_QUERY", "4.15", func() error {
	// We rely on TargetFd being checked during the syscall.
	// With an otherwise correct payload we expect EBADF here
	// as an indication that the feature is present.
	probe := sys.ProgQueryAttr{
		TargetFd:   ^uint32(0),
		AttachType: sys.AttachType(ebpf.AttachCGroupInetIngress),
	}

	switch err := sys.ProgQuery(&probe); {
	case errors.Is(err, unix.EINVAL):
		return internal.ErrNotSupported
	case errors.Is(err, unix.EBADF):
		return nil
	default:
		return err
	}
})

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal/tracefs"
)
// TracepointOptions defines additional parameters that will be used
@@ -17,7 +18,7 @@ type TracepointOptions struct {
}
// Tracepoint attaches the given eBPF program to the tracepoint with the given
// group and name. See /sys/kernel/debug/tracing/events to find available
// group and name. See /sys/kernel/tracing/events to find available
// tracepoints. The top-level directory is the group, the event's subdirectory
// is the name. Example:
//
@@ -36,14 +37,11 @@ func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions)
if prog == nil {
return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput)
}
if !isValidTraceID(group) || !isValidTraceID(name) {
return nil, fmt.Errorf("group and name '%s/%s' must be alphanumeric or underscore: %w", group, name, errInvalidInput)
}
if prog.Type() != ebpf.TracePoint {
return nil, fmt.Errorf("eBPF program type %s is not a Tracepoint: %w", prog.Type(), errInvalidInput)
}
tid, err := getTraceEventID(group, name)
tid, err := tracefs.EventID(group, name)
if err != nil {
return nil, err
}
@@ -58,16 +56,9 @@ func Tracepoint(group, name string, prog *ebpf.Program, opts *TracepointOptions)
cookie = opts.Cookie
}
pe := &perfEvent{
typ: tracepointEvent,
group: group,
name: name,
tracefsID: tid,
cookie: cookie,
fd: fd,
}
pe := newPerfEvent(fd, nil)
lnk, err := attachPerfEvent(pe, prog)
lnk, err := attachPerfEvent(pe, prog, cookie)
if err != nil {
pe.Close()
return nil, err

View File

@@ -1,11 +1,13 @@
package link
import (
"errors"
"fmt"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
type tracing struct {
@@ -70,6 +72,10 @@ func AttachFreplace(targetProg *ebpf.Program, name string, prog *ebpf.Program) (
Attach: ebpf.AttachNone,
BTF: typeID,
})
if errors.Is(err, sys.ENOTSUPP) {
// This may be returned by bpf_tracing_prog_attach via bpf_arch_text_poke.
return nil, fmt.Errorf("create raw tracepoint: %w", ErrNotSupported)
}
if err != nil {
return nil, err
}
@@ -82,25 +88,71 @@ type TracingOptions struct {
// AttachTraceFEntry/AttachTraceFExit/AttachModifyReturn or
// AttachTraceRawTp.
Program *ebpf.Program
// Program attach type. Can be one of:
// - AttachTraceFEntry
// - AttachTraceFExit
// - AttachModifyReturn
// - AttachTraceRawTp
// This field is optional.
AttachType ebpf.AttachType
// Arbitrary value that can be fetched from an eBPF program
// via `bpf_get_attach_cookie()`.
Cookie uint64
}
// LSMOptions defines the parameters used when attaching an LSM program.
type LSMOptions struct {
// Program must be of type LSM with attach type
// AttachLSMMac.
Program *ebpf.Program
// Arbitrary value that can be fetched from an eBPF program
// via `bpf_get_attach_cookie()`.
//
// NOTE(review): a non-zero Cookie appears to be rejected with
// ErrNotSupported when attachment falls back to a raw tracepoint.
Cookie uint64
}
// attachBTFID links all BPF program types (Tracing/LSM) that attach to a btf_id.
func attachBTFID(program *ebpf.Program) (Link, error) {
func attachBTFID(program *ebpf.Program, at ebpf.AttachType, cookie uint64) (Link, error) {
if program.FD() < 0 {
return nil, fmt.Errorf("invalid program %w", sys.ErrClosedFd)
}
fd, err := sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{
ProgFd: uint32(program.FD()),
})
if err != nil {
return nil, err
var (
fd *sys.FD
err error
)
switch at {
case ebpf.AttachTraceFEntry, ebpf.AttachTraceFExit, ebpf.AttachTraceRawTp,
ebpf.AttachModifyReturn, ebpf.AttachLSMMac:
// Attach via BPF link
fd, err = sys.LinkCreateTracing(&sys.LinkCreateTracingAttr{
ProgFd: uint32(program.FD()),
AttachType: sys.AttachType(at),
Cookie: cookie,
})
if err == nil {
break
}
if !errors.Is(err, unix.EINVAL) && !errors.Is(err, sys.ENOTSUPP) {
return nil, fmt.Errorf("create tracing link: %w", err)
}
fallthrough
case ebpf.AttachNone:
// Attach via RawTracepointOpen
if cookie > 0 {
return nil, fmt.Errorf("create raw tracepoint with cookie: %w", ErrNotSupported)
}
fd, err = sys.RawTracepointOpen(&sys.RawTracepointOpenAttr{
ProgFd: uint32(program.FD()),
})
if errors.Is(err, sys.ENOTSUPP) {
// This may be returned by bpf_tracing_prog_attach via bpf_arch_text_poke.
return nil, fmt.Errorf("create raw tracepoint: %w", ErrNotSupported)
}
if err != nil {
return nil, fmt.Errorf("create raw tracepoint: %w", err)
}
default:
return nil, fmt.Errorf("invalid attach type: %s", at.String())
}
raw := RawLink{fd: fd}
@@ -115,8 +167,7 @@ func attachBTFID(program *ebpf.Program) (Link, error) {
// a raw_tracepoint link. Other types return a tracing link.
return &rawTracepoint{raw}, nil
}
return &tracing{RawLink: RawLink{fd: fd}}, nil
return &tracing{raw}, nil
}
// AttachTracing links a tracing (fentry/fexit/fmod_ret) BPF program or
@@ -127,7 +178,14 @@ func AttachTracing(opts TracingOptions) (Link, error) {
return nil, fmt.Errorf("invalid program type %s, expected Tracing", t)
}
return attachBTFID(opts.Program)
switch opts.AttachType {
case ebpf.AttachTraceFEntry, ebpf.AttachTraceFExit, ebpf.AttachModifyReturn,
ebpf.AttachTraceRawTp, ebpf.AttachNone:
default:
return nil, fmt.Errorf("invalid attach type: %s", opts.AttachType.String())
}
return attachBTFID(opts.Program, opts.AttachType, opts.Cookie)
}
// AttachLSM links a Linux security module (LSM) BPF Program to a BPF
@@ -137,5 +195,5 @@ func AttachLSM(opts LSMOptions) (Link, error) {
return nil, fmt.Errorf("invalid program type %s, expected LSM", t)
}
return attachBTFID(opts.Program)
return attachBTFID(opts.Program, ebpf.AttachLSMMac, opts.Cookie)
}

View File

@@ -5,27 +5,18 @@ import (
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"github.com/cilium/ebpf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/tracefs"
)
var (
uprobeEventsPath = filepath.Join(tracefsPath, "uprobe_events")
uprobeRetprobeBit = struct {
once sync.Once
value uint64
err error
}{}
uprobeRefCtrOffsetPMUPath = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset"
// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/events/core.c#L9799
uprobeRefCtrOffsetShift = 32
haveRefCtrOffsetPMU = internal.FeatureTest("RefCtrOffsetPMU", "4.20", func() error {
haveRefCtrOffsetPMU = internal.NewFeatureTest("RefCtrOffsetPMU", "4.20", func() error {
_, err := os.Stat(uprobeRefCtrOffsetPMUPath)
if err != nil {
return internal.ErrNotSupported
@@ -44,6 +35,8 @@ type Executable struct {
path string
// Parsed ELF and dynamic symbols' addresses.
addresses map[string]uint64
// Keep track of symbol table lazy load.
addressesOnce sync.Once
}
// UprobeOptions defines additional parameters that will be used
@@ -77,11 +70,22 @@ type UprobeOptions struct {
//
// Needs kernel 5.15+.
Cookie uint64
// Prefix used for the event name if the uprobe must be attached using tracefs.
// The group name will be formatted as `<prefix>_<randomstr>`.
// The default empty string is equivalent to "ebpf" as the prefix.
TraceFSPrefix string
}
// cookie returns the Cookie configured in uo. It is safe to call on a nil
// receiver, in which case 0 is returned.
func (uo *UprobeOptions) cookie() uint64 {
	if uo != nil {
		return uo.Cookie
	}
	return 0
}
// To open a new Executable, use:
//
// OpenExecutable("/bin/bash")
// OpenExecutable("/bin/bash")
//
// The returned value can then be used to open Uprobe(s).
func OpenExecutable(path string) (*Executable, error) {
@@ -89,32 +93,21 @@ func OpenExecutable(path string) (*Executable, error) {
return nil, fmt.Errorf("path cannot be empty")
}
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("open file '%s': %w", path, err)
}
defer f.Close()
se, err := internal.NewSafeELFFile(f)
f, err := internal.OpenSafeELFFile(path)
if err != nil {
return nil, fmt.Errorf("parse ELF file: %w", err)
}
defer f.Close()
if se.Type != elf.ET_EXEC && se.Type != elf.ET_DYN {
if f.Type != elf.ET_EXEC && f.Type != elf.ET_DYN {
// ELF is not an executable or a shared object.
return nil, errors.New("the given file is not an executable or a shared object")
}
ex := Executable{
return &Executable{
path: path,
addresses: make(map[string]uint64),
}
if err := ex.load(se); err != nil {
return nil, err
}
return &ex, nil
}, nil
}
func (ex *Executable) load(f *internal.SafeELFFile) error {
@@ -171,6 +164,22 @@ func (ex *Executable) address(symbol string, opts *UprobeOptions) (uint64, error
return opts.Address + opts.Offset, nil
}
var err error
ex.addressesOnce.Do(func() {
var f *internal.SafeELFFile
f, err = internal.OpenSafeELFFile(ex.path)
if err != nil {
err = fmt.Errorf("parse ELF file: %w", err)
return
}
defer f.Close()
err = ex.load(f)
})
if err != nil {
return 0, fmt.Errorf("lazy load symbols: %w", err)
}
address, ok := ex.addresses[symbol]
if !ok {
return 0, fmt.Errorf("symbol %s: %w", symbol, ErrNoSymbol)
@@ -194,13 +203,13 @@ func (ex *Executable) address(symbol string, opts *UprobeOptions) (uint64, error
// given symbol starts executing in the given Executable.
// For example, /bin/bash::main():
//
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uprobe("main", prog, nil)
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uprobe("main", prog, nil)
//
// When using symbols which belong to shared libraries,
// an offset must be provided via options:
//
// up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123})
// up, err := ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123})
//
// Note: Setting the Offset field in the options supersedes the symbol's offset.
//
@@ -216,7 +225,7 @@ func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
return nil, err
}
lnk, err := attachPerfEvent(u, prog)
lnk, err := attachPerfEvent(u, prog, opts.cookie())
if err != nil {
u.Close()
return nil, err
@@ -228,13 +237,13 @@ func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
// Uretprobe attaches the given eBPF program to a perf event that fires right
// before the given symbol exits. For example, /bin/bash::main():
//
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uretprobe("main", prog, nil)
// ex, _ = OpenExecutable("/bin/bash")
// ex.Uretprobe("main", prog, nil)
//
// When using symbols which belong to shared libraries,
// an offset must be provided via options:
//
// up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123})
// up, err := ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123})
//
// Note: Setting the Offset field in the options supersedes the symbol's offset.
//
@@ -250,7 +259,7 @@ func (ex *Executable) Uretprobe(symbol string, prog *ebpf.Program, opts *UprobeO
return nil, err
}
lnk, err := attachPerfEvent(u, prog)
lnk, err := attachPerfEvent(u, prog, opts.cookie())
if err != nil {
u.Close()
return nil, err
@@ -288,18 +297,20 @@ func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
}
}
args := probeArgs{
symbol: symbol,
path: ex.path,
offset: offset,
pid: pid,
refCtrOffset: opts.RefCtrOffset,
ret: ret,
cookie: opts.Cookie,
args := tracefs.ProbeArgs{
Type: tracefs.Uprobe,
Symbol: symbol,
Path: ex.path,
Offset: offset,
Pid: pid,
RefCtrOffset: opts.RefCtrOffset,
Ret: ret,
Cookie: opts.Cookie,
Group: opts.TraceFSPrefix,
}
// Use uprobe PMU if the kernel has it available.
tp, err := pmuUprobe(args)
tp, err := pmuProbe(args)
if err == nil {
return tp, nil
}
@@ -308,66 +319,10 @@ func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOpti
}
// Use tracefs if uprobe PMU is missing.
args.symbol = sanitizeSymbol(symbol)
tp, err = tracefsUprobe(args)
tp, err = tracefsProbe(args)
if err != nil {
return nil, fmt.Errorf("creating trace event '%s:%s' in tracefs: %w", ex.path, symbol, err)
}
return tp, nil
}
// pmuUprobe opens a perf event based on the uprobe PMU.
//
// It is a thin wrapper that forwards args to pmuProbe with uprobeType.
func pmuUprobe(args probeArgs) (*perfEvent, error) {
return pmuProbe(uprobeType, args)
}
// tracefsUprobe creates a Uprobe tracefs entry.
//
// It is a thin wrapper that forwards args to tracefsProbe with uprobeType.
func tracefsUprobe(args probeArgs) (*perfEvent, error) {
return tracefsProbe(uprobeType, args)
}
// sanitizeSymbol rewrites s so that it is acceptable to the tracefs api:
// every run of one or more bytes outside [a-zA-Z0-9] is collapsed into a
// single underscore.
// It is equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString("_").
func sanitizeSymbol(s string) string {
	var out strings.Builder
	out.Grow(len(s))

	// inRun is true while we are inside a run of invalid bytes for which an
	// underscore has already been emitted.
	inRun := false
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]

		alnum := ('a' <= ch && ch <= 'z') ||
			('A' <= ch && ch <= 'Z') ||
			('0' <= ch && ch <= '9')
		if alnum {
			inRun = false
			out.WriteByte(ch)
			continue
		}

		if inRun {
			continue
		}
		out.WriteByte('_')
		inRun = true
	}

	return out.String()
}
// uprobeToken builds the PATH:OFFSET token for the tracefs api, with a
// trailing (REF_CTR_OFFSET) only when args.refCtrOffset is non-zero.
func uprobeToken(args probeArgs) string {
	if args.refCtrOffset == 0 {
		return fmt.Sprintf("%s:%#x", args.path, args.offset)
	}

	// This is not documented in Documentation/trace/uprobetracer.txt.
	// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564
	return fmt.Sprintf("%s:%#x(%#x)", args.path, args.offset, args.refCtrOffset)
}
// uretprobeBit returns the value produced by determineRetprobeBit(uprobeType).
//
// The lookup runs at most once, guarded by uprobeRetprobeBit.once; its value
// and error are stored in uprobeRetprobeBit and returned on every call.
func uretprobeBit() (uint64, error) {
uprobeRetprobeBit.once.Do(func() {
uprobeRetprobeBit.value, uprobeRetprobeBit.err = determineRetprobeBit(uprobeType)
})
return uprobeRetprobeBit.value, uprobeRetprobeBit.err
}