go.mod: runc v1.0.0
Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
committed by
Davanum Srinivas
parent
28bb59c080
commit
f913a42755
284
vendor/github.com/cilium/ebpf/link/kprobe.go
generated
vendored
284
vendor/github.com/cilium/ebpf/link/kprobe.go
generated
vendored
@@ -1,12 +1,16 @@
|
||||
package link
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/internal"
|
||||
@@ -15,13 +19,60 @@ import (
|
||||
|
||||
var (
|
||||
kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events")
|
||||
|
||||
kprobeRetprobeBit = struct {
|
||||
once sync.Once
|
||||
value uint64
|
||||
err error
|
||||
}{}
|
||||
)
|
||||
|
||||
type probeType uint8
|
||||
|
||||
const (
|
||||
kprobeType probeType = iota
|
||||
uprobeType
|
||||
)
|
||||
|
||||
func (pt probeType) String() string {
|
||||
if pt == kprobeType {
|
||||
return "kprobe"
|
||||
}
|
||||
return "uprobe"
|
||||
}
|
||||
|
||||
func (pt probeType) EventsPath() string {
|
||||
if pt == kprobeType {
|
||||
return kprobeEventsPath
|
||||
}
|
||||
return uprobeEventsPath
|
||||
}
|
||||
|
||||
func (pt probeType) PerfEventType(ret bool) perfEventType {
|
||||
if pt == kprobeType {
|
||||
if ret {
|
||||
return kretprobeEvent
|
||||
}
|
||||
return kprobeEvent
|
||||
}
|
||||
if ret {
|
||||
return uretprobeEvent
|
||||
}
|
||||
return uprobeEvent
|
||||
}
|
||||
|
||||
func (pt probeType) RetprobeBit() (uint64, error) {
|
||||
if pt == kprobeType {
|
||||
return kretprobeBit()
|
||||
}
|
||||
return uretprobeBit()
|
||||
}
|
||||
|
||||
// Kprobe attaches the given eBPF program to a perf event that fires when the
|
||||
// given kernel symbol starts executing. See /proc/kallsyms for available
|
||||
// symbols. For example, printk():
|
||||
//
|
||||
// Kprobe("printk")
|
||||
// Kprobe("printk", prog)
|
||||
//
|
||||
// The resulting Link must be Closed during program shutdown to avoid leaking
|
||||
// system resources.
|
||||
@@ -44,7 +95,7 @@ func Kprobe(symbol string, prog *ebpf.Program) (Link, error) {
|
||||
// before the given kernel symbol exits, with the function stack left intact.
|
||||
// See /proc/kallsyms for available symbols. For example, printk():
|
||||
//
|
||||
// Kretprobe("printk")
|
||||
// Kretprobe("printk", prog)
|
||||
//
|
||||
// The resulting Link must be Closed during program shutdown to avoid leaking
|
||||
// system resources.
|
||||
@@ -80,7 +131,10 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
|
||||
}
|
||||
|
||||
// Use kprobe PMU if the kernel has it available.
|
||||
tp, err := pmuKprobe(symbol, ret)
|
||||
tp, err := pmuKprobe(platformPrefix(symbol), ret)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
tp, err = pmuKprobe(symbol, ret)
|
||||
}
|
||||
if err == nil {
|
||||
return tp, nil
|
||||
}
|
||||
@@ -89,7 +143,10 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
|
||||
}
|
||||
|
||||
// Use tracefs if kprobe PMU is missing.
|
||||
tp, err = tracefsKprobe(symbol, ret)
|
||||
tp, err = tracefsKprobe(platformPrefix(symbol), ret)
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
tp, err = tracefsKprobe(symbol, ret)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err)
|
||||
}
|
||||
@@ -97,36 +154,70 @@ func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) {
|
||||
return tp, nil
|
||||
}
|
||||
|
||||
// pmuKprobe opens a perf event based on a Performance Monitoring Unit.
|
||||
// Requires at least 4.17 (e12f03d7031a "perf/core: Implement the
|
||||
// 'perf_kprobe' PMU").
|
||||
// Returns ErrNotSupported if the kernel doesn't support perf_kprobe PMU,
|
||||
// or os.ErrNotExist if the given symbol does not exist in the kernel.
|
||||
// pmuKprobe opens a perf event based on the kprobe PMU.
|
||||
// Returns os.ErrNotExist if the given symbol does not exist in the kernel.
|
||||
func pmuKprobe(symbol string, ret bool) (*perfEvent, error) {
|
||||
return pmuProbe(kprobeType, symbol, "", 0, ret)
|
||||
}
|
||||
|
||||
// pmuProbe opens a perf event based on a Performance Monitoring Unit.
|
||||
//
|
||||
// Requires at least a 4.17 kernel.
|
||||
// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU"
|
||||
// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU"
|
||||
//
|
||||
// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU
|
||||
func pmuProbe(typ probeType, symbol, path string, offset uint64, ret bool) (*perfEvent, error) {
|
||||
// Getting the PMU type will fail if the kernel doesn't support
|
||||
// the perf_kprobe PMU.
|
||||
et, err := getPMUEventType("kprobe")
|
||||
// the perf_[k,u]probe PMU.
|
||||
et, err := getPMUEventType(typ)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create a pointer to a NUL-terminated string for the kernel.
|
||||
sp, err := unsafeStringPtr(symbol)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// TODO: Parse the position of the bit from /sys/bus/event_source/devices/%s/format/retprobe.
|
||||
config := 0
|
||||
var config uint64
|
||||
if ret {
|
||||
config = 1
|
||||
bit, err := typ.RetprobeBit()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config |= 1 << bit
|
||||
}
|
||||
|
||||
attr := unix.PerfEventAttr{
|
||||
Type: uint32(et), // PMU event type read from sysfs
|
||||
Ext1: uint64(uintptr(sp)), // Kernel symbol to trace
|
||||
Config: uint64(config), // perf_kprobe PMU treats config as flags
|
||||
var (
|
||||
attr unix.PerfEventAttr
|
||||
sp unsafe.Pointer
|
||||
)
|
||||
switch typ {
|
||||
case kprobeType:
|
||||
// Create a pointer to a NUL-terminated string for the kernel.
|
||||
sp, err := unsafeStringPtr(symbol)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
attr = unix.PerfEventAttr{
|
||||
Type: uint32(et), // PMU event type read from sysfs
|
||||
Ext1: uint64(uintptr(sp)), // Kernel symbol to trace
|
||||
Config: config, // Retprobe flag
|
||||
}
|
||||
case uprobeType:
|
||||
sp, err := unsafeStringPtr(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
attr = unix.PerfEventAttr{
|
||||
// The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1,
|
||||
// since it added the config2 (Ext2) field. The Size field controls the
|
||||
// size of the internal buffer the kernel allocates for reading the
|
||||
// perf_event_attr argument from userspace.
|
||||
Size: unix.PERF_ATTR_SIZE_VER1,
|
||||
Type: uint32(et), // PMU event type read from sysfs
|
||||
Ext1: uint64(uintptr(sp)), // Uprobe path
|
||||
Ext2: offset, // Uprobe offset
|
||||
Config: config, // Retprobe flag
|
||||
}
|
||||
}
|
||||
|
||||
fd, err := unix.PerfEventOpen(&attr, perfAllThreads, 0, -1, unix.PERF_FLAG_FD_CLOEXEC)
|
||||
@@ -144,22 +235,27 @@ func pmuKprobe(symbol string, ret bool) (*perfEvent, error) {
|
||||
// Ensure the string pointer is not collected before PerfEventOpen returns.
|
||||
runtime.KeepAlive(sp)
|
||||
|
||||
// Kernel has perf_kprobe PMU available, initialize perf event.
|
||||
// Kernel has perf_[k,u]probe PMU available, initialize perf event.
|
||||
return &perfEvent{
|
||||
fd: internal.NewFD(uint32(fd)),
|
||||
pmuID: et,
|
||||
name: symbol,
|
||||
ret: ret,
|
||||
progType: ebpf.Kprobe,
|
||||
fd: internal.NewFD(uint32(fd)),
|
||||
pmuID: et,
|
||||
name: symbol,
|
||||
typ: typ.PerfEventType(ret),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// tracefsKprobe creates a trace event by writing an entry to <tracefs>/kprobe_events.
|
||||
// A new trace event group name is generated on every call to support creating
|
||||
// multiple trace events for the same kernel symbol. A perf event is then opened
|
||||
// on the newly-created trace event and returned to the caller.
|
||||
// tracefsKprobe creates a Kprobe tracefs entry.
|
||||
func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) {
|
||||
return tracefsProbe(kprobeType, symbol, "", 0, ret)
|
||||
}
|
||||
|
||||
// tracefsProbe creates a trace event by writing an entry to <tracefs>/[k,u]probe_events.
|
||||
// A new trace event group name is generated on every call to support creating
|
||||
// multiple trace events for the same kernel or userspace symbol.
|
||||
// Path and offset are only set in the case of uprobe(s) and are used to set
|
||||
// the executable/library path on the filesystem and the offset where the probe is inserted.
|
||||
// A perf event is then opened on the newly-created trace event and returned to the caller.
|
||||
func tracefsProbe(typ probeType, symbol, path string, offset uint64, ret bool) (*perfEvent, error) {
|
||||
// Generate a random string for each trace event we attempt to create.
|
||||
// This value is used as the 'group' token in tracefs to allow creating
|
||||
// multiple kprobe trace events with the same name.
|
||||
@@ -176,14 +272,13 @@ func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) {
|
||||
if err == nil {
|
||||
return nil, fmt.Errorf("trace event already exists: %s/%s", group, symbol)
|
||||
}
|
||||
// The read is expected to fail with ErrNotSupported due to a non-existing event.
|
||||
if err != nil && !errors.Is(err, ErrNotSupported) {
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return nil, fmt.Errorf("checking trace event %s/%s: %w", group, symbol, err)
|
||||
}
|
||||
|
||||
// Create the kprobe trace event using tracefs.
|
||||
if err := createTraceFSKprobeEvent(group, symbol, ret); err != nil {
|
||||
return nil, fmt.Errorf("creating kprobe event on tracefs: %w", err)
|
||||
// Create the [k,u]probe trace event using tracefs.
|
||||
if err := createTraceFSProbeEvent(typ, group, symbol, path, offset, ret); err != nil {
|
||||
return nil, fmt.Errorf("creating probe entry on tracefs: %w", err)
|
||||
}
|
||||
|
||||
// Get the newly-created trace event's id.
|
||||
@@ -202,65 +297,83 @@ func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) {
|
||||
fd: fd,
|
||||
group: group,
|
||||
name: symbol,
|
||||
ret: ret,
|
||||
tracefsID: tid,
|
||||
progType: ebpf.Kprobe, // kernel only allows attaching kprobe programs to kprobe events
|
||||
typ: typ.PerfEventType(ret),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// createTraceFSKprobeEvent creates a new ephemeral trace event by writing to
|
||||
// <tracefs>/kprobe_events. Returns ErrNotSupported if symbol is not a valid
|
||||
// kernel symbol, or if it is not traceable with kprobes.
|
||||
func createTraceFSKprobeEvent(group, symbol string, ret bool) error {
|
||||
// createTraceFSProbeEvent creates a new ephemeral trace event by writing to
|
||||
// <tracefs>/[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid
|
||||
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
|
||||
// if a probe with the same group and symbol already exists.
|
||||
func createTraceFSProbeEvent(typ probeType, group, symbol, path string, offset uint64, ret bool) error {
|
||||
// Open the kprobe_events file in tracefs.
|
||||
f, err := os.OpenFile(kprobeEventsPath, os.O_APPEND|os.O_WRONLY, 0666)
|
||||
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening kprobe_events: %w", err)
|
||||
return fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
|
||||
// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
|
||||
// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
|
||||
// -:[GRP/]EVENT : Clear a probe
|
||||
//
|
||||
// Some examples:
|
||||
// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
|
||||
// p:ebpf_5678/p_my_kprobe __x64_sys_execve
|
||||
//
|
||||
// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
|
||||
// kernel default to NR_CPUS. This is desired in most eBPF cases since
|
||||
// subsampling or rate limiting logic can be more accurately implemented in
|
||||
// the eBPF program itself. See Documentation/kprobes.txt for more details.
|
||||
pe := fmt.Sprintf("%s:%s/%s %s", kprobePrefix(ret), group, symbol, symbol)
|
||||
var pe string
|
||||
switch typ {
|
||||
case kprobeType:
|
||||
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
|
||||
// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
|
||||
// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
|
||||
// -:[GRP/]EVENT : Clear a probe
|
||||
//
|
||||
// Some examples:
|
||||
// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
|
||||
// p:ebpf_5678/p_my_kprobe __x64_sys_execve
|
||||
//
|
||||
// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
|
||||
// kernel default to NR_CPUS. This is desired in most eBPF cases since
|
||||
// subsampling or rate limiting logic can be more accurately implemented in
|
||||
// the eBPF program itself.
|
||||
// See Documentation/kprobes.txt for more details.
|
||||
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(ret), group, symbol, symbol)
|
||||
case uprobeType:
|
||||
// The uprobe_events syntax is as follows:
|
||||
// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
|
||||
// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
|
||||
// -:[GRP/]EVENT : Clear a probe
|
||||
//
|
||||
// Some examples:
|
||||
// r:ebpf_1234/readline /bin/bash:0x12345
|
||||
// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345
|
||||
//
|
||||
// See Documentation/trace/uprobetracer.txt for more details.
|
||||
pathOffset := uprobePathOffset(path, offset)
|
||||
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(ret), group, symbol, pathOffset)
|
||||
}
|
||||
_, err = f.WriteString(pe)
|
||||
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
|
||||
// when trying to create a kretprobe for a missing symbol. Make sure ENOENT
|
||||
// is returned to the caller.
|
||||
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) {
|
||||
return fmt.Errorf("kernel symbol %s not found: %w", symbol, os.ErrNotExist)
|
||||
return fmt.Errorf("symbol %s not found: %w", symbol, os.ErrNotExist)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("writing '%s' to kprobe_events: %w", pe, err)
|
||||
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// closeTraceFSKprobeEvent removes the kprobe with the given group, symbol and kind
|
||||
// from <tracefs>/kprobe_events.
|
||||
func closeTraceFSKprobeEvent(group, symbol string) error {
|
||||
f, err := os.OpenFile(kprobeEventsPath, os.O_APPEND|os.O_WRONLY, 0666)
|
||||
// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol
|
||||
// from <tracefs>/[k,u]probe_events.
|
||||
func closeTraceFSProbeEvent(typ probeType, group, symbol string) error {
|
||||
f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening kprobe_events: %w", err)
|
||||
return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// See kprobe_events syntax above. Kprobe type does not need to be specified
|
||||
// See [k,u]probe_events syntax above. The probe type does not need to be specified
|
||||
// for removals.
|
||||
pe := fmt.Sprintf("-:%s/%s", group, symbol)
|
||||
if _, err = f.WriteString(pe); err != nil {
|
||||
return fmt.Errorf("writing '%s' to kprobe_events: %w", pe, err)
|
||||
return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -288,9 +401,38 @@ func randomGroup(prefix string) (string, error) {
|
||||
return group, nil
|
||||
}
|
||||
|
||||
func kprobePrefix(ret bool) string {
|
||||
func probePrefix(ret bool) string {
|
||||
if ret {
|
||||
return "r"
|
||||
}
|
||||
return "p"
|
||||
}
|
||||
|
||||
// determineRetprobeBit reads a Performance Monitoring Unit's retprobe bit
|
||||
// from /sys/bus/event_source/devices/<pmu>/format/retprobe.
|
||||
func determineRetprobeBit(typ probeType) (uint64, error) {
|
||||
p := filepath.Join("/sys/bus/event_source/devices/", typ.String(), "/format/retprobe")
|
||||
|
||||
data, err := ioutil.ReadFile(p)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var rp uint64
|
||||
n, err := fmt.Sscanf(string(bytes.TrimSpace(data)), "config:%d", &rp)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("parse retprobe bit: %w", err)
|
||||
}
|
||||
if n != 1 {
|
||||
return 0, fmt.Errorf("parse retprobe bit: expected 1 item, got %d", n)
|
||||
}
|
||||
|
||||
return rp, nil
|
||||
}
|
||||
|
||||
func kretprobeBit() (uint64, error) {
|
||||
kprobeRetprobeBit.once.Do(func() {
|
||||
kprobeRetprobeBit.value, kprobeRetprobeBit.err = determineRetprobeBit(kprobeType)
|
||||
})
|
||||
return kprobeRetprobeBit.value, kprobeRetprobeBit.err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user