build(deps): bump github.com/containerd/cgroups/v3 from 3.0.2 to 3.0.3

Bumps [github.com/containerd/cgroups/v3](https://github.com/containerd/cgroups) from 3.0.2 to 3.0.3.
- [Release notes](https://github.com/containerd/cgroups/releases)
- [Commits](https://github.com/containerd/cgroups/compare/v3.0.2...v3.0.3)

---
updated-dependencies:
- dependency-name: github.com/containerd/cgroups/v3
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Commit 5387747e92 (parent 1f76ca4081), authored by dependabot[bot] on 2023-12-29 11:45:53 +00:00 and committed via GitHub.
119 changed files with 8743 additions and 3476 deletions


@@ -1,6 +1,8 @@
package internal
import "golang.org/x/exp/constraints"
// Align returns 'n' updated to 'alignment' boundary.
func Align(n, alignment int) int {
return (int(n) + alignment - 1) / alignment * alignment
func Align[I constraints.Integer](n, alignment I) I {
return (n + alignment - 1) / alignment * alignment
}
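The hunk above makes Align generic over any integer type via constraints.Integer instead of accepting only int. Below is a minimal standalone sketch of the rounding behaviour; the local name align simply mirrors the diffed function for illustration and is not part of the library.

```go
package main

import (
	"fmt"

	"golang.org/x/exp/constraints"
)

// align mirrors the generic Align from the hunk above, for illustration only.
func align[I constraints.Integer](n, alignment I) I {
	return (n + alignment - 1) / alignment * alignment
}

func main() {
	fmt.Println(align(3, 8))                    // int: 8
	fmt.Println(align(uint32(17), uint32(8)))   // uint32: 24
	fmt.Println(align(int64(4096), int64(512))) // already aligned: 4096
}
```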

vendor/github.com/cilium/ebpf/internal/buffer.go (generated, vendored, new file)

@@ -0,0 +1,31 @@
package internal
import (
"bytes"
"sync"
)
var bytesBufferPool = sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
}
// NewBuffer retrieves a [bytes.Buffer] from a pool and re-initialises it.
//
// The returned buffer should be passed to [PutBuffer].
func NewBuffer(buf []byte) *bytes.Buffer {
wr := bytesBufferPool.Get().(*bytes.Buffer)
// Reinitialize the Buffer with a new backing slice since it is returned to
// the caller by wr.Bytes() below. Pooling is faster despite calling
// NewBuffer. The pooled alloc is still reused, it only needs to be zeroed.
*wr = *bytes.NewBuffer(buf)
return wr
}
// PutBuffer releases a buffer to the pool.
func PutBuffer(buf *bytes.Buffer) {
// Release reference to the backing buffer.
*buf = *bytes.NewBuffer(nil)
bytesBufferPool.Put(buf)
}
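NewBuffer hands out pooled *bytes.Buffer values re-initialised with the caller's backing slice, and PutBuffer drops the backing reference before returning the wrapper to the pool. A minimal usage sketch, written as if inside the same internal package; the function name below is illustrative.

```go
package internal

import "fmt"

// bufferDemo writes through a pooled buffer and releases it again.
func bufferDemo() {
	backing := make([]byte, 0, 64)

	w := NewBuffer(backing) // pooled wrapper around our slice
	w.WriteString("hello")
	fmt.Println(w.String()) // "hello"

	out := w.Bytes() // the backing bytes stay valid after PutBuffer
	PutBuffer(w)     // wrapper returns to the pool, reference dropped
	fmt.Println(string(out))
}
```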


@@ -4,24 +4,13 @@ import (
"fmt"
"os"
"strings"
"sync"
)
var sysCPU struct {
once sync.Once
err error
num int
}
// PossibleCPUs returns the max number of CPUs a system may possibly have
// Logical CPU numbers must be of the form 0-n
func PossibleCPUs() (int, error) {
sysCPU.once.Do(func() {
sysCPU.num, sysCPU.err = parseCPUsFromFile("/sys/devices/system/cpu/possible")
})
return sysCPU.num, sysCPU.err
}
var PossibleCPUs = Memoize(func() (int, error) {
return parseCPUsFromFile("/sys/devices/system/cpu/possible")
})
func parseCPUsFromFile(path string) (int, error) {
spec, err := os.ReadFile(path)

vendor/github.com/cilium/ebpf/internal/deque.go (generated, vendored, new file)

@@ -0,0 +1,91 @@
package internal
import "math/bits"
// Deque implements a double ended queue.
type Deque[T any] struct {
elems []T
read, write uint64
mask uint64
}
// Reset clears the contents of the deque while retaining the backing buffer.
func (dq *Deque[T]) Reset() {
var zero T
for i := dq.read; i < dq.write; i++ {
dq.elems[i&dq.mask] = zero
}
dq.read, dq.write = 0, 0
}
func (dq *Deque[T]) Empty() bool {
return dq.read == dq.write
}
// Push adds an element to the end.
func (dq *Deque[T]) Push(e T) {
dq.Grow(1)
dq.elems[dq.write&dq.mask] = e
dq.write++
}
// Shift returns the first element or the zero value.
func (dq *Deque[T]) Shift() T {
var zero T
if dq.Empty() {
return zero
}
index := dq.read & dq.mask
t := dq.elems[index]
dq.elems[index] = zero
dq.read++
return t
}
// Pop returns the last element or the zero value.
func (dq *Deque[T]) Pop() T {
var zero T
if dq.Empty() {
return zero
}
dq.write--
index := dq.write & dq.mask
t := dq.elems[index]
dq.elems[index] = zero
return t
}
// Grow the deque's capacity, if necessary, to guarantee space for another n
// elements.
func (dq *Deque[T]) Grow(n int) {
have := dq.write - dq.read
need := have + uint64(n)
if need < have {
panic("overflow")
}
if uint64(len(dq.elems)) >= need {
return
}
// Round up to the new power of two which is at least 8.
// See https://jameshfisher.com/2018/03/30/round-up-power-2/
capacity := 1 << (64 - bits.LeadingZeros64(need-1))
if capacity < 8 {
capacity = 8
}
elems := make([]T, have, capacity)
pivot := dq.read & dq.mask
copied := copy(elems, dq.elems[pivot:])
copy(elems[copied:], dq.elems[:pivot])
dq.elems = elems[:capacity]
dq.mask = uint64(capacity) - 1
dq.read, dq.write = 0, have
}
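Deque is a generic, ring-buffer-backed double ended queue: Push appends at the back, Shift removes from the front, Pop removes from the back, and Grow rounds the capacity up to a power of two of at least 8. A minimal sketch of how it might be exercised, written as if it lived in the same internal package.

```go
package internal

import "fmt"

// dequeDemo drains a small deque from both ends.
func dequeDemo() {
	var dq Deque[string]
	dq.Push("a")
	dq.Push("b")
	dq.Push("c")

	fmt.Println(dq.Shift()) // "a" (front)
	fmt.Println(dq.Pop())   // "c" (back)
	fmt.Println(dq.Pop())   // "b"
	fmt.Println(dq.Empty()) // true
	fmt.Println(dq.Pop())   // "" (zero value once empty)
}
```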


@@ -1,5 +1,4 @@
//go:build armbe || arm64be || mips || mips64 || mips64p32 || ppc64 || s390 || s390x || sparc || sparc64
// +build armbe arm64be mips mips64 mips64p32 ppc64 s390 s390x sparc sparc64
package internal


@@ -1,5 +1,4 @@
//go:build 386 || amd64 || amd64p32 || arm || arm64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64
// +build 386 amd64 amd64p32 arm arm64 mipsle mips64le mips64p32le ppc64le riscv64
//go:build 386 || amd64 || amd64p32 || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || ppc64le || riscv64
package internal


@@ -7,32 +7,25 @@ import (
"strings"
)
// ErrorWithLog returns an error which includes logs from the kernel verifier.
// ErrorWithLog wraps err in a VerifierError that includes the parsed verifier
// log buffer.
//
// The default error output is a summary of the full log. The latter can be
// accessed via VerifierError.Log or by formatting the error, see Format.
//
// A set of heuristics is used to determine whether the log has been truncated.
func ErrorWithLog(err error, log []byte) *VerifierError {
func ErrorWithLog(source string, err error, log []byte, truncated bool) *VerifierError {
const whitespace = "\t\r\v\n "
// Convert verifier log C string by truncating it on the first 0 byte
// and trimming trailing whitespace before interpreting as a Go string.
truncated := false
if i := bytes.IndexByte(log, 0); i != -1 {
if i == len(log)-1 && !bytes.HasSuffix(log[:i], []byte{'\n'}) {
// The null byte is at the end of the buffer and it's not preceded
// by a newline character. Most likely the buffer was too short.
truncated = true
}
log = log[:i]
} else if len(log) > 0 {
// No null byte? Dodgy!
truncated = true
}
log = bytes.Trim(log, whitespace)
if len(log) == 0 {
return &VerifierError{source, err, nil, truncated}
}
logLines := bytes.Split(log, []byte{'\n'})
lines := make([]string, 0, len(logLines))
for _, line := range logLines {
@@ -41,13 +34,14 @@ func ErrorWithLog(err error, log []byte) *VerifierError {
lines = append(lines, string(bytes.TrimRight(line, whitespace)))
}
return &VerifierError{err, lines, truncated}
return &VerifierError{source, err, lines, truncated}
}
// VerifierError includes information from the eBPF verifier.
//
// It summarises the log output, see Format if you want to output the full contents.
type VerifierError struct {
source string
// The error which caused this error.
Cause error
// The verifier output split into lines.
@@ -67,9 +61,12 @@ func (le *VerifierError) Error() string {
log = log[:n-1]
}
var b strings.Builder
fmt.Fprintf(&b, "%s: %s", le.source, le.Cause.Error())
n := len(log)
if n == 0 {
return le.Cause.Error()
return b.String()
}
lines := log[n-1:]
@@ -78,14 +75,9 @@ func (le *VerifierError) Error() string {
lines = log[n-2:]
}
var b strings.Builder
fmt.Fprintf(&b, "%s: ", le.Cause.Error())
for i, line := range lines {
for _, line := range lines {
b.WriteString(": ")
b.WriteString(strings.TrimSpace(line))
if i != len(lines)-1 {
b.WriteString(": ")
}
}
omitted := len(le.Log) - len(lines)
@@ -143,8 +135,8 @@ func includePreviousLine(line string) bool {
// Understood verbs are %s and %v, which are equivalent to calling Error(). %v
// allows outputting additional information using the following flags:
//
// + Output the first <width> lines, or all lines if no width is given.
// - Output the last <width> lines, or all lines if no width is given.
// %+<width>v: Output the first <width> lines, or all lines if no width is given.
// %-<width>v: Output the last <width> lines, or all lines if no width is given.
//
// Use width to specify how many lines to output. Use the '-' flag to output
// lines from the end of the log instead of the beginning.
@@ -174,7 +166,7 @@ func (le *VerifierError) Format(f fmt.State, verb rune) {
return
}
fmt.Fprintf(f, "%s:", le.Cause.Error())
fmt.Fprintf(f, "%s: %s:", le.source, le.Cause.Error())
omitted := len(le.Log) - n
lines := le.Log[:n]
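The reworked ErrorWithLog stores the source and a pre-parsed truncation flag, and the Format doc comment now spells out the %+<width>v and %-<width>v verbs. A hedged sketch of how a caller of the public ebpf package might print the log through those verbs, loading a deliberately invalid program so the verifier rejects it; the rejected program itself is only illustrative.

```go
package main

import (
	"errors"
	"fmt"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/asm"
)

func main() {
	// A program that returns without setting R0 is rejected by the verifier.
	_, err := ebpf.NewProgram(&ebpf.ProgramSpec{
		Type:         ebpf.SocketFilter,
		Instructions: asm.Instructions{asm.Return()},
		License:      "MIT",
	})

	var verr *ebpf.VerifierError
	if errors.As(err, &verr) {
		fmt.Printf("%v\n", verr)    // summary: source, cause, last log lines
		fmt.Printf("%+5v\n", verr)  // first 5 lines of the verifier log
		fmt.Printf("%-10v\n", verr) // last 10 lines of the verifier log
	}
}
```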


@@ -31,10 +31,20 @@ func (ufe *UnsupportedFeatureError) Is(target error) bool {
return target == ErrNotSupported
}
type featureTest struct {
sync.RWMutex
successful bool
result error
// FeatureTest caches the result of a [FeatureTestFn].
//
// Fields should not be modified after creation.
type FeatureTest struct {
// The name of the feature being detected.
Name string
// Version in the form Major.Minor[.Patch].
Version string
// The feature test itself.
Fn FeatureTestFn
mu sync.RWMutex
done bool
result error
}
// FeatureTestFn is used to determine whether the kernel supports
@@ -42,59 +52,133 @@ type featureTest struct {
//
// The return values have the following semantics:
//
// err == ErrNotSupported: the feature is not available
// err == nil: the feature is available
// err != nil: the test couldn't be executed
// err == ErrNotSupported: the feature is not available
// err == nil: the feature is available
// err != nil: the test couldn't be executed
type FeatureTestFn func() error
// FeatureTest wraps a function so that it is run at most once.
// NewFeatureTest is a convenient way to create a single [FeatureTest].
func NewFeatureTest(name, version string, fn FeatureTestFn) func() error {
ft := &FeatureTest{
Name: name,
Version: version,
Fn: fn,
}
return ft.execute
}
// execute the feature test.
//
// name should identify the tested feature, while version must be in the
// form Major.Minor[.Patch].
// The result is cached if the test is conclusive.
//
// Returns an error wrapping ErrNotSupported if the feature is not supported.
func FeatureTest(name, version string, fn FeatureTestFn) func() error {
ft := new(featureTest)
return func() error {
ft.RLock()
if ft.successful {
defer ft.RUnlock()
return ft.result
}
ft.RUnlock()
ft.Lock()
defer ft.Unlock()
// check one more time on the off
// chance that two go routines
// were able to call into the write
// lock
if ft.successful {
return ft.result
}
err := fn()
switch {
case errors.Is(err, ErrNotSupported):
v, err := NewVersion(version)
// See [FeatureTestFn] for the meaning of the returned error.
func (ft *FeatureTest) execute() error {
ft.mu.RLock()
result, done := ft.result, ft.done
ft.mu.RUnlock()
if done {
return result
}
ft.mu.Lock()
defer ft.mu.Unlock()
// The test may have been executed by another caller while we were
// waiting to acquire ft.mu.
if ft.done {
return ft.result
}
err := ft.Fn()
if err == nil {
ft.done = true
return nil
}
if errors.Is(err, ErrNotSupported) {
var v Version
if ft.Version != "" {
v, err = NewVersion(ft.Version)
if err != nil {
return err
return fmt.Errorf("feature %s: %w", ft.Name, err)
}
}
ft.result = &UnsupportedFeatureError{
MinimumVersion: v,
Name: name,
}
fallthrough
case err == nil:
ft.successful = true
default:
// We couldn't execute the feature test to a point
// where it could make a determination.
// Don't cache the result, just return it.
return fmt.Errorf("detect support for %s: %w", name, err)
ft.done = true
ft.result = &UnsupportedFeatureError{
MinimumVersion: v,
Name: ft.Name,
}
return ft.result
}
// We couldn't execute the feature test to a point
// where it could make a determination.
// Don't cache the result, just return it.
return fmt.Errorf("detect support for %s: %w", ft.Name, err)
}
// FeatureMatrix groups multiple related feature tests into a map.
//
// Useful when there is a small number of discrete features which are known
// at compile time.
//
// It must not be modified concurrently with calling [FeatureMatrix.Result].
type FeatureMatrix[K comparable] map[K]*FeatureTest
// Result returns the outcome of the feature test for the given key.
//
// It's safe to call this function concurrently.
func (fm FeatureMatrix[K]) Result(key K) error {
ft, ok := fm[key]
if !ok {
return fmt.Errorf("no feature probe for %v", key)
}
return ft.execute()
}
// FeatureCache caches a potentially unlimited number of feature probes.
//
// Useful when there is a high cardinality for a feature test.
type FeatureCache[K comparable] struct {
mu sync.RWMutex
newTest func(K) *FeatureTest
features map[K]*FeatureTest
}
func NewFeatureCache[K comparable](newTest func(K) *FeatureTest) *FeatureCache[K] {
return &FeatureCache[K]{
newTest: newTest,
features: make(map[K]*FeatureTest),
}
}
func (fc *FeatureCache[K]) Result(key K) error {
// NB: Executing the feature test happens without fc.mu taken.
return fc.retrieve(key).execute()
}
func (fc *FeatureCache[K]) retrieve(key K) *FeatureTest {
fc.mu.RLock()
ft := fc.features[key]
fc.mu.RUnlock()
if ft != nil {
return ft
}
fc.mu.Lock()
defer fc.mu.Unlock()
if ft := fc.features[key]; ft != nil {
return ft
}
ft = fc.newTest(key)
fc.features[key] = ft
return ft
}
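The feature probe machinery is reworked: featureTest becomes the exported FeatureTest struct, NewFeatureTest wraps one probe, FeatureMatrix caches a fixed set of probes keyed by a comparable type, and FeatureCache handles high-cardinality keys. A minimal sketch of how these might be wired together, written as if inside the same internal package; the probe names, versions and bodies are placeholders.

```go
package internal

import "errors"

// haveFoo caches a single probe; the name and version are illustrative.
var haveFoo = NewFeatureTest("foo", "5.8", func() error {
	// A real probe would talk to the kernel; return ErrNotSupported when
	// the feature is missing, nil when present, and any other error when
	// the probe itself could not run (that result is not cached).
	return ErrNotSupported
})

// helpers groups a small, fixed set of probes known at compile time.
var helpers = FeatureMatrix[string]{
	"foo": {Name: "foo", Version: "5.8", Fn: func() error { return nil }},
	"bar": {Name: "bar", Version: "5.13", Fn: func() error { return ErrNotSupported }},
}

func featureDemo() error {
	if err := haveFoo(); errors.Is(err, ErrNotSupported) {
		// Feature missing; the wrapped UnsupportedFeatureError carries
		// the name and minimum version.
		return err
	} else if err != nil {
		return err // probe failed to execute
	}
	return helpers.Result("bar") // cached per key, safe to call concurrently
}
```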


@@ -2,10 +2,14 @@ package internal
import (
"bufio"
"bytes"
"compress/gzip"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"sync"
)
// NewBufferedSectionReader wraps an io.ReaderAt in an appropriately-sized
@@ -60,3 +64,65 @@ func ReadAllCompressed(file string) ([]byte, error) {
return io.ReadAll(gz)
}
// ReadUint64FromFile reads a uint64 from a file.
//
// format specifies the contents of the file in fmt.Scanf syntax.
func ReadUint64FromFile(format string, path ...string) (uint64, error) {
filename := filepath.Join(path...)
data, err := os.ReadFile(filename)
if err != nil {
return 0, fmt.Errorf("reading file %q: %w", filename, err)
}
var value uint64
n, err := fmt.Fscanf(bytes.NewReader(data), format, &value)
if err != nil {
return 0, fmt.Errorf("parsing file %q: %w", filename, err)
}
if n != 1 {
return 0, fmt.Errorf("parsing file %q: expected 1 item, got %d", filename, n)
}
return value, nil
}
type uint64FromFileKey struct {
format, path string
}
var uint64FromFileCache = struct {
sync.RWMutex
values map[uint64FromFileKey]uint64
}{
values: map[uint64FromFileKey]uint64{},
}
// ReadUint64FromFileOnce is like ReadUint64FromFile but memoizes the result.
func ReadUint64FromFileOnce(format string, path ...string) (uint64, error) {
filename := filepath.Join(path...)
key := uint64FromFileKey{format, filename}
uint64FromFileCache.RLock()
if value, ok := uint64FromFileCache.values[key]; ok {
uint64FromFileCache.RUnlock()
return value, nil
}
uint64FromFileCache.RUnlock()
value, err := ReadUint64FromFile(format, filename)
if err != nil {
return 0, err
}
uint64FromFileCache.Lock()
defer uint64FromFileCache.Unlock()
if value, ok := uint64FromFileCache.values[key]; ok {
// Someone else got here before us, use what is cached.
return value, nil
}
uint64FromFileCache.values[key] = value
return value, nil
}
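ReadUint64FromFile parses a single value out of a file using an fmt.Fscanf format, joining the path segments with filepath.Join, and ReadUint64FromFileOnce memoizes the result per (format, path) pair. A hedged sketch of a caller, written as if inside the same internal package; the path below is illustrative, and the tracefs code later in this diff uses the same pattern to read trace event IDs.

```go
package internal

import "fmt"

// traceEventID reads a decimal id followed by a newline, the same format
// the tracefs helpers in this diff use for events/<group>/<name>/id.
func traceEventID(group, name string) (uint64, error) {
	id, err := ReadUint64FromFileOnce("%d\n",
		"/sys/kernel/tracing/events", group, name, "id")
	if err != nil {
		return 0, fmt.Errorf("trace event id: %w", err)
	}
	return id, nil
}
```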


@@ -0,0 +1,267 @@
package kconfig
import (
"bufio"
"bytes"
"compress/gzip"
"fmt"
"io"
"math"
"os"
"strconv"
"strings"
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
)
// Find finds a kconfig file on the host.
// It first reads from /boot/config- of the current running kernel and tries
// /proc/config.gz if nothing was found in /boot.
// If none of the files provide a kconfig, it returns an error.
func Find() (*os.File, error) {
kernelRelease, err := internal.KernelRelease()
if err != nil {
return nil, fmt.Errorf("cannot get kernel release: %w", err)
}
path := "/boot/config-" + kernelRelease
f, err := os.Open(path)
if err == nil {
return f, nil
}
f, err = os.Open("/proc/config.gz")
if err == nil {
return f, nil
}
return nil, fmt.Errorf("neither %s nor /proc/config.gz provide a kconfig", path)
}
// Parse parses the kconfig file for which a reader is given.
// All the CONFIG_* which are in filter and which are set will be
// put in the returned map as key with their corresponding value as map value.
// If filter is nil, no filtering will occur.
// If the kconfig file is not valid, error will be returned.
func Parse(source io.ReaderAt, filter map[string]struct{}) (map[string]string, error) {
var r io.Reader
zr, err := gzip.NewReader(io.NewSectionReader(source, 0, math.MaxInt64))
if err != nil {
r = io.NewSectionReader(source, 0, math.MaxInt64)
} else {
// Source is gzip compressed, transparently decompress.
r = zr
}
ret := make(map[string]string, len(filter))
s := bufio.NewScanner(r)
for s.Scan() {
line := s.Bytes()
err = processKconfigLine(line, ret, filter)
if err != nil {
return nil, fmt.Errorf("cannot parse line: %w", err)
}
if filter != nil && len(ret) == len(filter) {
break
}
}
if err := s.Err(); err != nil {
return nil, fmt.Errorf("cannot parse: %w", err)
}
if zr != nil {
return ret, zr.Close()
}
return ret, nil
}
// Golang translation of libbpf bpf_object__process_kconfig_line():
// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/libbpf.c#L1874
// It does the same checks but does not put the data inside the BPF map.
func processKconfigLine(line []byte, m map[string]string, filter map[string]struct{}) error {
// Ignore empty lines and "# CONFIG_* is not set".
if !bytes.HasPrefix(line, []byte("CONFIG_")) {
return nil
}
key, value, found := bytes.Cut(line, []byte{'='})
if !found {
return fmt.Errorf("line %q does not contain separator '='", line)
}
if len(value) == 0 {
return fmt.Errorf("line %q has no value", line)
}
if filter != nil {
// NB: map[string(key)] gets special optimisation help from the compiler
// and doesn't allocate. Don't turn this into a variable.
_, ok := filter[string(key)]
if !ok {
return nil
}
}
// This can seem odd, but libbpf only sets the value the first time the key is
// met:
// https://github.com/torvalds/linux/blob/0d85b27b0cc6/tools/lib/bpf/libbpf.c#L1906-L1908
_, ok := m[string(key)]
if !ok {
m[string(key)] = string(value)
}
return nil
}
// PutValue translates the value given as parameter depending on the BTF
// type, the translated value is then written to the byte array.
func PutValue(data []byte, typ btf.Type, value string) error {
typ = btf.UnderlyingType(typ)
switch value {
case "y", "n", "m":
return putValueTri(data, typ, value)
default:
if strings.HasPrefix(value, `"`) {
return putValueString(data, typ, value)
}
return putValueNumber(data, typ, value)
}
}
// Golang translation of libbpf_tristate enum:
// https://github.com/libbpf/libbpf/blob/fbd60dbff51c870f5e80a17c4f2fd639eb80af90/src/bpf_helpers.h#L169
type triState int
const (
TriNo triState = 0
TriYes triState = 1
TriModule triState = 2
)
func putValueTri(data []byte, typ btf.Type, value string) error {
switch v := typ.(type) {
case *btf.Int:
if v.Encoding != btf.Bool {
return fmt.Errorf("cannot add tri value, expected btf.Bool, got: %v", v.Encoding)
}
if v.Size != 1 {
return fmt.Errorf("cannot add tri value, expected size of 1 byte, got: %d", v.Size)
}
switch value {
case "y":
data[0] = 1
case "n":
data[0] = 0
default:
return fmt.Errorf("cannot use %q for btf.Bool", value)
}
case *btf.Enum:
if v.Name != "libbpf_tristate" {
return fmt.Errorf("cannot use enum %q, only libbpf_tristate is supported", v.Name)
}
var tri triState
switch value {
case "y":
tri = TriYes
case "m":
tri = TriModule
case "n":
tri = TriNo
default:
return fmt.Errorf("value %q is not support for libbpf_tristate", value)
}
internal.NativeEndian.PutUint64(data, uint64(tri))
default:
return fmt.Errorf("cannot add number value, expected btf.Int or btf.Enum, got: %T", v)
}
return nil
}
func putValueString(data []byte, typ btf.Type, value string) error {
array, ok := typ.(*btf.Array)
if !ok {
return fmt.Errorf("cannot add string value, expected btf.Array, got %T", array)
}
contentType, ok := btf.UnderlyingType(array.Type).(*btf.Int)
if !ok {
return fmt.Errorf("cannot add string value, expected array of btf.Int, got %T", contentType)
}
// Any Int, which is not bool, of one byte could be used to store char:
// https://github.com/torvalds/linux/blob/1a5304fecee5/tools/lib/bpf/libbpf.c#L3637-L3638
if contentType.Size != 1 && contentType.Encoding != btf.Bool {
return fmt.Errorf("cannot add string value, expected array of btf.Int of size 1, got array of btf.Int of size: %v", contentType.Size)
}
if !strings.HasPrefix(value, `"`) || !strings.HasSuffix(value, `"`) {
return fmt.Errorf(`value %q must start and finish with '"'`, value)
}
str := strings.Trim(value, `"`)
// We need to trim string if the bpf array is smaller.
if uint32(len(str)) >= array.Nelems {
str = str[:array.Nelems]
}
// Write the string content to .kconfig.
copy(data, str)
return nil
}
func putValueNumber(data []byte, typ btf.Type, value string) error {
integer, ok := typ.(*btf.Int)
if !ok {
return fmt.Errorf("cannot add number value, expected *btf.Int, got: %T", integer)
}
size := integer.Size
sizeInBits := size * 8
var n uint64
var err error
if integer.Encoding == btf.Signed {
parsed, e := strconv.ParseInt(value, 0, int(sizeInBits))
n = uint64(parsed)
err = e
} else {
parsed, e := strconv.ParseUint(value, 0, int(sizeInBits))
n = uint64(parsed)
err = e
}
if err != nil {
return fmt.Errorf("cannot parse value: %w", err)
}
switch size {
case 1:
data[0] = byte(n)
case 2:
internal.NativeEndian.PutUint16(data, uint16(n))
case 4:
internal.NativeEndian.PutUint32(data, uint32(n))
case 8:
internal.NativeEndian.PutUint64(data, uint64(n))
default:
return fmt.Errorf("size (%d) is not valid, expected: 1, 2, 4 or 8", size)
}
return nil
}
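The new kconfig package locates the running kernel's configuration (Find), parses it with optional key filtering (Parse), and writes typed values into a data section (PutValue). A sketch of the intended call sequence; note that in the vendored tree the package is internal to cilium/ebpf, so outside modules cannot import it directly, and the snippet is illustrative only.

```go
package main

import (
	"fmt"
	"log"

	"github.com/cilium/ebpf/internal/kconfig"
)

func main() {
	f, err := kconfig.Find() // /boot/config-$(uname -r) or /proc/config.gz
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Only extract the options we care about; Parse stops early once every
	// filtered key has been seen.
	filter := map[string]struct{}{
		"CONFIG_BPF":         {},
		"CONFIG_BPF_SYSCALL": {},
	}
	values, err := kconfig.Parse(f, filter)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(values) // e.g. map[CONFIG_BPF:y CONFIG_BPF_SYSCALL:y]
}
```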

vendor/github.com/cilium/ebpf/internal/memoize.go (generated, vendored, new file)

@@ -0,0 +1,26 @@
package internal
import (
"sync"
)
type memoizedFunc[T any] struct {
once sync.Once
fn func() (T, error)
result T
err error
}
func (mf *memoizedFunc[T]) do() (T, error) {
mf.once.Do(func() {
mf.result, mf.err = mf.fn()
})
return mf.result, mf.err
}
// Memoize the result of a function call.
//
// fn is only ever called once, even if it returns an error.
func Memoize[T any](fn func() (T, error)) func() (T, error) {
return (&memoizedFunc[T]{fn: fn}).do
}
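Memoize wraps a func() (T, error) in a sync.Once so it runs at most once, caching both the value and the error; the cpu.go hunk earlier in this diff uses it to replace the hand-rolled sysCPU struct. A small sketch of another memoized lookup, written as if inside the same internal package; the file and variable below are illustrative.

```go
package internal

import (
	"os"
	"strings"
)

// bootID is read at most once; later calls return the cached value or the
// cached error, exactly like the memoized PossibleCPUs above.
var bootID = Memoize(func() (string, error) {
	b, err := os.ReadFile("/proc/sys/kernel/random/boot_id")
	if err != nil {
		return "", err
	}
	return strings.TrimSpace(string(b)), nil
})
```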


@@ -6,6 +6,7 @@ import (
"go/format"
"go/scanner"
"io"
"reflect"
"strings"
"unicode"
)
@@ -82,3 +83,15 @@ func WriteFormatted(src []byte, out io.Writer) error {
return nel
}
// GoTypeName is like %T, but elides the package name.
//
// Pointers to a type are peeled off.
func GoTypeName(t any) string {
rT := reflect.TypeOf(t)
for rT.Kind() == reflect.Pointer {
rT = rT.Elem()
}
// Doesn't return the correct Name for generic types due to https://github.com/golang/go/issues/55924
return rT.Name()
}
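GoTypeName is a %T-style helper that peels pointers and drops the package qualifier. A small sketch of the difference, written as if inside the same internal package; the example type is illustrative.

```go
package internal

import "fmt"

type demoConfig struct{}

func goTypeNameDemo() {
	fmt.Println(GoTypeName(&demoConfig{})) // "demoConfig": pointer peeled, package elided
	fmt.Println(GoTypeName(demoConfig{}))  // "demoConfig"
	fmt.Printf("%T\n", &demoConfig{})      // "*internal.demoConfig", for comparison
}
```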


@@ -6,15 +6,12 @@ import (
"os"
"path/filepath"
"runtime"
"unsafe"
"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/unix"
)
func Pin(currentPath, newPath string, fd *sys.FD) error {
const bpfFSType = 0xcafe4a11
if newPath == "" {
return errors.New("given pinning path cannot be empty")
}
@@ -22,20 +19,11 @@ func Pin(currentPath, newPath string, fd *sys.FD) error {
return nil
}
var statfs unix.Statfs_t
if err := unix.Statfs(filepath.Dir(newPath), &statfs); err != nil {
fsType, err := FSType(filepath.Dir(newPath))
if err != nil {
return err
}
fsType := int64(statfs.Type)
if unsafe.Sizeof(statfs.Type) == 4 {
// We're on a 32 bit arch, where statfs.Type is int32. bpfFSType is a
// negative number when interpreted as int32 so we need to cast via
// uint32 to avoid sign extension.
fsType = int64(uint32(statfs.Type))
}
if fsType != bpfFSType {
if fsType != unix.BPF_FS_MAGIC {
return fmt.Errorf("%s is not on a bpf filesystem", newPath)
}
@@ -50,7 +38,7 @@ func Pin(currentPath, newPath string, fd *sys.FD) error {
// Renameat2 is used instead of os.Rename to disallow the new path replacing
// an existing path.
err := unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE)
err = unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE)
if err == nil {
// Object is now moved to the new pinning path.
return nil

vendor/github.com/cilium/ebpf/internal/platform.go (generated, vendored, new file)

@@ -0,0 +1,43 @@
package internal
import (
"runtime"
)
// PlatformPrefix returns the platform-dependent syscall wrapper prefix used by
// the linux kernel.
//
// Based on https://github.com/golang/go/blob/master/src/go/build/syslist.go
// and https://github.com/libbpf/libbpf/blob/master/src/libbpf.c#L10047
func PlatformPrefix() string {
switch runtime.GOARCH {
case "386":
return "__ia32_"
case "amd64", "amd64p32":
return "__x64_"
case "arm", "armbe":
return "__arm_"
case "arm64", "arm64be":
return "__arm64_"
case "mips", "mipsle", "mips64", "mips64le", "mips64p32", "mips64p32le":
return "__mips_"
case "s390":
return "__s390_"
case "s390x":
return "__s390x_"
case "riscv", "riscv64":
return "__riscv_"
case "ppc":
return "__powerpc_"
case "ppc64", "ppc64le":
return "__powerpc64_"
default:
return ""
}
}
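PlatformPrefix maps GOARCH to the kernel's per-architecture syscall wrapper prefix. A hedged sketch of how it is typically combined with a syscall name to form a kprobe target symbol; the helper below is illustrative and not part of the library.

```go
package internal

import "fmt"

// syscallSymbol builds the architecture-specific kernel symbol for a
// syscall, e.g. "__x64_sys_execve" on amd64 or "__arm64_sys_execve" on arm64.
func syscallSymbol(name string) string {
	return fmt.Sprintf("%ssys_%s", PlatformPrefix(), name)
}
```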

vendor/github.com/cilium/ebpf/internal/prog.go (generated, vendored, new file)

@@ -0,0 +1,11 @@
package internal
// EmptyBPFContext is the smallest-possible BPF input context to be used for
// invoking `Program.{Run,Benchmark,Test}`.
//
// Programs require a context input buffer of at least 15 bytes. Looking in
// net/bpf/test_run.c, bpf_test_init() requires that the input is at least
// ETH_HLEN (14) bytes. As of Linux commit fd18942 ("bpf: Don't redirect packets
// with invalid pkt_len"), it also requires the skb to be non-empty after
// removing the Layer 2 header.
var EmptyBPFContext = make([]byte, 15)

vendor/github.com/cilium/ebpf/internal/statfs.go (generated, vendored, new file)

@@ -0,0 +1,23 @@
package internal
import (
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
func FSType(path string) (int64, error) {
var statfs unix.Statfs_t
if err := unix.Statfs(path, &statfs); err != nil {
return 0, err
}
fsType := int64(statfs.Type)
if unsafe.Sizeof(statfs.Type) == 4 {
// We're on a 32 bit arch, where statfs.Type is int32. bpfFSType is a
// negative number when interpreted as int32 so we need to cast via
// uint32 to avoid sign extension.
fsType = int64(uint32(statfs.Type))
}
return fsType, nil
}
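FSType factors the 32-bit-safe statfs magic check out of Pin, whose hunk above now compares the result against unix.BPF_FS_MAGIC. A small sketch of the same check for an arbitrary directory, written as if inside the same internal package; the helper name is illustrative.

```go
package internal

import (
	"fmt"

	"github.com/cilium/ebpf/internal/unix"
)

// ensureBPFFS verifies that dir lives on a bpffs mount, mirroring the
// check Pin performs before pinning an object.
func ensureBPFFS(dir string) error {
	fsType, err := FSType(dir)
	if err != nil {
		return err
	}
	if fsType != unix.BPF_FS_MAGIC {
		return fmt.Errorf("%s is not on a bpf filesystem", dir)
	}
	return nil
}
```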


@@ -17,11 +17,39 @@ type FD struct {
}
func newFD(value int) *FD {
if onLeakFD != nil {
// Attempt to store the caller's stack for the given fd value.
// Panic if fds contains an existing stack for the fd.
old, exist := fds.LoadOrStore(value, callersFrames())
if exist {
f := old.(*runtime.Frames)
panic(fmt.Sprintf("found existing stack for fd %d:\n%s", value, FormatFrames(f)))
}
}
fd := &FD{value}
runtime.SetFinalizer(fd, (*FD).Close)
runtime.SetFinalizer(fd, (*FD).finalize)
return fd
}
// finalize is set as the FD's runtime finalizer and
// sends a leak trace before calling FD.Close().
func (fd *FD) finalize() {
if fd.raw < 0 {
return
}
// Invoke the fd leak callback. Calls LoadAndDelete to guarantee the callback
// is invoked at most once for one sys.FD allocation, runtime.Frames can only
// be unwound once.
f, ok := fds.LoadAndDelete(fd.Int())
if ok && onLeakFD != nil {
onLeakFD(f.(*runtime.Frames))
}
_ = fd.Close()
}
// NewFD wraps a raw fd with a finalizer.
//
// You must not use the raw fd after calling this function, since the underlying
@@ -64,15 +92,16 @@ func (fd *FD) Close() error {
return nil
}
value := int(fd.raw)
fd.raw = -1
fd.Forget()
return unix.Close(value)
return unix.Close(fd.disown())
}
func (fd *FD) Forget() {
func (fd *FD) disown() int {
value := int(fd.raw)
fds.Delete(int(value))
fd.raw = -1
runtime.SetFinalizer(fd, nil)
return value
}
func (fd *FD) Dup() (*FD, error) {
@@ -90,7 +119,15 @@ func (fd *FD) Dup() (*FD, error) {
return newFD(dup), nil
}
// File takes ownership of FD and turns it into an [*os.File].
//
// You must not use the FD after the call returns.
//
// Returns nil if the FD is not valid.
func (fd *FD) File(name string) *os.File {
fd.Forget()
return os.NewFile(uintptr(fd.raw), name)
if fd.raw < 0 {
return nil
}
return os.NewFile(uintptr(fd.disown()), name)
}

vendor/github.com/cilium/ebpf/internal/sys/fd_trace.go (generated, vendored, new file)

@@ -0,0 +1,93 @@
package sys
import (
"bytes"
"fmt"
"runtime"
"sync"
)
// OnLeakFD controls tracing [FD] lifetime to detect resources that are not
// closed by Close().
//
// If fn is not nil, tracing is enabled for all FDs created going forward. fn is
// invoked for all FDs that are closed by the garbage collector instead of an
// explicit Close() by a caller. Calling OnLeakFD twice with a non-nil fn
// (without disabling tracing in the meantime) will cause a panic.
//
// If fn is nil, tracing will be disabled. Any FDs that have not been closed are
// considered to be leaked, fn will be invoked for them, and the process will be
// terminated.
//
// fn will be invoked at most once for every unique sys.FD allocation since a
// runtime.Frames can only be unwound once.
func OnLeakFD(fn func(*runtime.Frames)) {
// Enable leak tracing if new fn is provided.
if fn != nil {
if onLeakFD != nil {
panic("OnLeakFD called twice with non-nil fn")
}
onLeakFD = fn
return
}
// fn is nil past this point.
if onLeakFD == nil {
return
}
// Call onLeakFD for all open fds.
if fs := flushFrames(); len(fs) != 0 {
for _, f := range fs {
onLeakFD(f)
}
}
onLeakFD = nil
}
var onLeakFD func(*runtime.Frames)
// fds is a registry of all file descriptors wrapped into sys.fds that were
// created while an fd tracer was active.
var fds sync.Map // map[int]*runtime.Frames
// flushFrames removes all elements from fds and returns them as a slice. This
// deals with the fact that a runtime.Frames can only be unwound once using
// Next().
func flushFrames() []*runtime.Frames {
var frames []*runtime.Frames
fds.Range(func(key, value any) bool {
frames = append(frames, value.(*runtime.Frames))
fds.Delete(key)
return true
})
return frames
}
func callersFrames() *runtime.Frames {
c := make([]uintptr, 32)
// Skip runtime.Callers and this function.
i := runtime.Callers(2, c)
if i == 0 {
return nil
}
return runtime.CallersFrames(c)
}
// FormatFrames formats a runtime.Frames as a human-readable string.
func FormatFrames(fs *runtime.Frames) string {
var b bytes.Buffer
for {
f, more := fs.Next()
b.WriteString(fmt.Sprintf("\t%s+%#x\n\t\t%s:%d\n", f.Function, f.PC-f.Entry, f.File, f.Line))
if !more {
break
}
}
return b.String()
}
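The new fd_trace.go lets callers register a callback that fires when an FD is closed by its finalizer rather than an explicit Close, and flushes outstanding FDs when tracing is disabled. A hedged sketch of enabling and disabling the tracer, written as if inside the same sys package, which is typically something a test harness would do; the function names are illustrative.

```go
package sys

import (
	"fmt"
	"runtime"
)

// enableLeakChecks starts tracing FD lifetimes; every FD later collected
// by the garbage collector without an explicit Close triggers the callback
// with the stack captured at creation time.
func enableLeakChecks() {
	OnLeakFD(func(fs *runtime.Frames) {
		fmt.Printf("leaked fd, created at:\n%s", FormatFrames(fs))
	})
}

// disableLeakChecks stops tracing; FDs still registered at this point are
// reported through the callback as leaks.
func disableLeakChecks() {
	OnLeakFD(nil)
}
```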


@@ -0,0 +1,49 @@
// Code generated by "stringer -type MapFlags"; DO NOT EDIT.
package sys
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[BPF_F_NO_PREALLOC-1]
_ = x[BPF_F_NO_COMMON_LRU-2]
_ = x[BPF_F_NUMA_NODE-4]
_ = x[BPF_F_RDONLY-8]
_ = x[BPF_F_WRONLY-16]
_ = x[BPF_F_STACK_BUILD_ID-32]
_ = x[BPF_F_ZERO_SEED-64]
_ = x[BPF_F_RDONLY_PROG-128]
_ = x[BPF_F_WRONLY_PROG-256]
_ = x[BPF_F_CLONE-512]
_ = x[BPF_F_MMAPABLE-1024]
_ = x[BPF_F_PRESERVE_ELEMS-2048]
_ = x[BPF_F_INNER_MAP-4096]
}
const _MapFlags_name = "BPF_F_NO_PREALLOCBPF_F_NO_COMMON_LRUBPF_F_NUMA_NODEBPF_F_RDONLYBPF_F_WRONLYBPF_F_STACK_BUILD_IDBPF_F_ZERO_SEEDBPF_F_RDONLY_PROGBPF_F_WRONLY_PROGBPF_F_CLONEBPF_F_MMAPABLEBPF_F_PRESERVE_ELEMSBPF_F_INNER_MAP"
var _MapFlags_map = map[MapFlags]string{
1: _MapFlags_name[0:17],
2: _MapFlags_name[17:36],
4: _MapFlags_name[36:51],
8: _MapFlags_name[51:63],
16: _MapFlags_name[63:75],
32: _MapFlags_name[75:95],
64: _MapFlags_name[95:110],
128: _MapFlags_name[110:127],
256: _MapFlags_name[127:144],
512: _MapFlags_name[144:155],
1024: _MapFlags_name[155:169],
2048: _MapFlags_name[169:189],
4096: _MapFlags_name[189:204],
}
func (i MapFlags) String() string {
if str, ok := _MapFlags_map[i]; ok {
return str
}
return "MapFlags(" + strconv.FormatInt(int64(i), 10) + ")"
}


@@ -20,7 +20,7 @@ func NewSlicePointer(buf []byte) Pointer {
return Pointer{ptr: unsafe.Pointer(&buf[0])}
}
// NewSlicePointer creates a 64-bit pointer from a byte slice.
// NewSlicePointerLen creates a 64-bit pointer from a byte slice.
//
// Useful to assign both the pointer and the length in one go.
func NewSlicePointerLen(buf []byte) (Pointer, uint32) {
@@ -36,3 +36,17 @@ func NewStringPointer(str string) Pointer {
return Pointer{ptr: unsafe.Pointer(p)}
}
// NewStringSlicePointer allocates an array of Pointers to each string in the
// given slice of strings and returns a 64-bit pointer to the start of the
// resulting array.
//
// Use this function to pass arrays of strings as syscall arguments.
func NewStringSlicePointer(strings []string) Pointer {
sp := make([]Pointer, 0, len(strings))
for _, s := range strings {
sp = append(sp, NewStringPointer(s))
}
return Pointer{ptr: unsafe.Pointer(&sp[0])}
}


@@ -1,5 +1,4 @@
//go:build armbe || mips || mips64p32
// +build armbe mips mips64p32
package sys


@@ -1,5 +1,4 @@
//go:build 386 || amd64p32 || arm || mipsle || mips64p32le
// +build 386 amd64p32 arm mipsle mips64p32le
package sys


@@ -1,5 +1,4 @@
//go:build !386 && !amd64p32 && !arm && !mipsle && !mips64p32le && !armbe && !mips && !mips64p32
// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le,!armbe,!mips,!mips64p32
package sys

vendor/github.com/cilium/ebpf/internal/sys/signals.go (generated, vendored, new file)

@@ -0,0 +1,83 @@
package sys
import (
"fmt"
"runtime"
"unsafe"
"github.com/cilium/ebpf/internal/unix"
)
// A sigset containing only SIGPROF.
var profSet unix.Sigset_t
func init() {
// See sigsetAdd for details on the implementation. Open coded here so
// that the compiler will check the constant calculations for us.
profSet.Val[sigprofBit/wordBits] |= 1 << (sigprofBit % wordBits)
}
// maskProfilerSignal locks the calling goroutine to its underlying OS thread
// and adds SIGPROF to the thread's signal mask. This prevents pprof from
// interrupting expensive syscalls like e.g. BPF_PROG_LOAD.
//
// The caller must defer unmaskProfilerSignal() to reverse the operation.
func maskProfilerSignal() {
runtime.LockOSThread()
if err := unix.PthreadSigmask(unix.SIG_BLOCK, &profSet, nil); err != nil {
runtime.UnlockOSThread()
panic(fmt.Errorf("masking profiler signal: %w", err))
}
}
// unmaskProfilerSignal removes SIGPROF from the underlying thread's signal
// mask, allowing it to be interrupted for profiling once again.
//
// It also unlocks the current goroutine from its underlying OS thread.
func unmaskProfilerSignal() {
defer runtime.UnlockOSThread()
if err := unix.PthreadSigmask(unix.SIG_UNBLOCK, &profSet, nil); err != nil {
panic(fmt.Errorf("unmasking profiler signal: %w", err))
}
}
const (
// Signal is the nth bit in the bitfield.
sigprofBit = int(unix.SIGPROF - 1)
// The number of bits in one Sigset_t word.
wordBits = int(unsafe.Sizeof(unix.Sigset_t{}.Val[0])) * 8
)
// sigsetAdd adds signal to set.
//
// Note: Sigset_t.Val's value type is uint32 or uint64 depending on the arch.
// This function must be able to deal with both and so must avoid any direct
// references to u32 or u64 types.
func sigsetAdd(set *unix.Sigset_t, signal unix.Signal) error {
if signal < 1 {
return fmt.Errorf("signal %d must be larger than 0", signal)
}
// For amd64, runtime.sigaddset() performs the following operation:
// set[(signal-1)/32] |= 1 << ((uint32(signal) - 1) & 31)
//
// This trick depends on sigset being two u32's, causing a signal in the
// bottom 31 bits to be written to the low word if bit 32 is low, or the high
// word if bit 32 is high.
// Signal is the nth bit in the bitfield.
bit := int(signal - 1)
// Word within the sigset the bit needs to be written to.
word := bit / wordBits
if word >= len(set.Val) {
return fmt.Errorf("signal %d does not fit within unix.Sigset_t", signal)
}
// Write the signal bit into its corresponding word at the corrected offset.
set.Val[word] |= 1 << (bit % wordBits)
return nil
}


@@ -8,10 +8,22 @@ import (
"github.com/cilium/ebpf/internal/unix"
)
// ENOTSUPP is a Linux internal error code that has leaked into UAPI.
//
// It is not the same as ENOTSUP or EOPNOTSUPP.
var ENOTSUPP = syscall.Errno(524)
// BPF wraps SYS_BPF.
//
// Any pointers contained in attr must use the Pointer type from this package.
func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
// Prevent the Go profiler from repeatedly interrupting the verifier,
// which could otherwise lead to a livelock due to receiving EAGAIN.
if cmd == BPF_PROG_LOAD || cmd == BPF_PROG_RUN {
maskProfilerSignal()
defer unmaskProfilerSignal()
}
for {
r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size)
runtime.KeepAlive(attr)
@@ -33,10 +45,10 @@ func BPF(cmd Cmd, attr unsafe.Pointer, size uintptr) (uintptr, error) {
// Info is implemented by all structs that can be passed to the ObjInfo syscall.
//
// MapInfo
// ProgInfo
// LinkInfo
// BtfInfo
// MapInfo
// ProgInfo
// LinkInfo
// BtfInfo
type Info interface {
info() (unsafe.Pointer, uint32)
}
@@ -90,12 +102,45 @@ func NewObjName(name string) ObjName {
return result
}
// LogLevel controls the verbosity of the kernel's eBPF program verifier.
type LogLevel uint32
const (
BPF_LOG_LEVEL1 LogLevel = 1 << iota
BPF_LOG_LEVEL2
BPF_LOG_STATS
)
// LinkID uniquely identifies a bpf_link.
type LinkID uint32
// BTFID uniquely identifies a BTF blob loaded into the kernel.
type BTFID uint32
// TypeID identifies a type in a BTF blob.
type TypeID uint32
// MapFlags control map behaviour.
type MapFlags uint32
//go:generate stringer -type MapFlags
const (
BPF_F_NO_PREALLOC MapFlags = 1 << iota
BPF_F_NO_COMMON_LRU
BPF_F_NUMA_NODE
BPF_F_RDONLY
BPF_F_WRONLY
BPF_F_STACK_BUILD_ID
BPF_F_ZERO_SEED
BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG
BPF_F_CLONE
BPF_F_MMAPABLE
BPF_F_PRESERVE_ELEMS
BPF_F_INNER_MAP
)
// wrappedErrno wraps syscall.Errno to prevent direct comparisons with
// syscall.E* or unix.E* constants.
//
@@ -108,6 +153,13 @@ func (we wrappedErrno) Unwrap() error {
return we.Errno
}
func (we wrappedErrno) Error() string {
if we.Errno == ENOTSUPP {
return "operation not supported"
}
return we.Errno.Error()
}
type syscallError struct {
error
errno syscall.Errno


@@ -6,14 +6,14 @@ import (
"unsafe"
)
type AdjRoomMode int32
type AdjRoomMode uint32
const (
BPF_ADJ_ROOM_NET AdjRoomMode = 0
BPF_ADJ_ROOM_MAC AdjRoomMode = 1
)
type AttachType int32
type AttachType uint32
const (
BPF_CGROUP_INET_INGRESS AttachType = 0
@@ -62,7 +62,7 @@ const (
__MAX_BPF_ATTACH_TYPE AttachType = 43
)
type Cmd int32
type Cmd uint32
const (
BPF_MAP_CREATE Cmd = 0
@@ -104,7 +104,7 @@ const (
BPF_PROG_BIND_MAP Cmd = 35
)
type FunctionId int32
type FunctionId uint32
const (
BPF_FUNC_unspec FunctionId = 0
@@ -301,17 +301,27 @@ const (
BPF_FUNC_copy_from_user_task FunctionId = 191
BPF_FUNC_skb_set_tstamp FunctionId = 192
BPF_FUNC_ima_file_hash FunctionId = 193
__BPF_FUNC_MAX_ID FunctionId = 194
BPF_FUNC_kptr_xchg FunctionId = 194
BPF_FUNC_map_lookup_percpu_elem FunctionId = 195
BPF_FUNC_skc_to_mptcp_sock FunctionId = 196
BPF_FUNC_dynptr_from_mem FunctionId = 197
BPF_FUNC_ringbuf_reserve_dynptr FunctionId = 198
BPF_FUNC_ringbuf_submit_dynptr FunctionId = 199
BPF_FUNC_ringbuf_discard_dynptr FunctionId = 200
BPF_FUNC_dynptr_read FunctionId = 201
BPF_FUNC_dynptr_write FunctionId = 202
BPF_FUNC_dynptr_data FunctionId = 203
__BPF_FUNC_MAX_ID FunctionId = 204
)
type HdrStartOff int32
type HdrStartOff uint32
const (
BPF_HDR_START_MAC HdrStartOff = 0
BPF_HDR_START_NET HdrStartOff = 1
)
type LinkType int32
type LinkType uint32
const (
BPF_LINK_TYPE_UNSPEC LinkType = 0
@@ -323,10 +333,11 @@ const (
BPF_LINK_TYPE_XDP LinkType = 6
BPF_LINK_TYPE_PERF_EVENT LinkType = 7
BPF_LINK_TYPE_KPROBE_MULTI LinkType = 8
MAX_BPF_LINK_TYPE LinkType = 9
BPF_LINK_TYPE_STRUCT_OPS LinkType = 9
MAX_BPF_LINK_TYPE LinkType = 10
)
type MapType int32
type MapType uint32
const (
BPF_MAP_TYPE_UNSPEC MapType = 0
@@ -362,7 +373,7 @@ const (
BPF_MAP_TYPE_BLOOM_FILTER MapType = 30
)
type ProgType int32
type ProgType uint32
const (
BPF_PROG_TYPE_UNSPEC ProgType = 0
@@ -399,7 +410,7 @@ const (
BPF_PROG_TYPE_SYSCALL ProgType = 31
)
type RetCode int32
type RetCode uint32
const (
BPF_OK RetCode = 0
@@ -408,14 +419,14 @@ const (
BPF_LWT_REROUTE RetCode = 128
)
type SkAction int32
type SkAction uint32
const (
SK_DROP SkAction = 0
SK_PASS SkAction = 1
)
type StackBuildIdStatus int32
type StackBuildIdStatus uint32
const (
BPF_STACK_BUILD_ID_EMPTY StackBuildIdStatus = 0
@@ -423,13 +434,13 @@ const (
BPF_STACK_BUILD_ID_IP StackBuildIdStatus = 2
)
type StatsType int32
type StatsType uint32
const (
BPF_STATS_RUN_TIME StatsType = 0
)
type XdpAction int32
type XdpAction uint32
const (
XDP_ABORTED XdpAction = 0
@@ -474,15 +485,15 @@ type MapInfo struct {
KeySize uint32
ValueSize uint32
MaxEntries uint32
MapFlags uint32
MapFlags MapFlags
Name ObjName
Ifindex uint32
BtfVmlinuxValueTypeId uint32
BtfVmlinuxValueTypeId TypeID
NetnsDev uint64
NetnsIno uint64
BtfId uint32
BtfKeyTypeId uint32
BtfValueTypeId uint32
BtfKeyTypeId TypeID
BtfValueTypeId TypeID
_ [4]byte
MapExtra uint64
}
@@ -508,7 +519,7 @@ type ProgInfo struct {
NrJitedFuncLens uint32
JitedKsyms uint64
JitedFuncLens uint64
BtfId uint32
BtfId BTFID
FuncInfoRecSize uint32
FuncInfo uint64
NrFuncInfo uint32
@@ -616,7 +627,7 @@ type LinkCreateAttr struct {
TargetFd uint32
AttachType AttachType
Flags uint32
TargetBtfId uint32
TargetBtfId TypeID
_ [28]byte
}
@@ -646,6 +657,26 @@ func LinkCreateIter(attr *LinkCreateIterAttr) (*FD, error) {
return NewFD(int(fd))
}
type LinkCreateKprobeMultiAttr struct {
ProgFd uint32
TargetFd uint32
AttachType AttachType
Flags uint32
KprobeMultiFlags uint32
Count uint32
Syms Pointer
Addrs Pointer
Cookies Pointer
}
func LinkCreateKprobeMulti(attr *LinkCreateKprobeMultiAttr) (*FD, error) {
fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err != nil {
return nil, err
}
return NewFD(int(fd))
}
type LinkCreatePerfEventAttr struct {
ProgFd uint32
TargetFd uint32
@@ -663,6 +694,25 @@ func LinkCreatePerfEvent(attr *LinkCreatePerfEventAttr) (*FD, error) {
return NewFD(int(fd))
}
type LinkCreateTracingAttr struct {
ProgFd uint32
TargetFd uint32
AttachType AttachType
Flags uint32
TargetBtfId BTFID
_ [4]byte
Cookie uint64
_ [16]byte
}
func LinkCreateTracing(attr *LinkCreateTracingAttr) (*FD, error) {
fd, err := BPF(BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
if err != nil {
return nil, err
}
return NewFD(int(fd))
}
type LinkUpdateAttr struct {
LinkFd uint32
NewProgFd uint32
@@ -680,15 +730,15 @@ type MapCreateAttr struct {
KeySize uint32
ValueSize uint32
MaxEntries uint32
MapFlags uint32
MapFlags MapFlags
InnerMapFd uint32
NumaNode uint32
MapName ObjName
MapIfindex uint32
BtfFd uint32
BtfKeyTypeId uint32
BtfValueTypeId uint32
BtfVmlinuxValueTypeId uint32
BtfKeyTypeId TypeID
BtfValueTypeId TypeID
BtfVmlinuxValueTypeId TypeID
MapExtra uint64
}
@@ -951,7 +1001,7 @@ type ProgLoadAttr struct {
InsnCnt uint32
Insns Pointer
License Pointer
LogLevel uint32
LogLevel LogLevel
LogSize uint32
LogBuf Pointer
KernVersion uint32
@@ -966,8 +1016,8 @@ type ProgLoadAttr struct {
LineInfoRecSize uint32
LineInfo Pointer
LineInfoCnt uint32
AttachBtfId uint32
AttachProgFd uint32
AttachBtfId TypeID
AttachBtfObjFd uint32
CoreReloCnt uint32
FdArray Pointer
CoreRelos Pointer
@@ -983,6 +1033,21 @@ func ProgLoad(attr *ProgLoadAttr) (*FD, error) {
return NewFD(int(fd))
}
type ProgQueryAttr struct {
TargetFd uint32
AttachType AttachType
QueryFlags uint32
AttachFlags uint32
ProgIds Pointer
ProgCount uint32
_ [4]byte
}
func ProgQuery(attr *ProgQueryAttr) error {
_, err := BPF(BPF_PROG_QUERY, unsafe.Pointer(attr), unsafe.Sizeof(*attr))
return err
}
type ProgRunAttr struct {
ProgFd uint32
Retval uint32
@@ -1046,7 +1111,7 @@ type RawTracepointLinkInfo struct {
type TracingLinkInfo struct {
AttachType AttachType
TargetObjId uint32
TargetBtfId uint32
TargetBtfId TypeID
}
type XDPLinkInfo struct{ Ifindex uint32 }


@@ -0,0 +1,359 @@
package tracefs
import (
"crypto/rand"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"syscall"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/unix"
)
var (
ErrInvalidInput = errors.New("invalid input")
ErrInvalidMaxActive = errors.New("can only set maxactive on kretprobes")
)
//go:generate stringer -type=ProbeType -linecomment
type ProbeType uint8
const (
Kprobe ProbeType = iota // kprobe
Uprobe // uprobe
)
func (pt ProbeType) eventsFile() (*os.File, error) {
path, err := sanitizeTracefsPath(fmt.Sprintf("%s_events", pt.String()))
if err != nil {
return nil, err
}
return os.OpenFile(path, os.O_APPEND|os.O_WRONLY, 0666)
}
type ProbeArgs struct {
Type ProbeType
Symbol, Group, Path string
Offset, RefCtrOffset, Cookie uint64
Pid, RetprobeMaxActive int
Ret bool
}
// RandomGroup generates a pseudorandom string for use as a tracefs group name.
// Returns an error when the output string would exceed 63 characters (kernel
// limitation), when rand.Read() fails or when prefix contains characters not
// allowed by IsValidTraceID.
func RandomGroup(prefix string) (string, error) {
if !validIdentifier(prefix) {
return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, ErrInvalidInput)
}
b := make([]byte, 8)
if _, err := rand.Read(b); err != nil {
return "", fmt.Errorf("reading random bytes: %w", err)
}
group := fmt.Sprintf("%s_%x", prefix, b)
if len(group) > 63 {
return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, ErrInvalidInput)
}
return group, nil
}
// validIdentifier implements the equivalent of a regex match
// against "^[a-zA-Z_][0-9a-zA-Z_]*$".
//
// Trace event groups, names and kernel symbols must adhere to this set
// of characters. Non-empty, first character must not be a number, all
// characters must be alphanumeric or underscore.
func validIdentifier(s string) bool {
if len(s) < 1 {
return false
}
for i, c := range []byte(s) {
switch {
case c >= 'a' && c <= 'z':
case c >= 'A' && c <= 'Z':
case c == '_':
case i > 0 && c >= '0' && c <= '9':
default:
return false
}
}
return true
}
func sanitizeTracefsPath(path ...string) (string, error) {
base, err := getTracefsPath()
if err != nil {
return "", err
}
l := filepath.Join(path...)
p := filepath.Join(base, l)
if !strings.HasPrefix(p, base) {
return "", fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, ErrInvalidInput)
}
return p, nil
}
// getTracefsPath will return a correct path to the tracefs mount point.
// Since kernel 4.1 tracefs should be mounted by default at /sys/kernel/tracing,
// but may also be available at /sys/kernel/debug/tracing if debugfs is mounted.
// The available tracefs paths will depend on distribution choices.
var getTracefsPath = internal.Memoize(func() (string, error) {
for _, p := range []struct {
path string
fsType int64
}{
{"/sys/kernel/tracing", unix.TRACEFS_MAGIC},
{"/sys/kernel/debug/tracing", unix.TRACEFS_MAGIC},
// RHEL/CentOS
{"/sys/kernel/debug/tracing", unix.DEBUGFS_MAGIC},
} {
if fsType, err := internal.FSType(p.path); err == nil && fsType == p.fsType {
return p.path, nil
}
}
return "", errors.New("neither debugfs nor tracefs are mounted")
})
// sanitizeIdentifier replaces every invalid character for the tracefs api with an underscore.
//
// It is equivalent to calling regexp.MustCompile("[^a-zA-Z0-9]+").ReplaceAllString("_").
func sanitizeIdentifier(s string) string {
var skip bool
return strings.Map(func(c rune) rune {
switch {
case c >= 'a' && c <= 'z',
c >= 'A' && c <= 'Z',
c >= '0' && c <= '9':
skip = false
return c
case skip:
return -1
default:
skip = true
return '_'
}
}, s)
}
// EventID reads a trace event's ID from tracefs given its group and name.
// The kernel requires group and name to be alphanumeric or underscore.
func EventID(group, name string) (uint64, error) {
if !validIdentifier(group) {
return 0, fmt.Errorf("invalid tracefs group: %q", group)
}
if !validIdentifier(name) {
return 0, fmt.Errorf("invalid tracefs name: %q", name)
}
path, err := sanitizeTracefsPath("events", group, name, "id")
if err != nil {
return 0, err
}
tid, err := internal.ReadUint64FromFile("%d\n", path)
if errors.Is(err, os.ErrNotExist) {
return 0, err
}
if err != nil {
return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err)
}
return tid, nil
}
func probePrefix(ret bool, maxActive int) string {
if ret {
if maxActive > 0 {
return fmt.Sprintf("r%d", maxActive)
}
return "r"
}
return "p"
}
// Event represents an entry in a tracefs probe events file.
type Event struct {
typ ProbeType
group, name string
// event id allocated by the kernel. 0 if the event has already been removed.
id uint64
}
// NewEvent creates a new ephemeral trace event.
//
// Returns os.ErrNotExist if symbol is not a valid
// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist
// if a probe with the same group and symbol already exists. Returns an error if
// args.RetprobeMaxActive is used on non kprobe types. Returns ErrNotSupported if
// the kernel is too old to support kretprobe maxactive.
func NewEvent(args ProbeArgs) (*Event, error) {
// Before attempting to create a trace event through tracefs,
// check if an event with the same group and name already exists.
// Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate
// entry, so we need to rely on reads for detecting uniqueness.
eventName := sanitizeIdentifier(args.Symbol)
_, err := EventID(args.Group, eventName)
if err == nil {
return nil, fmt.Errorf("trace event %s/%s: %w", args.Group, eventName, os.ErrExist)
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("checking trace event %s/%s: %w", args.Group, eventName, err)
}
// Open the kprobe_events file in tracefs.
f, err := args.Type.eventsFile()
if err != nil {
return nil, err
}
defer f.Close()
var pe, token string
switch args.Type {
case Kprobe:
// The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt):
// p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
// r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy
// p:ebpf_5678/p_my_kprobe __x64_sys_execve
//
// Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the
// kernel default to NR_CPUS. This is desired in most eBPF cases since
// subsampling or rate limiting logic can be more accurately implemented in
// the eBPF program itself.
// See Documentation/kprobes.txt for more details.
if args.RetprobeMaxActive != 0 && !args.Ret {
return nil, ErrInvalidMaxActive
}
token = KprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, args.RetprobeMaxActive), args.Group, eventName, token)
case Uprobe:
// The uprobe_events syntax is as follows:
// p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe
// r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe
// -:[GRP/]EVENT : Clear a probe
//
// Some examples:
// r:ebpf_1234/readline /bin/bash:0x12345
// p:ebpf_5678/main_mySymbol /bin/mybin:0x12345(0x123)
//
// See Documentation/trace/uprobetracer.txt for more details.
if args.RetprobeMaxActive != 0 {
return nil, ErrInvalidMaxActive
}
token = UprobeToken(args)
pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(args.Ret, 0), args.Group, eventName, token)
}
_, err = f.WriteString(pe)
// Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL
// when trying to create a retprobe for a missing symbol.
if errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("token %s: not found: %w", token, err)
}
// Since commit ab105a4fb894, EILSEQ is returned when a kprobe sym+offset is resolved
// to an invalid insn boundary. The exact conditions that trigger this error are
// arch specific however.
if errors.Is(err, syscall.EILSEQ) {
return nil, fmt.Errorf("token %s: bad insn boundary: %w", token, os.ErrNotExist)
}
// ERANGE is returned when the `SYM[+offs]` token is too big and cannot
// be resolved.
if errors.Is(err, syscall.ERANGE) {
return nil, fmt.Errorf("token %s: offset too big: %w", token, os.ErrNotExist)
}
if err != nil {
return nil, fmt.Errorf("token %s: writing '%s': %w", token, pe, err)
}
// Get the newly-created trace event's id.
tid, err := EventID(args.Group, eventName)
if args.RetprobeMaxActive != 0 && errors.Is(err, os.ErrNotExist) {
// Kernels < 4.12 don't support maxactive and therefore auto generate
// group and event names from the symbol and offset. The symbol is used
// without any sanitization.
// See https://elixir.bootlin.com/linux/v4.10/source/kernel/trace/trace_kprobe.c#L712
event := fmt.Sprintf("kprobes/r_%s_%d", args.Symbol, args.Offset)
if err := removeEvent(args.Type, event); err != nil {
return nil, fmt.Errorf("failed to remove spurious maxactive event: %s", err)
}
return nil, fmt.Errorf("create trace event with non-default maxactive: %w", internal.ErrNotSupported)
}
if err != nil {
return nil, fmt.Errorf("get trace event id: %w", err)
}
evt := &Event{args.Type, args.Group, eventName, tid}
runtime.SetFinalizer(evt, (*Event).Close)
return evt, nil
}
// Close removes the event from tracefs.
//
// Returns os.ErrClosed if the event has already been closed before.
func (evt *Event) Close() error {
if evt.id == 0 {
return os.ErrClosed
}
evt.id = 0
runtime.SetFinalizer(evt, nil)
pe := fmt.Sprintf("%s/%s", evt.group, evt.name)
return removeEvent(evt.typ, pe)
}
func removeEvent(typ ProbeType, pe string) error {
f, err := typ.eventsFile()
if err != nil {
return err
}
defer f.Close()
// See [k,u]probe_events syntax above. The probe type does not need to be specified
// for removals.
if _, err = f.WriteString("-:" + pe); err != nil {
return fmt.Errorf("remove event %q from %s: %w", pe, f.Name(), err)
}
return nil
}
// ID returns the tracefs ID associated with the event.
func (evt *Event) ID() uint64 {
return evt.id
}
// Group returns the tracefs group used by the event.
func (evt *Event) Group() string {
return evt.group
}
// KprobeToken creates the SYM[+offs] token for the tracefs api.
func KprobeToken(args ProbeArgs) string {
po := args.Symbol
if args.Offset != 0 {
po += fmt.Sprintf("+%#x", args.Offset)
}
return po
}
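The new tracefs package encapsulates creating and removing ephemeral [k,u]probe trace events through the tracefs events files. A sketch of the intended flow for a kretprobe; the symbol comes from the examples in the hunk above, and since the package is internal to cilium/ebpf this is illustrative rather than something external modules can import.

```go
package main

import (
	"log"

	"github.com/cilium/ebpf/internal/tracefs"
)

func main() {
	group, err := tracefs.RandomGroup("ebpf") // e.g. "ebpf_1a2b3c4d5e6f7a8b"
	if err != nil {
		log.Fatal(err)
	}

	evt, err := tracefs.NewEvent(tracefs.ProbeArgs{
		Type:   tracefs.Kprobe,
		Group:  group,
		Symbol: "nf_conntrack_destroy",
		Ret:    true, // kretprobe, MAXACTIVE left at the kernel default
	})
	if err != nil {
		log.Fatal(err)
	}
	defer evt.Close() // writes "-:group/event" to remove the probe again

	log.Printf("created trace event %s id %d", evt.Group(), evt.ID())
}
```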


@@ -0,0 +1,24 @@
// Code generated by "stringer -type=ProbeType -linecomment"; DO NOT EDIT.
package tracefs
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[Kprobe-0]
_ = x[Uprobe-1]
}
const _ProbeType_name = "kprobeuprobe"
var _ProbeType_index = [...]uint8{0, 6, 12}
func (i ProbeType) String() string {
if i >= ProbeType(len(_ProbeType_index)-1) {
return "ProbeType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _ProbeType_name[_ProbeType_index[i]:_ProbeType_index[i+1]]
}


@@ -0,0 +1,16 @@
package tracefs
import "fmt"
// UprobeToken creates the PATH:OFFSET(REF_CTR_OFFSET) token for the tracefs api.
func UprobeToken(args ProbeArgs) string {
po := fmt.Sprintf("%s:%#x", args.Path, args.Offset)
if args.RefCtrOffset != 0 {
// This is not documented in Documentation/trace/uprobetracer.txt.
// elixir.bootlin.com/linux/v5.15-rc7/source/kernel/trace/trace.c#L5564
po += fmt.Sprintf("(%#x)", args.RefCtrOffset)
}
return po
}
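UprobeToken renders the PATH:OFFSET(REF_CTR_OFFSET) form used by uprobe_events. A tiny sketch of its output for the values from the uprobe examples earlier in this diff, written as if inside the same tracefs package.

```go
package tracefs

import "fmt"

func uprobeTokenDemo() {
	args := ProbeArgs{Path: "/bin/bash", Offset: 0x12345, RefCtrOffset: 0x123}
	fmt.Println(UprobeToken(args)) // "/bin/bash:0x12345(0x123)"

	args.RefCtrOffset = 0
	fmt.Println(UprobeToken(args)) // "/bin/bash:0x12345"
}
```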

vendor/github.com/cilium/ebpf/internal/unix/doc.go (generated, vendored, new file)

@@ -0,0 +1,11 @@
// Package unix re-exports Linux specific parts of golang.org/x/sys/unix.
//
// It avoids breaking compilation on other OSes by providing stubs as follows:
// - Invoking a function always returns an error.
// - Errnos have distinct, non-zero values.
// - Constants have distinct but meaningless values.
// - Types use the same names for members, but may or may not follow the
// Linux layout.
package unix
// Note: please don't add any custom API to this package. Use internal/sys instead.


@@ -1,5 +1,4 @@
//go:build linux
// +build linux
package unix
@@ -10,189 +9,178 @@ import (
)
const (
ENOENT = linux.ENOENT
EEXIST = linux.EEXIST
EAGAIN = linux.EAGAIN
ENOSPC = linux.ENOSPC
EINVAL = linux.EINVAL
EPOLLIN = linux.EPOLLIN
EINTR = linux.EINTR
EPERM = linux.EPERM
ESRCH = linux.ESRCH
ENODEV = linux.ENODEV
EBADF = linux.EBADF
E2BIG = linux.E2BIG
EFAULT = linux.EFAULT
EACCES = linux.EACCES
// ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP
ENOTSUPP = syscall.Errno(0x20c)
BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC
BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE
BPF_F_RDONLY = linux.BPF_F_RDONLY
BPF_F_WRONLY = linux.BPF_F_WRONLY
BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG
BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE
BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE
BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP
BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN
BPF_TAG_SIZE = linux.BPF_TAG_SIZE
BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT
BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT
BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ
SYS_BPF = linux.SYS_BPF
F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC
EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD
EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC
O_CLOEXEC = linux.O_CLOEXEC
O_NONBLOCK = linux.O_NONBLOCK
PROT_READ = linux.PROT_READ
PROT_WRITE = linux.PROT_WRITE
MAP_SHARED = linux.MAP_SHARED
PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE
PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE
PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF
PerfBitWatermark = linux.PerfBitWatermark
PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW
PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC
RLIM_INFINITY = linux.RLIM_INFINITY
RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK
BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME
PERF_RECORD_LOST = linux.PERF_RECORD_LOST
PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE
AT_FDCWD = linux.AT_FDCWD
RENAME_NOREPLACE = linux.RENAME_NOREPLACE
SO_ATTACH_BPF = linux.SO_ATTACH_BPF
SO_DETACH_BPF = linux.SO_DETACH_BPF
SOL_SOCKET = linux.SOL_SOCKET
ENOENT = linux.ENOENT
EEXIST = linux.EEXIST
EAGAIN = linux.EAGAIN
ENOSPC = linux.ENOSPC
EINVAL = linux.EINVAL
EPOLLIN = linux.EPOLLIN
EINTR = linux.EINTR
EPERM = linux.EPERM
ESRCH = linux.ESRCH
ENODEV = linux.ENODEV
EBADF = linux.EBADF
E2BIG = linux.E2BIG
EFAULT = linux.EFAULT
EACCES = linux.EACCES
EILSEQ = linux.EILSEQ
EOPNOTSUPP = linux.EOPNOTSUPP
)
const (
BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC
BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE
BPF_F_RDONLY = linux.BPF_F_RDONLY
BPF_F_WRONLY = linux.BPF_F_WRONLY
BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG
BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE
BPF_F_XDP_HAS_FRAGS = linux.BPF_F_XDP_HAS_FRAGS
BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE
BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP
BPF_F_KPROBE_MULTI_RETURN = linux.BPF_F_KPROBE_MULTI_RETURN
BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN
BPF_TAG_SIZE = linux.BPF_TAG_SIZE
BPF_RINGBUF_BUSY_BIT = linux.BPF_RINGBUF_BUSY_BIT
BPF_RINGBUF_DISCARD_BIT = linux.BPF_RINGBUF_DISCARD_BIT
BPF_RINGBUF_HDR_SZ = linux.BPF_RINGBUF_HDR_SZ
SYS_BPF = linux.SYS_BPF
F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC
EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD
EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC
O_CLOEXEC = linux.O_CLOEXEC
O_NONBLOCK = linux.O_NONBLOCK
PROT_NONE = linux.PROT_NONE
PROT_READ = linux.PROT_READ
PROT_WRITE = linux.PROT_WRITE
MAP_ANON = linux.MAP_ANON
MAP_SHARED = linux.MAP_SHARED
MAP_PRIVATE = linux.MAP_PRIVATE
PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT
PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE
PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE
PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF
PerfBitWatermark = linux.PerfBitWatermark
PerfBitWriteBackward = linux.PerfBitWriteBackward
PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW
PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC
RLIM_INFINITY = linux.RLIM_INFINITY
RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK
BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME
PERF_RECORD_LOST = linux.PERF_RECORD_LOST
PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE
AT_FDCWD = linux.AT_FDCWD
RENAME_NOREPLACE = linux.RENAME_NOREPLACE
SO_ATTACH_BPF = linux.SO_ATTACH_BPF
SO_DETACH_BPF = linux.SO_DETACH_BPF
SOL_SOCKET = linux.SOL_SOCKET
SIGPROF = linux.SIGPROF
SIG_BLOCK = linux.SIG_BLOCK
SIG_UNBLOCK = linux.SIG_UNBLOCK
EM_NONE = linux.EM_NONE
EM_BPF = linux.EM_BPF
BPF_FS_MAGIC = linux.BPF_FS_MAGIC
TRACEFS_MAGIC = linux.TRACEFS_MAGIC
DEBUGFS_MAGIC = linux.DEBUGFS_MAGIC
)
// Statfs_t is a wrapper
type Statfs_t = linux.Statfs_t
type Stat_t = linux.Stat_t
// Rlimit is a wrapper
type Rlimit = linux.Rlimit
type Signal = linux.Signal
type Sigset_t = linux.Sigset_t
type PerfEventMmapPage = linux.PerfEventMmapPage
type EpollEvent = linux.EpollEvent
type PerfEventAttr = linux.PerfEventAttr
type Utsname = linux.Utsname
// Syscall is a wrapper
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
return linux.Syscall(trap, a1, a2, a3)
}
func PthreadSigmask(how int, set, oldset *Sigset_t) error {
return linux.PthreadSigmask(how, set, oldset)
}
// FcntlInt is a wrapper
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
return linux.FcntlInt(fd, cmd, arg)
}
// IoctlSetInt is a wrapper
func IoctlSetInt(fd int, req uint, value int) error {
return linux.IoctlSetInt(fd, req, value)
}
// Statfs is a wrapper
func Statfs(path string, buf *Statfs_t) (err error) {
return linux.Statfs(path, buf)
}
// Close is a wrapper
func Close(fd int) (err error) {
return linux.Close(fd)
}
// EpollEvent is a wrapper
type EpollEvent = linux.EpollEvent
// EpollWait is a wrapper
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
return linux.EpollWait(epfd, events, msec)
}
// EpollCtl is a wrapper
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
return linux.EpollCtl(epfd, op, fd, event)
}
// Eventfd is a wrapper
func Eventfd(initval uint, flags int) (fd int, err error) {
return linux.Eventfd(initval, flags)
}
// Write is a wrapper
func Write(fd int, p []byte) (n int, err error) {
return linux.Write(fd, p)
}
// EpollCreate1 is a wrapper
func EpollCreate1(flag int) (fd int, err error) {
return linux.EpollCreate1(flag)
}
// PerfEventMmapPage is a wrapper
type PerfEventMmapPage linux.PerfEventMmapPage
// SetNonblock is a wrapper
func SetNonblock(fd int, nonblocking bool) (err error) {
return linux.SetNonblock(fd, nonblocking)
}
// Mmap is a wrapper
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
return linux.Mmap(fd, offset, length, prot, flags)
}
// Munmap is a wrapper
func Munmap(b []byte) (err error) {
return linux.Munmap(b)
}
// PerfEventAttr is a wrapper
type PerfEventAttr = linux.PerfEventAttr
// PerfEventOpen is a wrapper
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags)
}
// Utsname is a wrapper
type Utsname = linux.Utsname
// Uname is a wrapper
func Uname(buf *Utsname) (err error) {
return linux.Uname(buf)
}
// Getpid is a wrapper
func Getpid() int {
return linux.Getpid()
}
// Gettid is a wrapper
func Gettid() int {
return linux.Gettid()
}
// Tgkill is a wrapper
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
return linux.Tgkill(tgid, tid, sig)
}
// BytePtrFromString is a wrapper
func BytePtrFromString(s string) (*byte, error) {
return linux.BytePtrFromString(s)
}
// ByteSliceToString is a wrapper
func ByteSliceToString(s []byte) string {
return linux.ByteSliceToString(s)
}
// Renameat2 is a wrapper
func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error {
return linux.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags)
}
@@ -208,3 +196,7 @@ func Open(path string, mode int, perm uint32) (int, error) {
func Fstat(fd int, stat *Stat_t) error {
return linux.Fstat(fd, stat)
}
func SetsockoptInt(fd, level, opt, value int) error {
return linux.SetsockoptInt(fd, level, opt, value)
}


@@ -1,5 +1,4 @@
//go:build !linux
// +build !linux
package unix
@@ -11,70 +10,87 @@ import (
var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
// Errnos are distinct and non-zero.
const (
ENOENT = syscall.ENOENT
EEXIST = syscall.EEXIST
EAGAIN = syscall.EAGAIN
ENOSPC = syscall.ENOSPC
EINVAL = syscall.EINVAL
EINTR = syscall.EINTR
EPERM = syscall.EPERM
ESRCH = syscall.ESRCH
ENODEV = syscall.ENODEV
EBADF = syscall.Errno(0)
E2BIG = syscall.Errno(0)
EFAULT = syscall.EFAULT
EACCES = syscall.Errno(0)
// ENOTSUPP is not the same as ENOTSUP or EOPNOTSUPP
ENOTSUPP = syscall.Errno(0x20c)
BPF_F_NO_PREALLOC = 0
BPF_F_NUMA_NODE = 0
BPF_F_RDONLY = 0
BPF_F_WRONLY = 0
BPF_F_RDONLY_PROG = 0
BPF_F_WRONLY_PROG = 0
BPF_F_SLEEPABLE = 0
BPF_F_MMAPABLE = 0
BPF_F_INNER_MAP = 0
BPF_OBJ_NAME_LEN = 0x10
BPF_TAG_SIZE = 0x8
BPF_RINGBUF_BUSY_BIT = 0
BPF_RINGBUF_DISCARD_BIT = 0
BPF_RINGBUF_HDR_SZ = 0
SYS_BPF = 321
F_DUPFD_CLOEXEC = 0x406
EPOLLIN = 0x1
EPOLL_CTL_ADD = 0x1
EPOLL_CLOEXEC = 0x80000
O_CLOEXEC = 0x80000
O_NONBLOCK = 0x800
PROT_READ = 0x1
PROT_WRITE = 0x2
MAP_SHARED = 0x1
PERF_ATTR_SIZE_VER1 = 0
PERF_TYPE_SOFTWARE = 0x1
PERF_TYPE_TRACEPOINT = 0
PERF_COUNT_SW_BPF_OUTPUT = 0xa
PERF_EVENT_IOC_DISABLE = 0
PERF_EVENT_IOC_ENABLE = 0
PERF_EVENT_IOC_SET_BPF = 0
PerfBitWatermark = 0x4000
PERF_SAMPLE_RAW = 0x400
PERF_FLAG_FD_CLOEXEC = 0x8
RLIM_INFINITY = 0x7fffffffffffffff
RLIMIT_MEMLOCK = 8
BPF_STATS_RUN_TIME = 0
PERF_RECORD_LOST = 2
PERF_RECORD_SAMPLE = 9
AT_FDCWD = -0x2
RENAME_NOREPLACE = 0x1
SO_ATTACH_BPF = 0x32
SO_DETACH_BPF = 0x1b
SOL_SOCKET = 0x1
ENOENT syscall.Errno = iota + 1
EEXIST
EAGAIN
ENOSPC
EINVAL
EINTR
EPERM
ESRCH
ENODEV
EBADF
E2BIG
EFAULT
EACCES
EILSEQ
EOPNOTSUPP
)
// Constants are distinct to avoid breaking switch statements.
const (
BPF_F_NO_PREALLOC = iota
BPF_F_NUMA_NODE
BPF_F_RDONLY
BPF_F_WRONLY
BPF_F_RDONLY_PROG
BPF_F_WRONLY_PROG
BPF_F_SLEEPABLE
BPF_F_MMAPABLE
BPF_F_INNER_MAP
BPF_F_KPROBE_MULTI_RETURN
BPF_F_XDP_HAS_FRAGS
BPF_OBJ_NAME_LEN
BPF_TAG_SIZE
BPF_RINGBUF_BUSY_BIT
BPF_RINGBUF_DISCARD_BIT
BPF_RINGBUF_HDR_SZ
SYS_BPF
F_DUPFD_CLOEXEC
EPOLLIN
EPOLL_CTL_ADD
EPOLL_CLOEXEC
O_CLOEXEC
O_NONBLOCK
PROT_NONE
PROT_READ
PROT_WRITE
MAP_ANON
MAP_SHARED
MAP_PRIVATE
PERF_ATTR_SIZE_VER1
PERF_TYPE_SOFTWARE
PERF_TYPE_TRACEPOINT
PERF_COUNT_SW_BPF_OUTPUT
PERF_EVENT_IOC_DISABLE
PERF_EVENT_IOC_ENABLE
PERF_EVENT_IOC_SET_BPF
PerfBitWatermark
PerfBitWriteBackward
PERF_SAMPLE_RAW
PERF_FLAG_FD_CLOEXEC
RLIM_INFINITY
RLIMIT_MEMLOCK
BPF_STATS_RUN_TIME
PERF_RECORD_LOST
PERF_RECORD_SAMPLE
AT_FDCWD
RENAME_NOREPLACE
SO_ATTACH_BPF
SO_DETACH_BPF
SOL_SOCKET
SIGPROF
SIG_BLOCK
SIG_UNBLOCK
EM_NONE
EM_BPF
BPF_FS_MAGIC
TRACEFS_MAGIC
DEBUGFS_MAGIC
)
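To make the "distinct to avoid breaking switch statements" point concrete, an illustrative (non-vendored) caller: if the stubbed flags all shared the value 0, the compiler would reject the duplicate constant case values below, so the distinct iota values keep such code building on non-Linux platforms.

package example

import "github.com/cilium/ebpf/internal/unix"

// describeMapFlag is an illustrative sketch: it builds on any GOOS only
// because BPF_F_RDONLY and BPF_F_WRONLY have distinct values in both the
// Linux and stub builds; identical values would be a duplicate-case error.
func describeMapFlag(flag int) string {
	switch flag {
	case unix.BPF_F_RDONLY:
		return "read-only"
	case unix.BPF_F_WRONLY:
		return "write-only"
	default:
		return "other"
	}
}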
// Statfs_t is a wrapper
type Statfs_t struct {
Type int64
Bsize int64
@@ -90,72 +106,81 @@ type Statfs_t struct {
Spare [4]int64
}
type Stat_t struct{}
type Stat_t struct {
Dev uint64
Ino uint64
Nlink uint64
Mode uint32
Uid uint32
Gid uint32
_ int32
Rdev uint64
Size int64
Blksize int64
Blocks int64
}
// Rlimit is a wrapper
type Rlimit struct {
Cur uint64
Max uint64
}
// Syscall is a wrapper
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
return 0, 0, syscall.Errno(1)
type Signal int
type Sigset_t struct {
Val [4]uint64
}
func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) {
return 0, 0, syscall.ENOTSUP
}
func PthreadSigmask(how int, set, oldset *Sigset_t) error {
return errNonLinux
}
// FcntlInt is a wrapper
func FcntlInt(fd uintptr, cmd, arg int) (int, error) {
return -1, errNonLinux
}
// IoctlSetInt is a wrapper
func IoctlSetInt(fd int, req uint, value int) error {
return errNonLinux
}
// Statfs is a wrapper
func Statfs(path string, buf *Statfs_t) error {
return errNonLinux
}
// Close is a wrapper
func Close(fd int) (err error) {
return errNonLinux
}
// EpollEvent is a wrapper
type EpollEvent struct {
Events uint32
Fd int32
Pad int32
}
// EpollWait is a wrapper
func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) {
return 0, errNonLinux
}
// EpollCtl is a wrapper
func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) {
return errNonLinux
}
// Eventfd is a wrapper
func Eventfd(initval uint, flags int) (fd int, err error) {
return 0, errNonLinux
}
// Write is a wrapper
func Write(fd int, p []byte) (n int, err error) {
return 0, errNonLinux
}
// EpollCreate1 is a wrapper
func EpollCreate1(flag int) (fd int, err error) {
return 0, errNonLinux
}
// PerfEventMmapPage is a wrapper
type PerfEventMmapPage struct {
Version uint32
Compat_version uint32
@@ -182,22 +207,18 @@ type PerfEventMmapPage struct {
Aux_size uint64
}
// SetNonblock is a wrapper
func SetNonblock(fd int, nonblocking bool) (err error) {
return errNonLinux
}
// Mmap is a wrapper
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
return []byte{}, errNonLinux
}
// Munmap is a wrapper
func Munmap(b []byte) (err error) {
return errNonLinux
}
// PerfEventAttr is a wrapper
type PerfEventAttr struct {
Type uint32
Size uint32
@@ -219,48 +240,39 @@ type PerfEventAttr struct {
Sample_max_stack uint16
}
// PerfEventOpen is a wrapper
func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
return 0, errNonLinux
}
// Utsname is a wrapper
type Utsname struct {
Release [65]byte
Version [65]byte
}
// Uname is a wrapper
func Uname(buf *Utsname) (err error) {
return errNonLinux
}
// Getpid is a wrapper
func Getpid() int {
return -1
}
// Gettid is a wrapper
func Gettid() int {
return -1
}
// Tgkill is a wrapper
func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) {
return errNonLinux
}
// BytePtrFromString is a wrapper
func BytePtrFromString(s string) (*byte, error) {
return nil, errNonLinux
}
// ByteSliceToString is a wrapper
func ByteSliceToString(s []byte) string {
return ""
}
// Renameat2 is a wrapper
func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error {
return errNonLinux
}
@@ -276,3 +288,7 @@ func Open(path string, mode int, perm uint32) (int, error) {
func Fstat(fd int, stat *Stat_t) error {
return errNonLinux
}
func SetsockoptInt(fd, level, opt, value int) error {
return errNonLinux
}


@@ -23,6 +23,9 @@ func vdsoVersion() (uint32, error) {
// to the process. Go does not expose that data, so we must read it from procfs.
// https://man7.org/linux/man-pages/man3/getauxval.3.html
av, err := os.Open("/proc/self/auxv")
if errors.Is(err, unix.EACCES) {
return 0, fmt.Errorf("opening auxv: %w (process may not be dumpable due to file capabilities)", err)
}
if err != nil {
return 0, fmt.Errorf("opening auxv: %w", err)
}
@@ -117,7 +120,7 @@ func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) {
var name string
if n.NameSize > 0 {
// Read the note name, aligned to 4 bytes.
buf := make([]byte, Align(int(n.NameSize), 4))
buf := make([]byte, Align(n.NameSize, 4))
if err := binary.Read(sr, hdr.ByteOrder, &buf); err != nil {
return 0, fmt.Errorf("reading note name: %w", err)
}
@@ -139,7 +142,7 @@ func vdsoLinuxVersionCode(r io.ReaderAt) (uint32, error) {
}
// Discard the note descriptor if it exists but we're not interested in it.
if _, err := io.CopyN(io.Discard, sr, int64(Align(int(n.DescSize), 4))); err != nil {
if _, err := io.CopyN(io.Discard, sr, int64(Align(n.DescSize, 4))); err != nil {
return 0, err
}
}
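For reference, the 4-byte alignment applied to ELF note names and descriptors above rounds sizes up to the next multiple of four; an illustrative (non-vendored) test of the helper as it is called here:

package internal

import "testing"

// TestAlignNotePadding is an illustrative sketch only: a 5-byte note name is
// padded to 8 bytes, while a 12-byte descriptor is already aligned.
func TestAlignNotePadding(t *testing.T) {
	if got := Align(uint32(5), 4); got != 8 {
		t.Fatalf("Align(5, 4) = %d, want 8", got)
	}
	if got := Align(uint32(12), 4); got != 12 {
		t.Fatalf("Align(12, 4) = %d, want 12", got)
	}
}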


@@ -2,7 +2,6 @@ package internal
import (
"fmt"
"sync"
"github.com/cilium/ebpf/internal/unix"
)
@@ -15,14 +14,6 @@ const (
MagicKernelVersion = 0xFFFFFFFE
)
var (
kernelVersion = struct {
once sync.Once
version Version
err error
}{}
)
// A Version in the form Major.Minor.Patch.
type Version [3]uint16
@@ -88,16 +79,9 @@ func (v Version) Kernel() uint32 {
}
// KernelVersion returns the version of the currently running kernel.
func KernelVersion() (Version, error) {
kernelVersion.once.Do(func() {
kernelVersion.version, kernelVersion.err = detectKernelVersion()
})
if kernelVersion.err != nil {
return Version{}, kernelVersion.err
}
return kernelVersion.version, nil
}
var KernelVersion = Memoize(func() (Version, error) {
return detectKernelVersion()
})
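KernelVersion is now wrapped in the internal Memoize helper, so the kernel is probed once and the (Version, error) pair is returned from cache on later calls. A rough sketch of that memoization pattern (a simplified stand-in, not the vendored implementation):

package example

import "sync"

// memoize runs fn exactly once and hands the cached result to every caller,
// e.g. cached := memoize(expensiveProbe); v, err := cached(). It is a
// simplified stand-in for the module's generic Memoize helper.
func memoize[T any](fn func() (T, error)) func() (T, error) {
	var (
		once   sync.Once
		result T
		err    error
	)
	return func() (T, error) {
		once.Do(func() { result, err = fn() })
		return result, err
	}
}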
// detectKernelVersion returns the version of the running kernel.
func detectKernelVersion() (Version, error) {