51
pkg/sys/filesys_deprecated_windows.go
Normal file
51
pkg/sys/filesys_deprecated_windows.go
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/moby/sys/sequential"
|
||||
)
|
||||
|
||||
// CreateSequential is deprecated.
|
||||
//
|
||||
// Deprecated: use github.com/moby/sys/sequential.Create
|
||||
func CreateSequential(name string) (*os.File, error) {
|
||||
return sequential.Create(name)
|
||||
}
|
||||
|
||||
// OpenSequential is deprecated.
|
||||
//
|
||||
// Deprecated: use github.com/moby/sys/sequential.Open
|
||||
func OpenSequential(name string) (*os.File, error) {
|
||||
return sequential.Open(name)
|
||||
}
|
||||
|
||||
// OpenFileSequential is deprecated.
|
||||
//
|
||||
// Deprecated: use github.com/moby/sys/sequential.OpenFile
|
||||
func OpenFileSequential(name string, flag int, perm os.FileMode) (*os.File, error) {
|
||||
return sequential.OpenFile(name, flag, perm)
|
||||
}
|
||||
|
||||
// TempFileSequential is deprecated.
|
||||
//
|
||||
// Deprecated: use github.com/moby/sys/sequential.CreateTemp
|
||||
func TempFileSequential(dir, prefix string) (f *os.File, err error) {
|
||||
return sequential.CreateTemp(dir, prefix)
|
||||
}
|
||||
26
pkg/sys/filesys_unix.go
Normal file
26
pkg/sys/filesys_unix.go
Normal file
@@ -0,0 +1,26 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import "os"
|
||||
|
||||
// MkdirAllWithACL is a wrapper for os.MkdirAll on Unix systems.
|
||||
func MkdirAllWithACL(path string, perm os.FileMode) error {
|
||||
return os.MkdirAll(path, perm)
|
||||
}
|
||||
151
pkg/sys/filesys_windows.go
Normal file
151
pkg/sys/filesys_windows.go
Normal file
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
"os"
|
||||
"regexp"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
// SddlAdministratorsLocalSystem is local administrators plus NT AUTHORITY\System.
|
||||
const SddlAdministratorsLocalSystem = "D:P(A;OICI;GA;;;BA)(A;OICI;GA;;;SY)"
|
||||
|
||||
// volumePath is a regular expression to check if a path is a Windows
|
||||
// volume path (e.g., "\\?\Volume{4c1b02c1-d990-11dc-99ae-806e6f6e6963}"
|
||||
// or "\\?\Volume{4c1b02c1-d990-11dc-99ae-806e6f6e6963}\").
|
||||
var volumePath = regexp.MustCompile(`^\\\\\?\\Volume{[a-z0-9-]+}\\?$`)
|
||||
|
||||
// MkdirAllWithACL is a custom version of os.MkdirAll modified for use on Windows
|
||||
// so that it is both volume path aware, and to create a directory
|
||||
// an appropriate SDDL defined ACL for Builtin Administrators and Local System.
|
||||
func MkdirAllWithACL(path string, _ os.FileMode) error {
|
||||
sa, err := makeSecurityAttributes(SddlAdministratorsLocalSystem)
|
||||
if err != nil {
|
||||
return &os.PathError{Op: "mkdirall", Path: path, Err: err}
|
||||
}
|
||||
return mkdirall(path, sa)
|
||||
}
|
||||
|
||||
// MkdirAll is a custom version of os.MkdirAll that is volume path aware for
|
||||
// Windows. It can be used as a drop-in replacement for os.MkdirAll.
|
||||
func MkdirAll(path string, _ os.FileMode) error {
|
||||
return mkdirall(path, nil)
|
||||
}
|
||||
|
||||
// mkdirall is a custom version of os.MkdirAll modified for use on Windows
|
||||
// so that it is both volume path aware, and can create a directory with
|
||||
// a DACL.
|
||||
func mkdirall(path string, perm *windows.SecurityAttributes) error {
|
||||
if volumePath.MatchString(path) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// The rest of this method is largely copied from os.MkdirAll and should be kept
|
||||
// as-is to ensure compatibility.
|
||||
|
||||
// Fast path: if we can tell whether path is a directory or file, stop with success or error.
|
||||
dir, err := os.Stat(path)
|
||||
if err == nil {
|
||||
if dir.IsDir() {
|
||||
return nil
|
||||
}
|
||||
return &os.PathError{Op: "mkdir", Path: path, Err: syscall.ENOTDIR}
|
||||
}
|
||||
|
||||
// Slow path: make sure parent exists and then call Mkdir for path.
|
||||
i := len(path)
|
||||
for i > 0 && os.IsPathSeparator(path[i-1]) { // Skip trailing path separator.
|
||||
i--
|
||||
}
|
||||
|
||||
j := i
|
||||
for j > 0 && !os.IsPathSeparator(path[j-1]) { // Scan backward over element.
|
||||
j--
|
||||
}
|
||||
|
||||
if j > 1 {
|
||||
// Create parent.
|
||||
err = mkdirall(fixRootDirectory(path[:j-1]), perm)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Parent now exists; invoke Mkdir and use its result.
|
||||
err = mkdirWithACL(path, perm)
|
||||
if err != nil {
|
||||
// Handle arguments like "foo/." by
|
||||
// double-checking that directory doesn't exist.
|
||||
dir, err1 := os.Lstat(path)
|
||||
if err1 == nil && dir.IsDir() {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// mkdirWithACL creates a new directory. If there is an error, it will be of
|
||||
// type *PathError. .
|
||||
//
|
||||
// This is a modified and combined version of os.Mkdir and windows.Mkdir
|
||||
// in golang to cater for creating a directory am ACL permitting full
|
||||
// access, with inheritance, to any subfolder/file for Built-in Administrators
|
||||
// and Local System.
|
||||
func mkdirWithACL(name string, sa *windows.SecurityAttributes) error {
|
||||
if sa == nil {
|
||||
return os.Mkdir(name, 0)
|
||||
}
|
||||
|
||||
namep, err := windows.UTF16PtrFromString(name)
|
||||
if err != nil {
|
||||
return &os.PathError{Op: "mkdir", Path: name, Err: err}
|
||||
}
|
||||
|
||||
err = windows.CreateDirectory(namep, sa)
|
||||
if err != nil {
|
||||
return &os.PathError{Op: "mkdir", Path: name, Err: err}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// fixRootDirectory fixes a reference to a drive's root directory to
|
||||
// have the required trailing slash.
|
||||
func fixRootDirectory(p string) string {
|
||||
if len(p) == len(`\\?\c:`) {
|
||||
if os.IsPathSeparator(p[0]) && os.IsPathSeparator(p[1]) && p[2] == '?' && os.IsPathSeparator(p[3]) && p[5] == ':' {
|
||||
return p + `\`
|
||||
}
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func makeSecurityAttributes(sddl string) (*windows.SecurityAttributes, error) {
|
||||
var sa windows.SecurityAttributes
|
||||
sa.Length = uint32(unsafe.Sizeof(sa))
|
||||
sa.InheritHandle = 1
|
||||
var err error
|
||||
sa.SecurityDescriptor, err = windows.SecurityDescriptorFromString(sddl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &sa, nil
|
||||
}
|
||||
82
pkg/sys/oom_linux.go
Normal file
82
pkg/sys/oom_linux.go
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/containerd/containerd/v2/pkg/userns"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
// OOMScoreAdjMin is from OOM_SCORE_ADJ_MIN https://github.com/torvalds/linux/blob/v5.10/include/uapi/linux/oom.h#L9
|
||||
OOMScoreAdjMin = -1000
|
||||
// OOMScoreAdjMax is from OOM_SCORE_ADJ_MAX https://github.com/torvalds/linux/blob/v5.10/include/uapi/linux/oom.h#L10
|
||||
OOMScoreAdjMax = 1000
|
||||
)
|
||||
|
||||
// AdjustOOMScore sets the oom score for the provided pid. If the provided score
|
||||
// is out of range (-1000 - 1000), it is clipped to the min/max value.
|
||||
func AdjustOOMScore(pid, score int) error {
|
||||
if score > OOMScoreAdjMax {
|
||||
score = OOMScoreAdjMax
|
||||
} else if score < OOMScoreAdjMin {
|
||||
score = OOMScoreAdjMin
|
||||
}
|
||||
return SetOOMScore(pid, score)
|
||||
}
|
||||
|
||||
// SetOOMScore sets the oom score for the provided pid
|
||||
func SetOOMScore(pid, score int) error {
|
||||
if score > OOMScoreAdjMax || score < OOMScoreAdjMin {
|
||||
return fmt.Errorf("value out of range (%d): OOM score must be between %d and %d", score, OOMScoreAdjMin, OOMScoreAdjMax)
|
||||
}
|
||||
path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
|
||||
f, err := os.OpenFile(path, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
if _, err = f.WriteString(strconv.Itoa(score)); err != nil {
|
||||
if os.IsPermission(err) && (!runningPrivileged() || userns.RunningInUserNS()) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetOOMScoreAdj gets the oom score for a process. It returns 0 (zero) if either
|
||||
// no oom score is set, or a sore is set to 0.
|
||||
func GetOOMScoreAdj(pid int) (int, error) {
|
||||
path := fmt.Sprintf("/proc/%d/oom_score_adj", pid)
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return strconv.Atoi(strings.TrimSpace(string(data)))
|
||||
}
|
||||
|
||||
// runningPrivileged returns true if the effective user ID of the
|
||||
// calling process is 0
|
||||
func runningPrivileged() bool {
|
||||
return unix.Geteuid() == 0
|
||||
}
|
||||
129
pkg/sys/oom_linux_test.go
Normal file
129
pkg/sys/oom_linux_test.go
Normal file
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/containerd/containerd/v2/pkg/userns"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestSetPositiveOomScoreAdjustment(t *testing.T) {
|
||||
// Setting a *positive* OOM score adjust does not require privileged
|
||||
_, adjustment, err := adjustOom(123)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, adjustment, 123)
|
||||
}
|
||||
|
||||
func TestSetNegativeOomScoreAdjustmentWhenPrivileged(t *testing.T) {
|
||||
if !runningPrivileged() || userns.RunningInUserNS() {
|
||||
t.Skip("requires root and not running in user namespace")
|
||||
return
|
||||
}
|
||||
|
||||
_, adjustment, err := adjustOom(-123)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, adjustment, -123)
|
||||
}
|
||||
|
||||
func TestSetNegativeOomScoreAdjustmentWhenUnprivilegedHasNoEffect(t *testing.T) {
|
||||
if runningPrivileged() && !userns.RunningInUserNS() {
|
||||
t.Skip("needs to be run as non-root or in user namespace")
|
||||
return
|
||||
}
|
||||
|
||||
initial, adjustment, err := adjustOom(-123)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, adjustment, initial)
|
||||
}
|
||||
|
||||
func TestSetOOMScoreBoundaries(t *testing.T) {
|
||||
err := SetOOMScore(0, OOMScoreAdjMax+1)
|
||||
require.NotNil(t, err)
|
||||
assert.Contains(t, err.Error(), fmt.Sprintf("value out of range (%d): OOM score must be between", OOMScoreAdjMax+1))
|
||||
|
||||
err = SetOOMScore(0, OOMScoreAdjMin-1)
|
||||
require.NotNil(t, err)
|
||||
assert.Contains(t, err.Error(), fmt.Sprintf("value out of range (%d): OOM score must be between", OOMScoreAdjMin-1))
|
||||
|
||||
_, adjustment, err := adjustOom(OOMScoreAdjMax)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, adjustment, OOMScoreAdjMax)
|
||||
|
||||
score, err := GetOOMScoreAdj(os.Getpid())
|
||||
assert.NoError(t, err)
|
||||
if score == OOMScoreAdjMin {
|
||||
// We won't be able to set the score lower than the parent process. This
|
||||
// could also be tested if the parent process does not have a oom-score-adj
|
||||
// set, but GetOOMScoreAdj does not distinguish between "not set" and
|
||||
// "score is set, but zero".
|
||||
_, adjustment, err = adjustOom(OOMScoreAdjMin)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, adjustment, OOMScoreAdjMin)
|
||||
}
|
||||
}
|
||||
|
||||
func adjustOom(adjustment int) (int, int, error) {
|
||||
cmd := exec.Command("sleep", "100")
|
||||
if err := cmd.Start(); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
defer cmd.Process.Kill()
|
||||
|
||||
pid, err := waitForPid(cmd.Process)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
initial, err := GetOOMScoreAdj(pid)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
if err := SetOOMScore(pid, adjustment); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
adj, err := GetOOMScoreAdj(pid)
|
||||
return initial, adj, err
|
||||
}
|
||||
|
||||
func waitForPid(process *os.Process) (int, error) {
|
||||
c := make(chan int, 1)
|
||||
go func() {
|
||||
for {
|
||||
pid := process.Pid
|
||||
if pid != 0 {
|
||||
c <- pid
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
select {
|
||||
case pid := <-c:
|
||||
return pid, nil
|
||||
case <-time.After(10 * time.Second):
|
||||
return 0, errors.New("process did not start in 10 seconds")
|
||||
}
|
||||
}
|
||||
48
pkg/sys/oom_unsupported.go
Normal file
48
pkg/sys/oom_unsupported.go
Normal file
@@ -0,0 +1,48 @@
|
||||
//go:build !linux
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
const (
|
||||
// OOMScoreMaxKillable is not implemented on non Linux
|
||||
OOMScoreMaxKillable = 0
|
||||
// OOMScoreAdjMax is not implemented on non Linux
|
||||
OOMScoreAdjMax = 0
|
||||
)
|
||||
|
||||
// AdjustOOMScore sets the oom score for the provided pid. If the provided score
|
||||
// is out of range (-1000 - 1000), it is clipped to the min/max value.
|
||||
//
|
||||
// Not implemented on Windows
|
||||
func AdjustOOMScore(pid, score int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetOOMScore sets the oom score for the process
|
||||
//
|
||||
// Not implemented on Windows
|
||||
func SetOOMScore(pid, score int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetOOMScoreAdj gets the oom score for a process
|
||||
//
|
||||
// Not implemented on Windows
|
||||
func GetOOMScoreAdj(pid int) (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
290
pkg/sys/reaper/reaper_unix.go
Normal file
290
pkg/sys/reaper/reaper_unix.go
Normal file
@@ -0,0 +1,290 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package reaper
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
runc "github.com/containerd/go-runc"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// ErrNoSuchProcess is returned when the process no longer exists
|
||||
var ErrNoSuchProcess = errors.New("no such process")
|
||||
|
||||
const bufferSize = 32
|
||||
|
||||
type subscriber struct {
|
||||
sync.Mutex
|
||||
c chan runc.Exit
|
||||
closed bool
|
||||
}
|
||||
|
||||
func (s *subscriber) close() {
|
||||
s.Lock()
|
||||
if s.closed {
|
||||
s.Unlock()
|
||||
return
|
||||
}
|
||||
close(s.c)
|
||||
s.closed = true
|
||||
s.Unlock()
|
||||
}
|
||||
|
||||
func (s *subscriber) do(fn func()) {
|
||||
s.Lock()
|
||||
fn()
|
||||
s.Unlock()
|
||||
}
|
||||
|
||||
// Reap should be called when the process receives an SIGCHLD. Reap will reap
|
||||
// all exited processes and close their wait channels
|
||||
func Reap() error {
|
||||
now := time.Now()
|
||||
exits, err := reap(false)
|
||||
for _, e := range exits {
|
||||
done := Default.notify(runc.Exit{
|
||||
Timestamp: now,
|
||||
Pid: e.Pid,
|
||||
Status: e.Status,
|
||||
})
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(1 * time.Second):
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Default is the default monitor initialized for the package
|
||||
var Default = &Monitor{
|
||||
subscribers: make(map[chan runc.Exit]*subscriber),
|
||||
}
|
||||
|
||||
// Monitor monitors the underlying system for process status changes
|
||||
type Monitor struct {
|
||||
sync.Mutex
|
||||
|
||||
subscribers map[chan runc.Exit]*subscriber
|
||||
}
|
||||
|
||||
// Start starts the command and registers the process with the reaper
|
||||
func (m *Monitor) Start(c *exec.Cmd) (chan runc.Exit, error) {
|
||||
ec := m.Subscribe()
|
||||
if err := c.Start(); err != nil {
|
||||
m.Unsubscribe(ec)
|
||||
return nil, err
|
||||
}
|
||||
return ec, nil
|
||||
}
|
||||
|
||||
// StartLocked starts the command and registers the process with the reaper
|
||||
func (m *Monitor) StartLocked(c *exec.Cmd) (chan runc.Exit, error) {
|
||||
runtime.LockOSThread()
|
||||
defer runtime.UnlockOSThread()
|
||||
return m.Start(c)
|
||||
}
|
||||
|
||||
// Wait blocks until a process is signal as dead.
|
||||
// User should rely on the value of the exit status to determine if the
|
||||
// command was successful or not.
|
||||
func (m *Monitor) Wait(c *exec.Cmd, ec chan runc.Exit) (int, error) {
|
||||
for e := range ec {
|
||||
if e.Pid == c.Process.Pid {
|
||||
// make sure we flush all IO
|
||||
c.Wait()
|
||||
m.Unsubscribe(ec)
|
||||
return e.Status, nil
|
||||
}
|
||||
}
|
||||
// return no such process if the ec channel is closed and no more exit
|
||||
// events will be sent
|
||||
return -1, ErrNoSuchProcess
|
||||
}
|
||||
|
||||
// WaitTimeout is used to skip the blocked command and kill the left process.
|
||||
func (m *Monitor) WaitTimeout(c *exec.Cmd, ec chan runc.Exit, timeout time.Duration) (int, error) {
|
||||
type exitStatusWrapper struct {
|
||||
status int
|
||||
err error
|
||||
}
|
||||
|
||||
// capacity can make sure that the following goroutine will not be
|
||||
// blocked if there is no receiver when timeout.
|
||||
waitCh := make(chan *exitStatusWrapper, 1)
|
||||
go func() {
|
||||
defer close(waitCh)
|
||||
|
||||
status, err := m.Wait(c, ec)
|
||||
waitCh <- &exitStatusWrapper{
|
||||
status: status,
|
||||
err: err,
|
||||
}
|
||||
}()
|
||||
|
||||
timer := time.NewTimer(timeout)
|
||||
defer timer.Stop()
|
||||
|
||||
select {
|
||||
case <-timer.C:
|
||||
syscall.Kill(c.Process.Pid, syscall.SIGKILL)
|
||||
return 0, fmt.Errorf("timeout %v for cmd(pid=%d): %s, %s", timeout, c.Process.Pid, c.Path, c.Args)
|
||||
case res := <-waitCh:
|
||||
return res.status, res.err
|
||||
}
|
||||
}
|
||||
|
||||
// Subscribe to process exit changes
|
||||
func (m *Monitor) Subscribe() chan runc.Exit {
|
||||
c := make(chan runc.Exit, bufferSize)
|
||||
m.Lock()
|
||||
m.subscribers[c] = &subscriber{
|
||||
c: c,
|
||||
}
|
||||
m.Unlock()
|
||||
return c
|
||||
}
|
||||
|
||||
// Unsubscribe to process exit changes
|
||||
func (m *Monitor) Unsubscribe(c chan runc.Exit) {
|
||||
m.Lock()
|
||||
s, ok := m.subscribers[c]
|
||||
if !ok {
|
||||
m.Unlock()
|
||||
return
|
||||
}
|
||||
s.close()
|
||||
delete(m.subscribers, c)
|
||||
m.Unlock()
|
||||
}
|
||||
|
||||
func (m *Monitor) getSubscribers() map[chan runc.Exit]*subscriber {
|
||||
out := make(map[chan runc.Exit]*subscriber)
|
||||
m.Lock()
|
||||
for k, v := range m.subscribers {
|
||||
out[k] = v
|
||||
}
|
||||
m.Unlock()
|
||||
return out
|
||||
}
|
||||
|
||||
func (m *Monitor) notify(e runc.Exit) chan struct{} {
|
||||
const timeout = 1 * time.Millisecond
|
||||
var (
|
||||
done = make(chan struct{}, 1)
|
||||
timer = time.NewTimer(timeout)
|
||||
success = make(map[chan runc.Exit]struct{})
|
||||
)
|
||||
stop(timer, true)
|
||||
|
||||
go func() {
|
||||
defer close(done)
|
||||
|
||||
for {
|
||||
var (
|
||||
failed int
|
||||
subscribers = m.getSubscribers()
|
||||
)
|
||||
for _, s := range subscribers {
|
||||
s.do(func() {
|
||||
if s.closed {
|
||||
return
|
||||
}
|
||||
if _, ok := success[s.c]; ok {
|
||||
return
|
||||
}
|
||||
timer.Reset(timeout)
|
||||
recv := true
|
||||
select {
|
||||
case s.c <- e:
|
||||
success[s.c] = struct{}{}
|
||||
case <-timer.C:
|
||||
recv = false
|
||||
failed++
|
||||
}
|
||||
stop(timer, recv)
|
||||
})
|
||||
}
|
||||
// all subscribers received the message
|
||||
if failed == 0 {
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
return done
|
||||
}
|
||||
|
||||
func stop(timer *time.Timer, recv bool) {
|
||||
if !timer.Stop() && recv {
|
||||
<-timer.C
|
||||
}
|
||||
}
|
||||
|
||||
// exit is the wait4 information from an exited process
|
||||
type exit struct {
|
||||
Pid int
|
||||
Status int
|
||||
}
|
||||
|
||||
// reap reaps all child processes for the calling process and returns their
|
||||
// exit information
|
||||
func reap(wait bool) (exits []exit, err error) {
|
||||
var (
|
||||
ws unix.WaitStatus
|
||||
rus unix.Rusage
|
||||
)
|
||||
flag := unix.WNOHANG
|
||||
if wait {
|
||||
flag = 0
|
||||
}
|
||||
for {
|
||||
pid, err := unix.Wait4(-1, &ws, flag, &rus)
|
||||
if err != nil {
|
||||
if err == unix.ECHILD {
|
||||
return exits, nil
|
||||
}
|
||||
return exits, err
|
||||
}
|
||||
if pid <= 0 {
|
||||
return exits, nil
|
||||
}
|
||||
exits = append(exits, exit{
|
||||
Pid: pid,
|
||||
Status: exitStatus(ws),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
const exitSignalOffset = 128
|
||||
|
||||
// exitStatus returns the correct exit status for a process based on if it
|
||||
// was signaled or exited cleanly
|
||||
func exitStatus(status unix.WaitStatus) int {
|
||||
if status.Signaled() {
|
||||
return exitSignalOffset + int(status.Signal())
|
||||
}
|
||||
return status.ExitStatus()
|
||||
}
|
||||
39
pkg/sys/reaper/reaper_utils_linux.go
Normal file
39
pkg/sys/reaper/reaper_utils_linux.go
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package reaper
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// SetSubreaper sets the value i as the subreaper setting for the calling process
|
||||
func SetSubreaper(i int) error {
|
||||
return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
|
||||
}
|
||||
|
||||
// GetSubreaper returns the subreaper setting for the calling process
|
||||
func GetSubreaper() (int, error) {
|
||||
var i uintptr
|
||||
|
||||
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
return int(i), nil
|
||||
}
|
||||
80
pkg/sys/socket_unix.go
Normal file
80
pkg/sys/socket_unix.go
Normal file
@@ -0,0 +1,80 @@
|
||||
//go:build !windows
|
||||
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// CreateUnixSocket creates a unix socket and returns the listener
|
||||
func CreateUnixSocket(path string) (net.Listener, error) {
|
||||
// BSDs have a 104 limit
|
||||
if len(path) > 104 {
|
||||
return nil, fmt.Errorf("%q: unix socket path too long (> 104)", path)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0660); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := unix.Unlink(path); err != nil && !os.IsNotExist(err) {
|
||||
return nil, err
|
||||
}
|
||||
return net.Listen("unix", path)
|
||||
}
|
||||
|
||||
// GetLocalListener returns a listener out of a unix socket.
|
||||
func GetLocalListener(path string, uid, gid int) (net.Listener, error) {
|
||||
// Ensure parent directory is created
|
||||
if err := mkdirAs(filepath.Dir(path), uid, gid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
l, err := CreateUnixSocket(path)
|
||||
if err != nil {
|
||||
return l, err
|
||||
}
|
||||
|
||||
if err := os.Chmod(path, 0660); err != nil {
|
||||
l.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := os.Chown(path, uid, gid); err != nil {
|
||||
l.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return l, nil
|
||||
}
|
||||
|
||||
func mkdirAs(path string, uid, gid int) error {
|
||||
if _, err := os.Stat(path); !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(path, 0770); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return os.Chown(path, uid, gid)
|
||||
}
|
||||
30
pkg/sys/socket_windows.go
Normal file
30
pkg/sys/socket_windows.go
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
"net"
|
||||
|
||||
"github.com/Microsoft/go-winio"
|
||||
)
|
||||
|
||||
// GetLocalListener returns a Listernet out of a named pipe.
|
||||
// `path` must be of the form of `\\.\pipe\<pipename>`
|
||||
// (see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365150)
|
||||
func GetLocalListener(path string, uid, gid int) (net.Listener, error) {
|
||||
return winio.ListenPipe(path, nil)
|
||||
}
|
||||
30
pkg/sys/subprocess_unsafe_linux.go
Normal file
30
pkg/sys/subprocess_unsafe_linux.go
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
_ "unsafe" // required for go:linkname.
|
||||
)
|
||||
|
||||
//go:linkname beforeFork syscall.runtime_BeforeFork
|
||||
func beforeFork()
|
||||
|
||||
//go:linkname afterFork syscall.runtime_AfterFork
|
||||
func afterFork()
|
||||
|
||||
//go:linkname afterForkInChild syscall.runtime_AfterForkInChild
|
||||
func afterForkInChild()
|
||||
94
pkg/sys/userns_unsafe_linux.go
Normal file
94
pkg/sys/userns_unsafe_linux.go
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package sys
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// ForkUserns is to fork child process with user namespace. It returns child
|
||||
// process's pid and pidfd reference to the child process.
|
||||
//
|
||||
// Precondition: The runtime OS thread must be locked, which is GO runtime
|
||||
// requirement.
|
||||
//
|
||||
// Beside this, the child process sets PR_SET_PDEATHSIG with SIGKILL so that
|
||||
// the parent process's OS thread must be locked. Otherwise, the exit event of
|
||||
// parent process's OS thread will send kill signal to the child process,
|
||||
// even if parent process is still running.
|
||||
//
|
||||
//go:norace
|
||||
//go:noinline
|
||||
func ForkUserns() (_pid uintptr, _pidfd uintptr, _ syscall.Errno) {
|
||||
var (
|
||||
pidfd uintptr
|
||||
pid, ppid uintptr
|
||||
err syscall.Errno
|
||||
)
|
||||
|
||||
ppid, _, err = syscall.RawSyscall(uintptr(syscall.SYS_GETPID), 0, 0, 0)
|
||||
if err != 0 {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
beforeFork()
|
||||
if runtime.GOARCH == "s390x" {
|
||||
// NOTE:
|
||||
//
|
||||
// On the s390 architectures, the order of the first two
|
||||
// arguments is reversed.
|
||||
//
|
||||
// REF: https://man7.org/linux/man-pages/man2/clone.2.html
|
||||
pid, _, err = syscall.RawSyscall(syscall.SYS_CLONE,
|
||||
0,
|
||||
uintptr(syscall.CLONE_NEWUSER|syscall.SIGCHLD|syscall.CLONE_PIDFD),
|
||||
uintptr(unsafe.Pointer(&pidfd)),
|
||||
)
|
||||
} else {
|
||||
pid, _, err = syscall.RawSyscall(syscall.SYS_CLONE,
|
||||
uintptr(syscall.CLONE_NEWUSER|syscall.SIGCHLD|syscall.CLONE_PIDFD),
|
||||
0,
|
||||
uintptr(unsafe.Pointer(&pidfd)),
|
||||
)
|
||||
}
|
||||
if err != 0 || pid != 0 {
|
||||
afterFork()
|
||||
return pid, pidfd, err
|
||||
}
|
||||
afterForkInChild()
|
||||
|
||||
if _, _, err = syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, uintptr(syscall.SIGKILL), 0); err != 0 {
|
||||
goto err
|
||||
}
|
||||
|
||||
pid, _, err = syscall.RawSyscall(syscall.SYS_GETPPID, 0, 0, 0)
|
||||
if err != 0 {
|
||||
goto err
|
||||
}
|
||||
|
||||
// exit if re-parent
|
||||
if pid != ppid {
|
||||
goto err
|
||||
}
|
||||
|
||||
_, _, err = syscall.RawSyscall(syscall.SYS_PPOLL, 0, 0, 0)
|
||||
err:
|
||||
syscall.RawSyscall(syscall.SYS_EXIT, uintptr(err), 0, 0)
|
||||
panic("unreachable")
|
||||
}
|
||||
Reference in New Issue
Block a user