142 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			142 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
//go:build linux
 | 
						|
 | 
						|
/*
   Copyright The containerd Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/
 | 
						|
 | 
						|
package v1
 | 
						|
 | 
						|
import (
 | 
						|
	"context"
 | 
						|
	"fmt"
 | 
						|
	"sync"
 | 
						|
 | 
						|
	"github.com/containerd/cgroups/v3/cgroup1"
 | 
						|
	eventstypes "github.com/containerd/containerd/api/events"
 | 
						|
	"github.com/containerd/containerd/pkg/oom"
 | 
						|
	"github.com/containerd/containerd/runtime"
 | 
						|
	"github.com/containerd/containerd/runtime/v2/shim"
 | 
						|
	"github.com/sirupsen/logrus"
 | 
						|
	"golang.org/x/sys/unix"
 | 
						|
)
 | 
						|
 | 
						|
// New returns an epoll implementation that listens to OOM events
 | 
						|
// from a container's cgroups.
 | 
						|
func New(publisher shim.Publisher) (oom.Watcher, error) {
 | 
						|
	fd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
	return &epoller{
 | 
						|
		fd:        fd,
 | 
						|
		publisher: publisher,
 | 
						|
		set:       make(map[uintptr]*item),
 | 
						|
	}, nil
 | 
						|
}
 | 
						|
 | 
						|
// epoller implementation for handling OOM events from a container's cgroup
 | 
						|
type epoller struct {
 | 
						|
	mu sync.Mutex
 | 
						|
 | 
						|
	fd        int
 | 
						|
	publisher shim.Publisher
 | 
						|
	set       map[uintptr]*item
 | 
						|
}
 | 
						|
 | 
						|
type item struct {
 | 
						|
	id string
 | 
						|
	cg cgroup1.Cgroup
 | 
						|
}
 | 
						|
 | 
						|
// Close the epoll fd
 | 
						|
func (e *epoller) Close() error {
 | 
						|
	return unix.Close(e.fd)
 | 
						|
}
 | 
						|
 | 
						|
// Run the epoll loop
 | 
						|
func (e *epoller) Run(ctx context.Context) {
 | 
						|
	var events [128]unix.EpollEvent
 | 
						|
	for {
 | 
						|
		select {
 | 
						|
		case <-ctx.Done():
 | 
						|
			e.Close()
 | 
						|
			return
 | 
						|
		default:
 | 
						|
			n, err := unix.EpollWait(e.fd, events[:], -1)
 | 
						|
			if err != nil {
 | 
						|
				if err == unix.EINTR {
 | 
						|
					continue
 | 
						|
				}
 | 
						|
				logrus.WithError(err).Error("cgroups: epoll wait")
 | 
						|
			}
 | 
						|
			for i := 0; i < n; i++ {
 | 
						|
				e.process(ctx, uintptr(events[i].Fd))
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Add cgroups.Cgroup to the epoll monitor
 | 
						|
func (e *epoller) Add(id string, cgx interface{}) error {
 | 
						|
	cg, ok := cgx.(cgroup1.Cgroup)
 | 
						|
	if !ok {
 | 
						|
		return fmt.Errorf("expected cgroups.Cgroup, got: %T", cgx)
 | 
						|
	}
 | 
						|
	e.mu.Lock()
 | 
						|
	defer e.mu.Unlock()
 | 
						|
	fd, err := cg.OOMEventFD()
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
	e.set[fd] = &item{
 | 
						|
		id: id,
 | 
						|
		cg: cg,
 | 
						|
	}
 | 
						|
	event := unix.EpollEvent{
 | 
						|
		Fd:     int32(fd),
 | 
						|
		Events: unix.EPOLLHUP | unix.EPOLLIN | unix.EPOLLERR,
 | 
						|
	}
 | 
						|
	return unix.EpollCtl(e.fd, unix.EPOLL_CTL_ADD, int(fd), &event)
 | 
						|
}
 | 
						|
 | 
						|
func (e *epoller) process(ctx context.Context, fd uintptr) {
 | 
						|
	flush(fd)
 | 
						|
	e.mu.Lock()
 | 
						|
	i, ok := e.set[fd]
 | 
						|
	if !ok {
 | 
						|
		e.mu.Unlock()
 | 
						|
		return
 | 
						|
	}
 | 
						|
	e.mu.Unlock()
 | 
						|
	if i.cg.State() == cgroup1.Deleted {
 | 
						|
		e.mu.Lock()
 | 
						|
		delete(e.set, fd)
 | 
						|
		e.mu.Unlock()
 | 
						|
		unix.Close(int(fd))
 | 
						|
		return
 | 
						|
	}
 | 
						|
	if err := e.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventstypes.TaskOOM{
 | 
						|
		ContainerID: i.id,
 | 
						|
	}); err != nil {
 | 
						|
		logrus.WithError(err).Error("publish OOM event")
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func flush(fd uintptr) error {
 | 
						|
	var buf [8]byte
 | 
						|
	_, err := unix.Read(int(fd), buf[:])
 | 
						|
	return err
 | 
						|
}
 |