From 5e3c399d48f29fc36ffd9de8adc7d3e606154828 Mon Sep 17 00:00:00 2001 From: Justin Cormack Date: Tue, 11 Apr 2017 11:07:07 +0100 Subject: [PATCH] Update the opencontainers/runc vendor This fixes the ugly build errors on Alpine Linux which the old version gave from C type mismatches, and now gives a nice neat line of whales on build... Signed-off-by: Justin Cormack --- vendor.conf | 2 +- .../github.com/opencontainers/runc/README.md | 7 + .../runc/libcontainer/README.md | 11 + .../runc/libcontainer/nsenter/nsexec.c | 204 +++++++++++++----- .../runc/libcontainer/utils/cmsg.c | 148 ------------- .../runc/libcontainer/utils/cmsg.go | 74 +++++-- .../runc/libcontainer/utils/cmsg.h | 36 ---- .../runc/libcontainer/utils/utils_unix.go | 10 + 8 files changed, 239 insertions(+), 253 deletions(-) delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h diff --git a/vendor.conf b/vendor.conf index 26f96b5ca..05fa4bfe2 100644 --- a/vendor.conf +++ b/vendor.conf @@ -11,7 +11,7 @@ github.com/matttproud/golang_protobuf_extensions v1.0.0 github.com/docker/go-units v0.3.1 github.com/gogo/protobuf d2e1ade2d719b78fe5b061b4c18a9f7111b5bdc8 github.com/golang/protobuf 8ee79997227bf9b34611aee7946ae64735e6fd93 -github.com/opencontainers/runc ce450bcc6c135cae93ee2a99d41a308c179ff6dc +github.com/opencontainers/runc 50401b5b4c2e01e4f1372b73a021742deeaf4e2d github.com/opencontainers/runtime-spec 035da1dca3dfbb00d752eb58b0b158d6129f3776 github.com/Sirupsen/logrus v0.11.0 github.com/stevvooe/go-btrfs ea304655a3ed8f00773db1844f921d12541ee0d1 diff --git a/vendor/github.com/opencontainers/runc/README.md b/vendor/github.com/opencontainers/runc/README.md index ec9b44ed8..ae1ab2819 100644 --- a/vendor/github.com/opencontainers/runc/README.md +++ b/vendor/github.com/opencontainers/runc/README.md @@ -2,6 +2,7 @@ [![Build 
Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc) +[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc) ## Introduction @@ -76,6 +77,12 @@ You can run a specific test case by setting the `TESTFLAGS` variable. # make test TESTFLAGS="-run=SomeTestFunction" ``` +### Dependencies Management + +`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management. +Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update +new dependencies. + ## Using runc ### Creating an OCI Bundle diff --git a/vendor/github.com/opencontainers/runc/libcontainer/README.md b/vendor/github.com/opencontainers/runc/libcontainer/README.md index 9435cf7db..d2a7d7889 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/README.md +++ b/vendor/github.com/opencontainers/runc/libcontainer/README.md @@ -1,3 +1,7 @@ +# libcontainer + +[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer) + Libcontainer provides a native Go implementation for creating containers with namespaces, cgroups, capabilities, and filesystem access controls. It allows you to manage the lifecycle of the container performing additional operations @@ -16,7 +20,14 @@ the current binary (/proc/self/exe) to be executed as the init process, and use arg "init", we call the first step process "bootstrap", so you always need a "init" function as the entry of "bootstrap". +In addition to the go init function the early stage bootstrap is handled by importing +[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md). 
+ ```go +import ( + _ "github.com/opencontainers/runc/libcontainer/nsenter" +) + func init() { if len(os.Args) > 1 && os.Args[1] == "init" { runtime.GOMAXPROCS(1) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index 7d15aeb5a..0ad688343 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -33,7 +33,8 @@ enum sync_t { SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */ SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */ SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */ - SYNC_CHILD_READY = 0x44, /* The grandchild is ready to return. */ + SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */ + SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */ /* XXX: This doesn't help with segfaults and other such issues. */ SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */ @@ -71,18 +72,23 @@ struct nlconfig_t { char *namespaces; size_t namespaces_len; uint8_t is_setgroup; + uint8_t is_rootless; + char *oom_score_adj; + size_t oom_score_adj_len; }; /* * List of netlink message types sent to us as part of bootstrapping the init. * These constants are defined in libcontainer/message_linux.go. */ -#define INIT_MSG 62000 +#define INIT_MSG 62000 #define CLONE_FLAGS_ATTR 27281 #define NS_PATHS_ATTR 27282 -#define UIDMAP_ATTR 27283 -#define GIDMAP_ATTR 27284 +#define UIDMAP_ATTR 27283 +#define GIDMAP_ATTR 27284 #define SETGROUP_ATTR 27285 +#define OOM_SCORE_ADJ_ATTR 27286 +#define ROOTLESS_ATTR 27287 /* * Use the raw syscall for versions of glibc which don't include a function for @@ -171,6 +177,7 @@ static void update_setgroups(int pid, enum policy_t setgroup) policy = "deny"; break; case SETGROUPS_DEFAULT: + default: /* Nothing to do. 
/*
 * Apply the OOM score adjustment supplied in the bootstrap configuration to
 * the current process.
 *
 * data/len describe the raw bytes to be written to /proc/self/oom_score_adj.
 * When no value was supplied (data is NULL or len is zero) the function does
 * nothing. A failed write is fatal and is reported through bail().
 */
static void update_oom_score_adj(char *data, size_t len)
{
	/* len is unsigned, so "nothing to write" simply means zero. */
	if (data != NULL && len > 0) {
		if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
			bail("failed to update /proc/self/oom_score_adj");
	}
}
*/ config->data = data; @@ -316,6 +332,13 @@ static void nl_parse(int fd, struct nlconfig_t *config) case CLONE_FLAGS_ATTR: config->cloneflags = readint32(current); break; + case ROOTLESS_ATTR: + config->is_rootless = readint8(current); + break; + case OOM_SCORE_ADJ_ATTR: + config->oom_score_adj = current; + config->oom_score_adj_len = payload_len; + break; case NS_PATHS_ATTR: config->namespaces = current; config->namespaces_len = payload_len; @@ -413,7 +436,7 @@ void nsexec(void) { int pipenum; jmp_buf env; - int syncpipe[2]; + int sync_child_pipe[2], sync_grandchild_pipe[2]; struct nlconfig_t config = {0}; /* @@ -424,18 +447,43 @@ void nsexec(void) if (pipenum == -1) return; - /* make the process non-dumpable */ - if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) { - bail("failed to set process as non-dumpable"); - } - /* Parse all of the netlink configuration. */ nl_parse(pipenum, &config); + /* Set oom_score_adj. This has to be done before !dumpable because + * /proc/self/oom_score_adj is not writeable unless you're an privileged + * user (if !dumpable is set). All children inherit their parent's + * oom_score_adj value on fork(2) so this will always be propagated + * properly. + */ + update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len); + + /* + * Make the process non-dumpable, to avoid various race conditions that + * could cause processes in namespaces we're joining to access host + * resources (or potentially execute code). + * + * However, if the number of namespaces we are joining is 0, we are not + * going to be switching to a different security context. Thus setting + * ourselves to be non-dumpable only breaks things (like rootless + * containers), which is the recommendation from the kernel folks. + */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) + bail("failed to set process as non-dumpable"); + } + /* Pipe so we can tell the child when we've finished setting up. 
*/ - if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0) + if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0) bail("failed to setup sync pipe between parent and child"); + /* + * We need a new socketpair to sync with grandchild so we don't have + * race condition with child. + */ + if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0) + bail("failed to setup sync pipe between parent and grandchild"); + /* TODO: Currently we aren't dealing with child deaths properly. */ /* @@ -494,9 +542,10 @@ void nsexec(void) * process. */ case JUMP_PARENT: { - int len, ready = 0; + int len; pid_t child; char buf[JSON_MAX]; + bool ready = false; /* For debugging. */ prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0); @@ -513,30 +562,39 @@ void nsexec(void) * ready, so we can receive all possible error codes * generated by children. */ - while (ready < 2) { + while (!ready) { enum sync_t s; + int ret; - /* This doesn't need to be global, we're in the parent. */ - int syncfd = syncpipe[1]; + syncfd = sync_child_pipe[1]; + close(sync_child_pipe[0]); if (read(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with child: next state"); switch (s) { - case SYNC_ERR: { - /* We have to mirror the error code of the child. */ - int ret; + case SYNC_ERR: + /* We have to mirror the error code of the child. */ + if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) + bail("failed to sync with child: read(error code)"); - if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) - bail("failed to sync with child: read(error code)"); - - exit(ret); - } - break; + exit(ret); case SYNC_USERMAP_PLS: - /* Enable setgroups(2) if we've been asked to. */ + /* + * Enable setgroups(2) if we've been asked to. But we also + * have to explicitly disable setgroups(2) if we're + * creating a rootless container (this is required since + * Linux 3.19). 
+ */ + if (config.is_rootless && config.is_setgroup) { + kill(child, SIGKILL); + bail("cannot allow setgroup in an unprivileged user namespace setup"); + } + if (config.is_setgroup) update_setgroups(child, SETGROUPS_ALLOW); + if (config.is_rootless) + update_setgroups(child, SETGROUPS_DENY); /* Set up mappings. */ update_uidmap(child, config.uidmap, config.uidmap_len); @@ -548,11 +606,6 @@ void nsexec(void) bail("failed to sync with child: write(SYNC_USERMAP_ACK)"); } break; - case SYNC_USERMAP_ACK: - /* We should _never_ receive acks. */ - kill(child, SIGKILL); - bail("failed to sync with child: unexpected SYNC_USERMAP_ACK"); - break; case SYNC_RECVPID_PLS: { pid_t old = child; @@ -570,20 +623,46 @@ void nsexec(void) bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); } } - - ready++; - break; - case SYNC_RECVPID_ACK: - /* We should _never_ receive acks. */ - kill(child, SIGKILL); - bail("failed to sync with child: unexpected SYNC_RECVPID_ACK"); break; case SYNC_CHILD_READY: - ready++; + ready = true; break; default: - bail("unexpected sync value"); + bail("unexpected sync value: %u", s); + } + } + + /* Now sync with grandchild. */ + + ready = false; + while (!ready) { + enum sync_t s; + int ret; + + syncfd = sync_grandchild_pipe[1]; + close(sync_grandchild_pipe[0]); + + s = SYNC_GRANDCHILD; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with child: write(SYNC_GRANDCHILD)"); + } + + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with child: next state"); + + switch (s) { + case SYNC_ERR: + /* We have to mirror the error code of the child. */ + if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) + bail("failed to sync with child: read(error code)"); + + exit(ret); + case SYNC_CHILD_READY: + ready = true; break; + default: + bail("unexpected sync value: %u", s); } } @@ -615,7 +694,8 @@ void nsexec(void) enum sync_t s; /* We're in a child and thus need to tell the parent if we die. 
*/ - syncfd = syncpipe[0]; + syncfd = sync_child_pipe[0]; + close(sync_child_pipe[1]); /* For debugging. */ prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0); @@ -653,6 +733,11 @@ void nsexec(void) * clone_parent rant). So signal our parent to hook us up. */ + /* Switching is only necessary if we joined namespaces. */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0) + bail("failed to set process as dumpable"); + } s = SYNC_USERMAP_PLS; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); @@ -663,6 +748,11 @@ void nsexec(void) bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); if (s != SYNC_USERMAP_ACK) bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); + /* Switching is only necessary if we joined namespaces. */ + if (config.namespaces) { + if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) + bail("failed to set process as dumpable"); + } } /* @@ -700,6 +790,12 @@ void nsexec(void) bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s); } + s = SYNC_CHILD_READY; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + kill(child, SIGKILL); + bail("failed to sync with parent: write(SYNC_CHILD_READY)"); + } + /* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */ exit(0); } @@ -718,11 +814,19 @@ void nsexec(void) enum sync_t s; /* We're in a child and thus need to tell the parent if we die. */ - syncfd = syncpipe[0]; + syncfd = sync_grandchild_pipe[0]; + close(sync_grandchild_pipe[1]); + close(sync_child_pipe[0]); + close(sync_child_pipe[1]); /* For debugging. 
*/ prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0); + if (read(syncfd, &s, sizeof(s)) != sizeof(s)) + bail("failed to sync with parent: read(SYNC_GRANDCHILD)"); + if (s != SYNC_GRANDCHILD) + bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s); + if (setsid() < 0) bail("setsid failed"); @@ -732,16 +836,17 @@ void nsexec(void) if (setgid(0) < 0) bail("setgid failed"); - if (setgroups(0, NULL) < 0) - bail("setgroups failed"); + if (!config.is_rootless && config.is_setgroup) { + if (setgroups(0, NULL) < 0) + bail("setgroups failed"); + } s = SYNC_CHILD_READY; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with patent: write(SYNC_CHILD_READY)"); /* Close sync pipes. */ - close(syncpipe[0]); - close(syncpipe[1]); + close(sync_grandchild_pipe[0]); /* Free netlink data. */ nl_free(&config); @@ -751,7 +856,6 @@ void nsexec(void) } default: bail("unexpected jump value"); - break; } /* Should never be reached. */ diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c deleted file mode 100644 index e77ca69f8..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright 2016 SUSE LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "cmsg.h" - -#define error(fmt, ...) 
\ - ({ \ - fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \ - errno = ECOMM; \ - goto err; /* return value */ \ - }) - -/* - * Sends a file descriptor along the sockfd provided. Returns the return - * value of sendmsg(2). Any synchronisation and preparation of state - * should be done external to this (we expect the other side to be in - * recvfd() in the code). - */ -ssize_t sendfd(int sockfd, struct file_t file) -{ - struct msghdr msg = {0}; - struct iovec iov[1] = {0}; - struct cmsghdr *cmsg; - int *fdptr; - int ret; - - union { - char buf[CMSG_SPACE(sizeof(file.fd))]; - struct cmsghdr align; - } u; - - /* - * We need to send some other data along with the ancillary data, - * otherwise the other side won't recieve any data. This is very - * well-hidden in the documentation (and only applies to - * SOCK_STREAM). See the bottom part of unix(7). - */ - iov[0].iov_base = file.name; - iov[0].iov_len = strlen(file.name) + 1; - - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_iov = iov; - msg.msg_iovlen = 1; - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - cmsg->cmsg_len = CMSG_LEN(sizeof(int)); - - fdptr = (int *) CMSG_DATA(cmsg); - memcpy(fdptr, &file.fd, sizeof(int)); - - return sendmsg(sockfd, &msg, 0); -} - -/* - * Receives a file descriptor from the sockfd provided. Returns the file - * descriptor as sent from sendfd(). It will return the file descriptor - * or die (literally) trying. Any synchronisation and preparation of - * state should be done external to this (we expect the other side to be - * in sendfd() in the code). - */ -struct file_t recvfd(int sockfd) -{ - struct msghdr msg = {0}; - struct iovec iov[1] = {0}; - struct cmsghdr *cmsg; - struct file_t file = {0}; - int *fdptr; - int olderrno; - - union { - char buf[CMSG_SPACE(sizeof(file.fd))]; - struct cmsghdr align; - } u; - - /* Allocate a buffer. 
*/ - /* TODO: Make this dynamic with MSG_PEEK. */ - file.name = malloc(TAG_BUFFER); - if (!file.name) - error("recvfd: failed to allocate file.tag buffer\n"); - - /* - * We need to "recieve" the non-ancillary data even though we don't - * plan to use it at all. Otherwise, things won't work as expected. - * See unix(7) and other well-hidden documentation. - */ - iov[0].iov_base = file.name; - iov[0].iov_len = TAG_BUFFER; - - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_iov = iov; - msg.msg_iovlen = 1; - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - - ssize_t ret = recvmsg(sockfd, &msg, 0); - if (ret < 0) - goto err; - - cmsg = CMSG_FIRSTHDR(&msg); - if (!cmsg) - error("recvfd: got NULL from CMSG_FIRSTHDR"); - if (cmsg->cmsg_level != SOL_SOCKET) - error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level); - if (cmsg->cmsg_type != SCM_RIGHTS) - error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type); - if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) - error("recvfd: expected correct CMSG_LEN in cmsg: %lu", cmsg->cmsg_len); - - fdptr = (int *) CMSG_DATA(cmsg); - if (!fdptr || *fdptr < 0) - error("recvfd: recieved invalid pointer"); - - file.fd = *fdptr; - return file; - -err: - olderrno = errno; - free(file.name); - errno = olderrno; - return (struct file_t){0}; -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go index ee8937417..2cbb6491a 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go @@ -3,7 +3,7 @@ package utils /* - * Copyright 2016 SUSE LLC + * Copyright 2016, 2017 SUSE LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,28 +18,66 @@ package utils * limitations under the License. 
*/ -/* -#include -#include -#include "cmsg.h" -*/ -import "C" - import ( + "fmt" "os" - "unsafe" + + "golang.org/x/sys/unix" ) +// MaxSendfdLen is the maximum length of the name of a file descriptor being +// sent using SendFd. The name of the file handle returned by RecvFd will never +// be larger than this value. +const MaxNameLen = 4096 + +// oobSpace is the size of the oob slice required to store a single FD. Note +// that unix.UnixRights appears to make the assumption that fd is always int32, +// so sizeof(fd) = 4. +var oobSpace = unix.CmsgSpace(4) + // RecvFd waits for a file descriptor to be sent over the given AF_UNIX // socket. The file name of the remote file descriptor will be recreated // locally (it is sent as non-auxiliary data in the same payload). func RecvFd(socket *os.File) (*os.File, error) { - file, err := C.recvfd(C.int(socket.Fd())) + // For some reason, unix.Recvmsg uses the length rather than the capacity + // when passing the msg_controllen and other attributes to recvmsg. So we + // have to actually set the length. + name := make([]byte, MaxNameLen) + oob := make([]byte, oobSpace) + + sockfd := socket.Fd() + n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0) if err != nil { return nil, err } - defer C.free(unsafe.Pointer(file.name)) - return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil + + if n >= MaxNameLen || oobn != oobSpace { + return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn) + } + + // Truncate. 
+ name = name[:n] + oob = oob[:oobn] + + scms, err := unix.ParseSocketControlMessage(oob) + if err != nil { + return nil, err + } + if len(scms) != 1 { + return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms)) + } + scm := scms[0] + + fds, err := unix.ParseUnixRights(&scm) + if err != nil { + return nil, err + } + if len(fds) != 1 { + return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds)) + } + fd := uintptr(fds[0]) + + return os.NewFile(fd, string(name)), nil } // SendFd sends a file descriptor over the given AF_UNIX socket. In @@ -47,11 +85,11 @@ func RecvFd(socket *os.File) (*os.File, error) { // non-auxiliary data in the same payload (allowing to send contextual // information for a file descriptor). func SendFd(socket, file *os.File) error { - var cfile C.struct_file_t - cfile.fd = C.int(file.Fd()) - cfile.name = C.CString(file.Name()) - defer C.free(unsafe.Pointer(cfile.name)) + name := []byte(file.Name()) + if len(name) >= MaxNameLen { + return fmt.Errorf("sendfd: filename too long: %s", file.Name()) + } + oob := unix.UnixRights(int(file.Fd())) - _, err := C.sendfd(C.int(socket.Fd()), cfile) - return err + return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0) } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h deleted file mode 100644 index 3fe764254..000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2016 SUSE LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
// NewSockPair creates a connected pair of AF_LOCAL stream sockets, both opened
// with close-on-exec set, and wraps each end in an *os.File.
//
// The parent end is named name+"-p" and the child end name+"-c". If the
// socketpair call fails, both files are nil and the error is returned
// unchanged.
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
	const sockType = syscall.SOCK_STREAM | syscall.SOCK_CLOEXEC

	pair, sockErr := syscall.Socketpair(syscall.AF_LOCAL, sockType, 0)
	if sockErr != nil {
		return nil, nil, sockErr
	}

	// The second descriptor becomes the parent end, the first the child end.
	parent = os.NewFile(uintptr(pair[1]), name+"-p")
	child = os.NewFile(uintptr(pair[0]), name+"-c")
	return parent, child, nil
}