Update the opencontainers/runc vendor

This fixes the ugly build errors on Alpine Linux which the old version gave
from C type mismatches, and now gives a nice neat line of whales on build...

Signed-off-by: Justin Cormack <justin.cormack@docker.com>
This commit is contained in:
Justin Cormack 2017-04-11 11:07:07 +01:00
parent 62918511f3
commit 5e3c399d48
8 changed files with 239 additions and 253 deletions

View File

@ -11,7 +11,7 @@ github.com/matttproud/golang_protobuf_extensions v1.0.0
github.com/docker/go-units v0.3.1 github.com/docker/go-units v0.3.1
github.com/gogo/protobuf d2e1ade2d719b78fe5b061b4c18a9f7111b5bdc8 github.com/gogo/protobuf d2e1ade2d719b78fe5b061b4c18a9f7111b5bdc8
github.com/golang/protobuf 8ee79997227bf9b34611aee7946ae64735e6fd93 github.com/golang/protobuf 8ee79997227bf9b34611aee7946ae64735e6fd93
github.com/opencontainers/runc ce450bcc6c135cae93ee2a99d41a308c179ff6dc github.com/opencontainers/runc 50401b5b4c2e01e4f1372b73a021742deeaf4e2d
github.com/opencontainers/runtime-spec 035da1dca3dfbb00d752eb58b0b158d6129f3776 github.com/opencontainers/runtime-spec 035da1dca3dfbb00d752eb58b0b158d6129f3776
github.com/Sirupsen/logrus v0.11.0 github.com/Sirupsen/logrus v0.11.0
github.com/stevvooe/go-btrfs ea304655a3ed8f00773db1844f921d12541ee0d1 github.com/stevvooe/go-btrfs ea304655a3ed8f00773db1844f921d12541ee0d1

View File

@ -2,6 +2,7 @@
[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc) [![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc)
[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
## Introduction ## Introduction
@ -76,6 +77,12 @@ You can run a specific test case by setting the `TESTFLAGS` variable.
# make test TESTFLAGS="-run=SomeTestFunction" # make test TESTFLAGS="-run=SomeTestFunction"
``` ```
### Dependency Management
`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependency management.
Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add new
dependencies or update existing ones.
## Using runc ## Using runc
### Creating an OCI Bundle ### Creating an OCI Bundle

View File

@ -1,3 +1,7 @@
# libcontainer
[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer)
Libcontainer provides a native Go implementation for creating containers Libcontainer provides a native Go implementation for creating containers
with namespaces, cgroups, capabilities, and filesystem access controls. with namespaces, cgroups, capabilities, and filesystem access controls.
It allows you to manage the lifecycle of the container performing additional operations It allows you to manage the lifecycle of the container performing additional operations
@ -16,7 +20,14 @@ the current binary (/proc/self/exe) to be executed as the init process, and use
arg "init", we call the first step process "bootstrap", so you always need a "init" arg "init", we call the first step process "bootstrap", so you always need a "init"
function as the entry of "bootstrap". function as the entry of "bootstrap".
In addition to the Go `init` function, the early-stage bootstrap is handled by importing
[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md).
```go ```go
import (
_ "github.com/opencontainers/runc/libcontainer/nsenter"
)
func init() { func init() {
if len(os.Args) > 1 && os.Args[1] == "init" { if len(os.Args) > 1 && os.Args[1] == "init" {
runtime.GOMAXPROCS(1) runtime.GOMAXPROCS(1)

View File

@ -33,7 +33,8 @@ enum sync_t {
SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */ SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */ SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */ SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
SYNC_CHILD_READY = 0x44, /* The grandchild is ready to return. */ SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */
SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */
/* XXX: This doesn't help with segfaults and other such issues. */ /* XXX: This doesn't help with segfaults and other such issues. */
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */ SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
@ -71,6 +72,9 @@ struct nlconfig_t {
char *namespaces; char *namespaces;
size_t namespaces_len; size_t namespaces_len;
uint8_t is_setgroup; uint8_t is_setgroup;
uint8_t is_rootless;
char *oom_score_adj;
size_t oom_score_adj_len;
}; };
/* /*
@ -83,6 +87,8 @@ struct nlconfig_t {
#define UIDMAP_ATTR 27283 #define UIDMAP_ATTR 27283
#define GIDMAP_ATTR 27284 #define GIDMAP_ATTR 27284
#define SETGROUP_ATTR 27285 #define SETGROUP_ATTR 27285
#define OOM_SCORE_ADJ_ATTR 27286
#define ROOTLESS_ATTR 27287
/* /*
* Use the raw syscall for versions of glibc which don't include a function for * Use the raw syscall for versions of glibc which don't include a function for
@ -171,6 +177,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
policy = "deny"; policy = "deny";
break; break;
case SETGROUPS_DEFAULT: case SETGROUPS_DEFAULT:
default:
/* Nothing to do. */ /* Nothing to do. */
return; return;
} }
@ -185,7 +192,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
} }
} }
static void update_uidmap(int pid, char *map, int map_len) static void update_uidmap(int pid, char *map, size_t map_len)
{ {
if (map == NULL || map_len <= 0) if (map == NULL || map_len <= 0)
return; return;
@ -194,7 +201,7 @@ static void update_uidmap(int pid, char *map, int map_len)
bail("failed to update /proc/%d/uid_map", pid); bail("failed to update /proc/%d/uid_map", pid);
} }
static void update_gidmap(int pid, char *map, int map_len) static void update_gidmap(int pid, char *map, size_t map_len)
{ {
if (map == NULL || map_len <= 0) if (map == NULL || map_len <= 0)
return; return;
@ -203,6 +210,15 @@ static void update_gidmap(int pid, char *map, int map_len)
bail("failed to update /proc/%d/gid_map", pid); bail("failed to update /proc/%d/gid_map", pid);
} }
/*
 * Writes the supplied oom_score_adj value to /proc/self/oom_score_adj.
 * A NULL or empty buffer means no value was supplied, so nothing is written.
 * A failed write is reported through bail().
 */
static void update_oom_score_adj(char *data, size_t len)
{
	/* len is unsigned, so "nothing to write" is simply len == 0. */
	if (!data || len == 0)
		return;

	if (write_file(data, len, "/proc/self/oom_score_adj") >= 0)
		return;

	bail("failed to update /proc/self/oom_score_adj");
}
/* A dummy function that just jumps to the given jumpval. */ /* A dummy function that just jumps to the given jumpval. */
static int child_func(void *arg) __attribute__ ((noinline)); static int child_func(void *arg) __attribute__ ((noinline));
static int child_func(void *arg) static int child_func(void *arg)
@ -284,7 +300,7 @@ static void nl_parse(int fd, struct nlconfig_t *config)
/* Retrieve the netlink header. */ /* Retrieve the netlink header. */
len = read(fd, &hdr, NLMSG_HDRLEN); len = read(fd, &hdr, NLMSG_HDRLEN);
if (len != NLMSG_HDRLEN) if (len != NLMSG_HDRLEN)
bail("invalid netlink header length %lu", len); bail("invalid netlink header length %zu", len);
if (hdr.nlmsg_type == NLMSG_ERROR) if (hdr.nlmsg_type == NLMSG_ERROR)
bail("failed to read netlink message"); bail("failed to read netlink message");
@ -300,7 +316,7 @@ static void nl_parse(int fd, struct nlconfig_t *config)
len = read(fd, data, size); len = read(fd, data, size);
if (len != size) if (len != size)
bail("failed to read netlink payload, %lu != %lu", len, size); bail("failed to read netlink payload, %zu != %zu", len, size);
/* Parse the netlink payload. */ /* Parse the netlink payload. */
config->data = data; config->data = data;
@ -316,6 +332,13 @@ static void nl_parse(int fd, struct nlconfig_t *config)
case CLONE_FLAGS_ATTR: case CLONE_FLAGS_ATTR:
config->cloneflags = readint32(current); config->cloneflags = readint32(current);
break; break;
case ROOTLESS_ATTR:
config->is_rootless = readint8(current);
break;
case OOM_SCORE_ADJ_ATTR:
config->oom_score_adj = current;
config->oom_score_adj_len = payload_len;
break;
case NS_PATHS_ATTR: case NS_PATHS_ATTR:
config->namespaces = current; config->namespaces = current;
config->namespaces_len = payload_len; config->namespaces_len = payload_len;
@ -413,7 +436,7 @@ void nsexec(void)
{ {
int pipenum; int pipenum;
jmp_buf env; jmp_buf env;
int syncpipe[2]; int sync_child_pipe[2], sync_grandchild_pipe[2];
struct nlconfig_t config = {0}; struct nlconfig_t config = {0};
/* /*
@ -424,18 +447,43 @@ void nsexec(void)
if (pipenum == -1) if (pipenum == -1)
return; return;
/* make the process non-dumpable */
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) {
bail("failed to set process as non-dumpable");
}
/* Parse all of the netlink configuration. */ /* Parse all of the netlink configuration. */
nl_parse(pipenum, &config); nl_parse(pipenum, &config);
/* Set oom_score_adj. This has to be done before !dumpable because
* /proc/self/oom_score_adj is not writeable unless you're a privileged
* user (if !dumpable is set). All children inherit their parent's
* oom_score_adj value on fork(2) so this will always be propagated
* properly.
*/
update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len);
/*
* Make the process non-dumpable, to avoid various race conditions that
* could cause processes in namespaces we're joining to access host
* resources (or potentially execute code).
*
* However, if the number of namespaces we are joining is 0, we are not
* going to be switching to a different security context. Thus setting
* ourselves to be non-dumpable only breaks things (like rootless
* containers), which is the recommendation from the kernel folks.
*/
if (config.namespaces) {
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
bail("failed to set process as non-dumpable");
}
/* Pipe so we can tell the child when we've finished setting up. */ /* Pipe so we can tell the child when we've finished setting up. */
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0) if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
bail("failed to setup sync pipe between parent and child"); bail("failed to setup sync pipe between parent and child");
/*
* We need a separate socketpair to sync with the grandchild so that we
* don't have a race condition with the child.
*/
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0)
bail("failed to setup sync pipe between parent and grandchild");
/* TODO: Currently we aren't dealing with child deaths properly. */ /* TODO: Currently we aren't dealing with child deaths properly. */
/* /*
@ -494,9 +542,10 @@ void nsexec(void)
* process. * process.
*/ */
case JUMP_PARENT: { case JUMP_PARENT: {
int len, ready = 0; int len;
pid_t child; pid_t child;
char buf[JSON_MAX]; char buf[JSON_MAX];
bool ready = false;
/* For debugging. */ /* For debugging. */
prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0); prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
@ -513,30 +562,39 @@ void nsexec(void)
* ready, so we can receive all possible error codes * ready, so we can receive all possible error codes
* generated by children. * generated by children.
*/ */
while (ready < 2) { while (!ready) {
enum sync_t s; enum sync_t s;
int ret;
/* This doesn't need to be global, we're in the parent. */ syncfd = sync_child_pipe[1];
int syncfd = syncpipe[1]; close(sync_child_pipe[0]);
if (read(syncfd, &s, sizeof(s)) != sizeof(s)) if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with child: next state"); bail("failed to sync with child: next state");
switch (s) { switch (s) {
case SYNC_ERR: { case SYNC_ERR:
/* We have to mirror the error code of the child. */ /* We have to mirror the error code of the child. */
int ret;
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret)) if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
bail("failed to sync with child: read(error code)"); bail("failed to sync with child: read(error code)");
exit(ret); exit(ret);
}
break;
case SYNC_USERMAP_PLS: case SYNC_USERMAP_PLS:
/* Enable setgroups(2) if we've been asked to. */ /*
* Enable setgroups(2) if we've been asked to. But we also
* have to explicitly disable setgroups(2) if we're
* creating a rootless container (this is required since
* Linux 3.19).
*/
if (config.is_rootless && config.is_setgroup) {
kill(child, SIGKILL);
bail("cannot allow setgroup in an unprivileged user namespace setup");
}
if (config.is_setgroup) if (config.is_setgroup)
update_setgroups(child, SETGROUPS_ALLOW); update_setgroups(child, SETGROUPS_ALLOW);
if (config.is_rootless)
update_setgroups(child, SETGROUPS_DENY);
/* Set up mappings. */ /* Set up mappings. */
update_uidmap(child, config.uidmap, config.uidmap_len); update_uidmap(child, config.uidmap, config.uidmap_len);
@ -548,11 +606,6 @@ void nsexec(void)
bail("failed to sync with child: write(SYNC_USERMAP_ACK)"); bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
} }
break; break;
case SYNC_USERMAP_ACK:
/* We should _never_ receive acks. */
kill(child, SIGKILL);
bail("failed to sync with child: unexpected SYNC_USERMAP_ACK");
break;
case SYNC_RECVPID_PLS: { case SYNC_RECVPID_PLS: {
pid_t old = child; pid_t old = child;
@ -570,20 +623,46 @@ void nsexec(void)
bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
} }
} }
ready++;
break;
case SYNC_RECVPID_ACK:
/* We should _never_ receive acks. */
kill(child, SIGKILL);
bail("failed to sync with child: unexpected SYNC_RECVPID_ACK");
break; break;
case SYNC_CHILD_READY: case SYNC_CHILD_READY:
ready++; ready = true;
break; break;
default: default:
bail("unexpected sync value"); bail("unexpected sync value: %u", s);
}
}
/* Now sync with grandchild. */
ready = false;
while (!ready) {
enum sync_t s;
int ret;
syncfd = sync_grandchild_pipe[1];
close(sync_grandchild_pipe[0]);
s = SYNC_GRANDCHILD;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
kill(child, SIGKILL);
bail("failed to sync with child: write(SYNC_GRANDCHILD)");
}
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with child: next state");
switch (s) {
case SYNC_ERR:
/* We have to mirror the error code of the child. */
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
bail("failed to sync with child: read(error code)");
exit(ret);
case SYNC_CHILD_READY:
ready = true;
break; break;
default:
bail("unexpected sync value: %u", s);
} }
} }
@ -615,7 +694,8 @@ void nsexec(void)
enum sync_t s; enum sync_t s;
/* We're in a child and thus need to tell the parent if we die. */ /* We're in a child and thus need to tell the parent if we die. */
syncfd = syncpipe[0]; syncfd = sync_child_pipe[0];
close(sync_child_pipe[1]);
/* For debugging. */ /* For debugging. */
prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0); prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
@ -653,6 +733,11 @@ void nsexec(void)
* clone_parent rant). So signal our parent to hook us up. * clone_parent rant). So signal our parent to hook us up.
*/ */
/* Switching is only necessary if we joined namespaces. */
if (config.namespaces) {
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
bail("failed to set process as dumpable");
}
s = SYNC_USERMAP_PLS; s = SYNC_USERMAP_PLS;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) if (write(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
@ -663,6 +748,11 @@ void nsexec(void)
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
if (s != SYNC_USERMAP_ACK) if (s != SYNC_USERMAP_ACK)
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
/* Switching is only necessary if we joined namespaces. */
if (config.namespaces) {
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
bail("failed to set process as dumpable");
}
} }
/* /*
@ -700,6 +790,12 @@ void nsexec(void)
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s); bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
} }
s = SYNC_CHILD_READY;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
kill(child, SIGKILL);
bail("failed to sync with parent: write(SYNC_CHILD_READY)");
}
/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */ /* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
exit(0); exit(0);
} }
@ -718,11 +814,19 @@ void nsexec(void)
enum sync_t s; enum sync_t s;
/* We're in a child and thus need to tell the parent if we die. */ /* We're in a child and thus need to tell the parent if we die. */
syncfd = syncpipe[0]; syncfd = sync_grandchild_pipe[0];
close(sync_grandchild_pipe[1]);
close(sync_child_pipe[0]);
close(sync_child_pipe[1]);
/* For debugging. */ /* For debugging. */
prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0); prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
if (s != SYNC_GRANDCHILD)
bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s);
if (setsid() < 0) if (setsid() < 0)
bail("setsid failed"); bail("setsid failed");
@ -732,16 +836,17 @@ void nsexec(void)
if (setgid(0) < 0) if (setgid(0) < 0)
bail("setgid failed"); bail("setgid failed");
if (!config.is_rootless && config.is_setgroup) {
if (setgroups(0, NULL) < 0) if (setgroups(0, NULL) < 0)
bail("setgroups failed"); bail("setgroups failed");
}
s = SYNC_CHILD_READY; s = SYNC_CHILD_READY;
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) if (write(syncfd, &s, sizeof(s)) != sizeof(s))
bail("failed to sync with patent: write(SYNC_CHILD_READY)"); bail("failed to sync with patent: write(SYNC_CHILD_READY)");
/* Close sync pipes. */ /* Close sync pipes. */
close(syncpipe[0]); close(sync_grandchild_pipe[0]);
close(syncpipe[1]);
/* Free netlink data. */ /* Free netlink data. */
nl_free(&config); nl_free(&config);
@ -751,7 +856,6 @@ void nsexec(void)
} }
default: default:
bail("unexpected jump value"); bail("unexpected jump value");
break;
} }
/* Should never be reached. */ /* Should never be reached. */

View File

@ -1,148 +0,0 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#include "cmsg.h"
/*
 * Report a failure and jump to the enclosing function's `err` label.
 *
 * The message is written to stderr with an "nsenter: " prefix and is followed
 * by ": %m", i.e. the textual description of the errno value at the time of
 * the call (%m is a libc printf extension). errno is then overwritten with
 * ECOMM before the jump, so that is the value the caller of the enclosing
 * function observes.
 *
 * The enclosing function must define an `err:` label. The body is a
 * ({ ... }) statement expression, which is a compiler extension.
 */
#define error(fmt, ...) \
({ \
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
errno = ECOMM; \
goto err; /* return value */ \
})
/*
 * Sends a file descriptor along the sockfd provided, using file.name
 * (including its terminating NUL) as the regular payload and file.fd as
 * SCM_RIGHTS ancillary data.
 *
 * Returns the return value of sendmsg(2), or -1 with errno set to EINVAL if
 * file.name is NULL. Any synchronisation and preparation of state should be
 * done external to this (we expect the other side to be in recvfd() in the
 * code).
 */
ssize_t sendfd(int sockfd, struct file_t file)
{
	struct msghdr msg = {0};
	struct iovec iov = {0};
	struct cmsghdr *cmsg;
	union {
		char buf[CMSG_SPACE(sizeof(file.fd))];
		struct cmsghdr align; /* only here to force correct alignment of buf */
	} u;

	/* strlen() below would dereference NULL; report the bad argument instead. */
	if (!file.name) {
		errno = EINVAL;
		return -1;
	}

	/* Do not hand uninitialised padding bytes to the kernel. */
	memset(&u, 0, sizeof(u));

	/*
	 * We need to send some other data along with the ancillary data,
	 * otherwise the other side won't receive any data. This is very
	 * well-hidden in the documentation (and only applies to
	 * SOCK_STREAM). See the bottom part of unix(7).
	 */
	iov.iov_base = file.name;
	iov.iov_len = strlen(file.name) + 1;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = u.buf;
	msg.msg_controllen = sizeof(u.buf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(file.fd));

	/* CMSG_DATA() is not guaranteed to be int-aligned, so copy bytewise. */
	memcpy(CMSG_DATA(cmsg), &file.fd, sizeof(file.fd));

	return sendmsg(sockfd, &msg, 0);
}
/*
* Receives a file descriptor from the sockfd provided. Returns the file
* descriptor as sent from sendfd(). It will return the file descriptor
* or die (literally) trying. Any synchronisation and preparation of
* state should be done external to this (we expect the other side to be
* in sendfd() in the code).
*/
struct file_t recvfd(int sockfd)
{
struct msghdr msg = {0};
struct iovec iov[1] = {0};
struct cmsghdr *cmsg;
struct file_t file = {0};
int *fdptr;
int olderrno;
union {
char buf[CMSG_SPACE(sizeof(file.fd))];
struct cmsghdr align;
} u;
/* Allocate a buffer. */
/* TODO: Make this dynamic with MSG_PEEK. */
file.name = malloc(TAG_BUFFER);
if (!file.name)
error("recvfd: failed to allocate file.tag buffer\n");
/*
* We need to "recieve" the non-ancillary data even though we don't
* plan to use it at all. Otherwise, things won't work as expected.
* See unix(7) and other well-hidden documentation.
*/
iov[0].iov_base = file.name;
iov[0].iov_len = TAG_BUFFER;
msg.msg_name = NULL;
msg.msg_namelen = 0;
msg.msg_iov = iov;
msg.msg_iovlen = 1;
msg.msg_control = u.buf;
msg.msg_controllen = sizeof(u.buf);
ssize_t ret = recvmsg(sockfd, &msg, 0);
if (ret < 0)
goto err;
cmsg = CMSG_FIRSTHDR(&msg);
if (!cmsg)
error("recvfd: got NULL from CMSG_FIRSTHDR");
if (cmsg->cmsg_level != SOL_SOCKET)
error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level);
if (cmsg->cmsg_type != SCM_RIGHTS)
error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type);
if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
error("recvfd: expected correct CMSG_LEN in cmsg: %lu", cmsg->cmsg_len);
fdptr = (int *) CMSG_DATA(cmsg);
if (!fdptr || *fdptr < 0)
error("recvfd: recieved invalid pointer");
file.fd = *fdptr;
return file;
err:
olderrno = errno;
free(file.name);
errno = olderrno;
return (struct file_t){0};
}

View File

@ -3,7 +3,7 @@
package utils package utils
/* /*
* Copyright 2016 SUSE LLC * Copyright 2016, 2017 SUSE LLC
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@ -18,28 +18,66 @@ package utils
* limitations under the License. * limitations under the License.
*/ */
/*
#include <errno.h>
#include <stdlib.h>
#include "cmsg.h"
*/
import "C"
import ( import (
"fmt"
"os" "os"
"unsafe"
"golang.org/x/sys/unix"
) )
// MaxNameLen is the maximum length of the name of a file descriptor being
// sent using SendFd. The name of the file handle returned by RecvFd will never
// be larger than this value.
const MaxNameLen = 4096
// oobSpace is the size of the oob slice required to store a single FD. Note
// that unix.UnixRights appears to make the assumption that fd is always int32,
// so sizeof(fd) = 4. RecvFd allocates its out-of-band buffer with this length
// and rejects any message whose out-of-band byte count differs from it.
var oobSpace = unix.CmsgSpace(4)
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX // RecvFd waits for a file descriptor to be sent over the given AF_UNIX
// socket. The file name of the remote file descriptor will be recreated // socket. The file name of the remote file descriptor will be recreated
// locally (it is sent as non-auxiliary data in the same payload). // locally (it is sent as non-auxiliary data in the same payload).
func RecvFd(socket *os.File) (*os.File, error) { func RecvFd(socket *os.File) (*os.File, error) {
file, err := C.recvfd(C.int(socket.Fd())) // For some reason, unix.Recvmsg uses the length rather than the capacity
// when passing the msg_controllen and other attributes to recvmsg. So we
// have to actually set the length.
name := make([]byte, MaxNameLen)
oob := make([]byte, oobSpace)
sockfd := socket.Fd()
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer C.free(unsafe.Pointer(file.name))
return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil if n >= MaxNameLen || oobn != oobSpace {
return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
}
// Truncate.
name = name[:n]
oob = oob[:oobn]
scms, err := unix.ParseSocketControlMessage(oob)
if err != nil {
return nil, err
}
if len(scms) != 1 {
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
}
scm := scms[0]
fds, err := unix.ParseUnixRights(&scm)
if err != nil {
return nil, err
}
if len(fds) != 1 {
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
}
fd := uintptr(fds[0])
return os.NewFile(fd, string(name)), nil
} }
// SendFd sends a file descriptor over the given AF_UNIX socket. In // SendFd sends a file descriptor over the given AF_UNIX socket. In
@ -47,11 +85,11 @@ func RecvFd(socket *os.File) (*os.File, error) {
// non-auxiliary data in the same payload (allowing to send contextual // non-auxiliary data in the same payload (allowing to send contextual
// information for a file descriptor). // information for a file descriptor).
func SendFd(socket, file *os.File) error { func SendFd(socket, file *os.File) error {
var cfile C.struct_file_t name := []byte(file.Name())
cfile.fd = C.int(file.Fd()) if len(name) >= MaxNameLen {
cfile.name = C.CString(file.Name()) return fmt.Errorf("sendfd: filename too long: %s", file.Name())
defer C.free(unsafe.Pointer(cfile.name)) }
oob := unix.UnixRights(int(file.Fd()))
_, err := C.sendfd(C.int(socket.Fd()), cfile) return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0)
return err
} }

View File

@ -1,36 +0,0 @@
/*
* Copyright 2016 SUSE LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#if !defined(CMSG_H)
#define CMSG_H
#include <sys/types.h>
/* TODO: Implement this properly with MSG_PEEK. */
#define TAG_BUFFER 4096
/*
 * This mirrors Go's (*os.File): a file descriptor paired with a name.
 * It is the unit passed to sendfd() and returned by recvfd().
 */
struct file_t {
/*
 * NUL-terminated name; sendfd() transmits it (including the NUL) as the
 * regular payload, recvfd() returns it in a malloc()ed buffer.
 */
char *name;
/* The file descriptor that is sent or was received. */
int fd;
};
struct file_t recvfd(int sockfd);
ssize_t sendfd(int sockfd, struct file_t file);
#endif /* !defined(CMSG_H) */

View File

@ -4,6 +4,7 @@ package utils
import ( import (
"io/ioutil" "io/ioutil"
"os"
"strconv" "strconv"
"syscall" "syscall"
) )
@ -31,3 +32,12 @@ func CloseExecFrom(minFd int) error {
} }
return nil return nil
} }
// NewSockPair creates a connected pair of close-on-exec AF_LOCAL stream
// sockets and wraps both ends as *os.File. The parent end is named
// name+"-p" and the child end name+"-c". If the socketpair cannot be
// created, both files are nil and the error is returned.
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
	const sockType = syscall.SOCK_STREAM | syscall.SOCK_CLOEXEC

	pair, err := syscall.Socketpair(syscall.AF_LOCAL, sockType, 0)
	if err != nil {
		return nil, nil, err
	}

	// The second descriptor is handed to the parent, the first to the child.
	parent = os.NewFile(uintptr(pair[1]), name+"-p")
	child = os.NewFile(uintptr(pair[0]), name+"-c")
	return parent, child, nil
}