Merge pull request #715 from justincormack/runc-vendor-up
Update the opencontainers/runc vendor
This commit is contained in:
commit
f108e08703
@ -11,7 +11,7 @@ github.com/matttproud/golang_protobuf_extensions v1.0.0
|
|||||||
github.com/docker/go-units v0.3.1
|
github.com/docker/go-units v0.3.1
|
||||||
github.com/gogo/protobuf d2e1ade2d719b78fe5b061b4c18a9f7111b5bdc8
|
github.com/gogo/protobuf d2e1ade2d719b78fe5b061b4c18a9f7111b5bdc8
|
||||||
github.com/golang/protobuf 8ee79997227bf9b34611aee7946ae64735e6fd93
|
github.com/golang/protobuf 8ee79997227bf9b34611aee7946ae64735e6fd93
|
||||||
github.com/opencontainers/runc ce450bcc6c135cae93ee2a99d41a308c179ff6dc
|
github.com/opencontainers/runc 50401b5b4c2e01e4f1372b73a021742deeaf4e2d
|
||||||
github.com/opencontainers/runtime-spec 035da1dca3dfbb00d752eb58b0b158d6129f3776
|
github.com/opencontainers/runtime-spec 035da1dca3dfbb00d752eb58b0b158d6129f3776
|
||||||
github.com/Sirupsen/logrus v0.11.0
|
github.com/Sirupsen/logrus v0.11.0
|
||||||
github.com/stevvooe/go-btrfs ea304655a3ed8f00773db1844f921d12541ee0d1
|
github.com/stevvooe/go-btrfs ea304655a3ed8f00773db1844f921d12541ee0d1
|
||||||
|
7
vendor/github.com/opencontainers/runc/README.md
generated
vendored
7
vendor/github.com/opencontainers/runc/README.md
generated
vendored
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
[](https://travis-ci.org/opencontainers/runc)
|
[](https://travis-ci.org/opencontainers/runc)
|
||||||
[](https://goreportcard.com/report/github.com/opencontainers/runc)
|
[](https://goreportcard.com/report/github.com/opencontainers/runc)
|
||||||
|
[](https://godoc.org/github.com/opencontainers/runc)
|
||||||
|
|
||||||
## Introduction
|
## Introduction
|
||||||
|
|
||||||
@ -76,6 +77,12 @@ You can run a specific test case by setting the `TESTFLAGS` variable.
|
|||||||
# make test TESTFLAGS="-run=SomeTestFunction"
|
# make test TESTFLAGS="-run=SomeTestFunction"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Dependency Management
|
||||||
|
|
||||||
|
`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependency management.
|
||||||
|
Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update
|
||||||
|
new dependencies.
|
||||||
|
|
||||||
## Using runc
|
## Using runc
|
||||||
|
|
||||||
### Creating an OCI Bundle
|
### Creating an OCI Bundle
|
||||||
|
11
vendor/github.com/opencontainers/runc/libcontainer/README.md
generated
vendored
11
vendor/github.com/opencontainers/runc/libcontainer/README.md
generated
vendored
@ -1,3 +1,7 @@
|
|||||||
|
# libcontainer
|
||||||
|
|
||||||
|
[](https://godoc.org/github.com/opencontainers/runc/libcontainer)
|
||||||
|
|
||||||
Libcontainer provides a native Go implementation for creating containers
|
Libcontainer provides a native Go implementation for creating containers
|
||||||
with namespaces, cgroups, capabilities, and filesystem access controls.
|
with namespaces, cgroups, capabilities, and filesystem access controls.
|
||||||
It allows you to manage the lifecycle of the container performing additional operations
|
It allows you to manage the lifecycle of the container performing additional operations
|
||||||
@ -16,7 +20,14 @@ the current binary (/proc/self/exe) to be executed as the init process, and use
|
|||||||
arg "init", we call the first step process "bootstrap", so you always need a "init"
|
arg "init", we call the first step process "bootstrap", so you always need a "init"
|
||||||
function as the entry of "bootstrap".
|
function as the entry of "bootstrap".
|
||||||
|
|
||||||
|
In addition to the Go init function, the early-stage bootstrap is handled by importing
|
||||||
|
[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md).
|
||||||
|
|
||||||
```go
|
```go
|
||||||
|
import (
|
||||||
|
_ "github.com/opencontainers/runc/libcontainer/nsenter"
|
||||||
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
if len(os.Args) > 1 && os.Args[1] == "init" {
|
if len(os.Args) > 1 && os.Args[1] == "init" {
|
||||||
runtime.GOMAXPROCS(1)
|
runtime.GOMAXPROCS(1)
|
||||||
|
186
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
generated
vendored
186
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
generated
vendored
@ -33,7 +33,8 @@ enum sync_t {
|
|||||||
SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
|
SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
|
||||||
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
|
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
|
||||||
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
|
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
|
||||||
SYNC_CHILD_READY = 0x44, /* The grandchild is ready to return. */
|
SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */
|
||||||
|
SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */
|
||||||
|
|
||||||
/* XXX: This doesn't help with segfaults and other such issues. */
|
/* XXX: This doesn't help with segfaults and other such issues. */
|
||||||
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
|
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
|
||||||
@ -71,6 +72,9 @@ struct nlconfig_t {
|
|||||||
char *namespaces;
|
char *namespaces;
|
||||||
size_t namespaces_len;
|
size_t namespaces_len;
|
||||||
uint8_t is_setgroup;
|
uint8_t is_setgroup;
|
||||||
|
uint8_t is_rootless;
|
||||||
|
char *oom_score_adj;
|
||||||
|
size_t oom_score_adj_len;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -83,6 +87,8 @@ struct nlconfig_t {
|
|||||||
#define UIDMAP_ATTR 27283
|
#define UIDMAP_ATTR 27283
|
||||||
#define GIDMAP_ATTR 27284
|
#define GIDMAP_ATTR 27284
|
||||||
#define SETGROUP_ATTR 27285
|
#define SETGROUP_ATTR 27285
|
||||||
|
#define OOM_SCORE_ADJ_ATTR 27286
|
||||||
|
#define ROOTLESS_ATTR 27287
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Use the raw syscall for versions of glibc which don't include a function for
|
* Use the raw syscall for versions of glibc which don't include a function for
|
||||||
@ -171,6 +177,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
|
|||||||
policy = "deny";
|
policy = "deny";
|
||||||
break;
|
break;
|
||||||
case SETGROUPS_DEFAULT:
|
case SETGROUPS_DEFAULT:
|
||||||
|
default:
|
||||||
/* Nothing to do. */
|
/* Nothing to do. */
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -185,7 +192,7 @@ static void update_setgroups(int pid, enum policy_t setgroup)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void update_uidmap(int pid, char *map, int map_len)
|
static void update_uidmap(int pid, char *map, size_t map_len)
|
||||||
{
|
{
|
||||||
if (map == NULL || map_len <= 0)
|
if (map == NULL || map_len <= 0)
|
||||||
return;
|
return;
|
||||||
@ -194,7 +201,7 @@ static void update_uidmap(int pid, char *map, int map_len)
|
|||||||
bail("failed to update /proc/%d/uid_map", pid);
|
bail("failed to update /proc/%d/uid_map", pid);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void update_gidmap(int pid, char *map, int map_len)
|
static void update_gidmap(int pid, char *map, size_t map_len)
|
||||||
{
|
{
|
||||||
if (map == NULL || map_len <= 0)
|
if (map == NULL || map_len <= 0)
|
||||||
return;
|
return;
|
||||||
@ -203,6 +210,15 @@ static void update_gidmap(int pid, char *map, int map_len)
|
|||||||
bail("failed to update /proc/%d/gid_map", pid);
|
bail("failed to update /proc/%d/gid_map", pid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void update_oom_score_adj(char *data, size_t len)
|
||||||
|
{
|
||||||
|
if (data == NULL || len <= 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
|
||||||
|
bail("failed to update /proc/self/oom_score_adj");
|
||||||
|
}
|
||||||
|
|
||||||
/* A dummy function that just jumps to the given jumpval. */
|
/* A dummy function that just jumps to the given jumpval. */
|
||||||
static int child_func(void *arg) __attribute__ ((noinline));
|
static int child_func(void *arg) __attribute__ ((noinline));
|
||||||
static int child_func(void *arg)
|
static int child_func(void *arg)
|
||||||
@ -284,7 +300,7 @@ static void nl_parse(int fd, struct nlconfig_t *config)
|
|||||||
/* Retrieve the netlink header. */
|
/* Retrieve the netlink header. */
|
||||||
len = read(fd, &hdr, NLMSG_HDRLEN);
|
len = read(fd, &hdr, NLMSG_HDRLEN);
|
||||||
if (len != NLMSG_HDRLEN)
|
if (len != NLMSG_HDRLEN)
|
||||||
bail("invalid netlink header length %lu", len);
|
bail("invalid netlink header length %zu", len);
|
||||||
|
|
||||||
if (hdr.nlmsg_type == NLMSG_ERROR)
|
if (hdr.nlmsg_type == NLMSG_ERROR)
|
||||||
bail("failed to read netlink message");
|
bail("failed to read netlink message");
|
||||||
@ -300,7 +316,7 @@ static void nl_parse(int fd, struct nlconfig_t *config)
|
|||||||
|
|
||||||
len = read(fd, data, size);
|
len = read(fd, data, size);
|
||||||
if (len != size)
|
if (len != size)
|
||||||
bail("failed to read netlink payload, %lu != %lu", len, size);
|
bail("failed to read netlink payload, %zu != %zu", len, size);
|
||||||
|
|
||||||
/* Parse the netlink payload. */
|
/* Parse the netlink payload. */
|
||||||
config->data = data;
|
config->data = data;
|
||||||
@ -316,6 +332,13 @@ static void nl_parse(int fd, struct nlconfig_t *config)
|
|||||||
case CLONE_FLAGS_ATTR:
|
case CLONE_FLAGS_ATTR:
|
||||||
config->cloneflags = readint32(current);
|
config->cloneflags = readint32(current);
|
||||||
break;
|
break;
|
||||||
|
case ROOTLESS_ATTR:
|
||||||
|
config->is_rootless = readint8(current);
|
||||||
|
break;
|
||||||
|
case OOM_SCORE_ADJ_ATTR:
|
||||||
|
config->oom_score_adj = current;
|
||||||
|
config->oom_score_adj_len = payload_len;
|
||||||
|
break;
|
||||||
case NS_PATHS_ATTR:
|
case NS_PATHS_ATTR:
|
||||||
config->namespaces = current;
|
config->namespaces = current;
|
||||||
config->namespaces_len = payload_len;
|
config->namespaces_len = payload_len;
|
||||||
@ -413,7 +436,7 @@ void nsexec(void)
|
|||||||
{
|
{
|
||||||
int pipenum;
|
int pipenum;
|
||||||
jmp_buf env;
|
jmp_buf env;
|
||||||
int syncpipe[2];
|
int sync_child_pipe[2], sync_grandchild_pipe[2];
|
||||||
struct nlconfig_t config = {0};
|
struct nlconfig_t config = {0};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -424,18 +447,43 @@ void nsexec(void)
|
|||||||
if (pipenum == -1)
|
if (pipenum == -1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* make the process non-dumpable */
|
|
||||||
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) {
|
|
||||||
bail("failed to set process as non-dumpable");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Parse all of the netlink configuration. */
|
/* Parse all of the netlink configuration. */
|
||||||
nl_parse(pipenum, &config);
|
nl_parse(pipenum, &config);
|
||||||
|
|
||||||
|
/* Set oom_score_adj. This has to be done before !dumpable because
|
||||||
|
* /proc/self/oom_score_adj is not writeable unless you're a privileged
|
||||||
|
* user (if !dumpable is set). All children inherit their parent's
|
||||||
|
* oom_score_adj value on fork(2) so this will always be propagated
|
||||||
|
* properly.
|
||||||
|
*/
|
||||||
|
update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Make the process non-dumpable, to avoid various race conditions that
|
||||||
|
* could cause processes in namespaces we're joining to access host
|
||||||
|
* resources (or potentially execute code).
|
||||||
|
*
|
||||||
|
* However, if the number of namespaces we are joining is 0, we are not
|
||||||
|
* going to be switching to a different security context. Thus setting
|
||||||
|
* ourselves to be non-dumpable only breaks things (like rootless
|
||||||
|
* containers), which is the recommendation from the kernel folks.
|
||||||
|
*/
|
||||||
|
if (config.namespaces) {
|
||||||
|
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
||||||
|
bail("failed to set process as non-dumpable");
|
||||||
|
}
|
||||||
|
|
||||||
/* Pipe so we can tell the child when we've finished setting up. */
|
/* Pipe so we can tell the child when we've finished setting up. */
|
||||||
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0)
|
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
|
||||||
bail("failed to setup sync pipe between parent and child");
|
bail("failed to setup sync pipe between parent and child");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need a new socketpair to sync with the grandchild so we don't have
|
||||||
|
* a race condition with the child.
|
||||||
|
*/
|
||||||
|
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0)
|
||||||
|
bail("failed to setup sync pipe between parent and grandchild");
|
||||||
|
|
||||||
/* TODO: Currently we aren't dealing with child deaths properly. */
|
/* TODO: Currently we aren't dealing with child deaths properly. */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -494,9 +542,10 @@ void nsexec(void)
|
|||||||
* process.
|
* process.
|
||||||
*/
|
*/
|
||||||
case JUMP_PARENT: {
|
case JUMP_PARENT: {
|
||||||
int len, ready = 0;
|
int len;
|
||||||
pid_t child;
|
pid_t child;
|
||||||
char buf[JSON_MAX];
|
char buf[JSON_MAX];
|
||||||
|
bool ready = false;
|
||||||
|
|
||||||
/* For debugging. */
|
/* For debugging. */
|
||||||
prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
|
prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
|
||||||
@ -513,30 +562,39 @@ void nsexec(void)
|
|||||||
* ready, so we can receive all possible error codes
|
* ready, so we can receive all possible error codes
|
||||||
* generated by children.
|
* generated by children.
|
||||||
*/
|
*/
|
||||||
while (ready < 2) {
|
while (!ready) {
|
||||||
enum sync_t s;
|
enum sync_t s;
|
||||||
|
int ret;
|
||||||
|
|
||||||
/* This doesn't need to be global, we're in the parent. */
|
syncfd = sync_child_pipe[1];
|
||||||
int syncfd = syncpipe[1];
|
close(sync_child_pipe[0]);
|
||||||
|
|
||||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
bail("failed to sync with child: next state");
|
bail("failed to sync with child: next state");
|
||||||
|
|
||||||
switch (s) {
|
switch (s) {
|
||||||
case SYNC_ERR: {
|
case SYNC_ERR:
|
||||||
/* We have to mirror the error code of the child. */
|
/* We have to mirror the error code of the child. */
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
|
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
|
||||||
bail("failed to sync with child: read(error code)");
|
bail("failed to sync with child: read(error code)");
|
||||||
|
|
||||||
exit(ret);
|
exit(ret);
|
||||||
}
|
|
||||||
break;
|
|
||||||
case SYNC_USERMAP_PLS:
|
case SYNC_USERMAP_PLS:
|
||||||
/* Enable setgroups(2) if we've been asked to. */
|
/*
|
||||||
|
* Enable setgroups(2) if we've been asked to. But we also
|
||||||
|
* have to explicitly disable setgroups(2) if we're
|
||||||
|
* creating a rootless container (this is required since
|
||||||
|
* Linux 3.19).
|
||||||
|
*/
|
||||||
|
if (config.is_rootless && config.is_setgroup) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("cannot allow setgroup in an unprivileged user namespace setup");
|
||||||
|
}
|
||||||
|
|
||||||
if (config.is_setgroup)
|
if (config.is_setgroup)
|
||||||
update_setgroups(child, SETGROUPS_ALLOW);
|
update_setgroups(child, SETGROUPS_ALLOW);
|
||||||
|
if (config.is_rootless)
|
||||||
|
update_setgroups(child, SETGROUPS_DENY);
|
||||||
|
|
||||||
/* Set up mappings. */
|
/* Set up mappings. */
|
||||||
update_uidmap(child, config.uidmap, config.uidmap_len);
|
update_uidmap(child, config.uidmap, config.uidmap_len);
|
||||||
@ -548,11 +606,6 @@ void nsexec(void)
|
|||||||
bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
|
bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SYNC_USERMAP_ACK:
|
|
||||||
/* We should _never_ receive acks. */
|
|
||||||
kill(child, SIGKILL);
|
|
||||||
bail("failed to sync with child: unexpected SYNC_USERMAP_ACK");
|
|
||||||
break;
|
|
||||||
case SYNC_RECVPID_PLS: {
|
case SYNC_RECVPID_PLS: {
|
||||||
pid_t old = child;
|
pid_t old = child;
|
||||||
|
|
||||||
@ -570,20 +623,46 @@ void nsexec(void)
|
|||||||
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
|
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ready++;
|
|
||||||
break;
|
|
||||||
case SYNC_RECVPID_ACK:
|
|
||||||
/* We should _never_ receive acks. */
|
|
||||||
kill(child, SIGKILL);
|
|
||||||
bail("failed to sync with child: unexpected SYNC_RECVPID_ACK");
|
|
||||||
break;
|
break;
|
||||||
case SYNC_CHILD_READY:
|
case SYNC_CHILD_READY:
|
||||||
ready++;
|
ready = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
bail("unexpected sync value");
|
bail("unexpected sync value: %u", s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now sync with grandchild. */
|
||||||
|
|
||||||
|
ready = false;
|
||||||
|
while (!ready) {
|
||||||
|
enum sync_t s;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
syncfd = sync_grandchild_pipe[1];
|
||||||
|
close(sync_grandchild_pipe[0]);
|
||||||
|
|
||||||
|
s = SYNC_GRANDCHILD;
|
||||||
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with child: write(SYNC_GRANDCHILD)");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
|
bail("failed to sync with child: next state");
|
||||||
|
|
||||||
|
switch (s) {
|
||||||
|
case SYNC_ERR:
|
||||||
|
/* We have to mirror the error code of the child. */
|
||||||
|
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
|
||||||
|
bail("failed to sync with child: read(error code)");
|
||||||
|
|
||||||
|
exit(ret);
|
||||||
|
case SYNC_CHILD_READY:
|
||||||
|
ready = true;
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
bail("unexpected sync value: %u", s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -615,7 +694,8 @@ void nsexec(void)
|
|||||||
enum sync_t s;
|
enum sync_t s;
|
||||||
|
|
||||||
/* We're in a child and thus need to tell the parent if we die. */
|
/* We're in a child and thus need to tell the parent if we die. */
|
||||||
syncfd = syncpipe[0];
|
syncfd = sync_child_pipe[0];
|
||||||
|
close(sync_child_pipe[1]);
|
||||||
|
|
||||||
/* For debugging. */
|
/* For debugging. */
|
||||||
prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
|
prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
|
||||||
@ -653,6 +733,11 @@ void nsexec(void)
|
|||||||
* clone_parent rant). So signal our parent to hook us up.
|
* clone_parent rant). So signal our parent to hook us up.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* Switching is only necessary if we joined namespaces. */
|
||||||
|
if (config.namespaces) {
|
||||||
|
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
|
||||||
|
bail("failed to set process as dumpable");
|
||||||
|
}
|
||||||
s = SYNC_USERMAP_PLS;
|
s = SYNC_USERMAP_PLS;
|
||||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
|
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
|
||||||
@ -663,6 +748,11 @@ void nsexec(void)
|
|||||||
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
|
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
|
||||||
if (s != SYNC_USERMAP_ACK)
|
if (s != SYNC_USERMAP_ACK)
|
||||||
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
|
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
|
||||||
|
/* Switching is only necessary if we joined namespaces. */
|
||||||
|
if (config.namespaces) {
|
||||||
|
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
||||||
|
bail("failed to set process as dumpable");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -700,6 +790,12 @@ void nsexec(void)
|
|||||||
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
|
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s = SYNC_CHILD_READY;
|
||||||
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with parent: write(SYNC_CHILD_READY)");
|
||||||
|
}
|
||||||
|
|
||||||
/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
|
/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
@ -718,11 +814,19 @@ void nsexec(void)
|
|||||||
enum sync_t s;
|
enum sync_t s;
|
||||||
|
|
||||||
/* We're in a child and thus need to tell the parent if we die. */
|
/* We're in a child and thus need to tell the parent if we die. */
|
||||||
syncfd = syncpipe[0];
|
syncfd = sync_grandchild_pipe[0];
|
||||||
|
close(sync_grandchild_pipe[1]);
|
||||||
|
close(sync_child_pipe[0]);
|
||||||
|
close(sync_child_pipe[1]);
|
||||||
|
|
||||||
/* For debugging. */
|
/* For debugging. */
|
||||||
prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);
|
prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);
|
||||||
|
|
||||||
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
|
bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
|
||||||
|
if (s != SYNC_GRANDCHILD)
|
||||||
|
bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s);
|
||||||
|
|
||||||
if (setsid() < 0)
|
if (setsid() < 0)
|
||||||
bail("setsid failed");
|
bail("setsid failed");
|
||||||
|
|
||||||
@ -732,16 +836,17 @@ void nsexec(void)
|
|||||||
if (setgid(0) < 0)
|
if (setgid(0) < 0)
|
||||||
bail("setgid failed");
|
bail("setgid failed");
|
||||||
|
|
||||||
|
if (!config.is_rootless && config.is_setgroup) {
|
||||||
if (setgroups(0, NULL) < 0)
|
if (setgroups(0, NULL) < 0)
|
||||||
bail("setgroups failed");
|
bail("setgroups failed");
|
||||||
|
}
|
||||||
|
|
||||||
s = SYNC_CHILD_READY;
|
s = SYNC_CHILD_READY;
|
||||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
|
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
|
||||||
|
|
||||||
/* Close sync pipes. */
|
/* Close sync pipes. */
|
||||||
close(syncpipe[0]);
|
close(sync_grandchild_pipe[0]);
|
||||||
close(syncpipe[1]);
|
|
||||||
|
|
||||||
/* Free netlink data. */
|
/* Free netlink data. */
|
||||||
nl_free(&config);
|
nl_free(&config);
|
||||||
@ -751,7 +856,6 @@ void nsexec(void)
|
|||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
bail("unexpected jump value");
|
bail("unexpected jump value");
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Should never be reached. */
|
/* Should never be reached. */
|
||||||
|
148
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c
generated
vendored
148
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.c
generated
vendored
@ -1,148 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2016 SUSE LLC
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <sys/socket.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#include "cmsg.h"
|
|
||||||
|
|
||||||
#define error(fmt, ...) \
|
|
||||||
({ \
|
|
||||||
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
|
|
||||||
errno = ECOMM; \
|
|
||||||
goto err; /* return value */ \
|
|
||||||
})
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Sends a file descriptor along the sockfd provided. Returns the return
|
|
||||||
* value of sendmsg(2). Any synchronisation and preparation of state
|
|
||||||
* should be done external to this (we expect the other side to be in
|
|
||||||
* recvfd() in the code).
|
|
||||||
*/
|
|
||||||
ssize_t sendfd(int sockfd, struct file_t file)
|
|
||||||
{
|
|
||||||
struct msghdr msg = {0};
|
|
||||||
struct iovec iov[1] = {0};
|
|
||||||
struct cmsghdr *cmsg;
|
|
||||||
int *fdptr;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
union {
|
|
||||||
char buf[CMSG_SPACE(sizeof(file.fd))];
|
|
||||||
struct cmsghdr align;
|
|
||||||
} u;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We need to send some other data along with the ancillary data,
|
|
||||||
* otherwise the other side won't recieve any data. This is very
|
|
||||||
* well-hidden in the documentation (and only applies to
|
|
||||||
* SOCK_STREAM). See the bottom part of unix(7).
|
|
||||||
*/
|
|
||||||
iov[0].iov_base = file.name;
|
|
||||||
iov[0].iov_len = strlen(file.name) + 1;
|
|
||||||
|
|
||||||
msg.msg_name = NULL;
|
|
||||||
msg.msg_namelen = 0;
|
|
||||||
msg.msg_iov = iov;
|
|
||||||
msg.msg_iovlen = 1;
|
|
||||||
msg.msg_control = u.buf;
|
|
||||||
msg.msg_controllen = sizeof(u.buf);
|
|
||||||
|
|
||||||
cmsg = CMSG_FIRSTHDR(&msg);
|
|
||||||
cmsg->cmsg_level = SOL_SOCKET;
|
|
||||||
cmsg->cmsg_type = SCM_RIGHTS;
|
|
||||||
cmsg->cmsg_len = CMSG_LEN(sizeof(int));
|
|
||||||
|
|
||||||
fdptr = (int *) CMSG_DATA(cmsg);
|
|
||||||
memcpy(fdptr, &file.fd, sizeof(int));
|
|
||||||
|
|
||||||
return sendmsg(sockfd, &msg, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Receives a file descriptor from the sockfd provided. Returns the file
|
|
||||||
* descriptor as sent from sendfd(). It will return the file descriptor
|
|
||||||
* or die (literally) trying. Any synchronisation and preparation of
|
|
||||||
* state should be done external to this (we expect the other side to be
|
|
||||||
* in sendfd() in the code).
|
|
||||||
*/
|
|
||||||
struct file_t recvfd(int sockfd)
|
|
||||||
{
|
|
||||||
struct msghdr msg = {0};
|
|
||||||
struct iovec iov[1] = {0};
|
|
||||||
struct cmsghdr *cmsg;
|
|
||||||
struct file_t file = {0};
|
|
||||||
int *fdptr;
|
|
||||||
int olderrno;
|
|
||||||
|
|
||||||
union {
|
|
||||||
char buf[CMSG_SPACE(sizeof(file.fd))];
|
|
||||||
struct cmsghdr align;
|
|
||||||
} u;
|
|
||||||
|
|
||||||
/* Allocate a buffer. */
|
|
||||||
/* TODO: Make this dynamic with MSG_PEEK. */
|
|
||||||
file.name = malloc(TAG_BUFFER);
|
|
||||||
if (!file.name)
|
|
||||||
error("recvfd: failed to allocate file.tag buffer\n");
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We need to "recieve" the non-ancillary data even though we don't
|
|
||||||
* plan to use it at all. Otherwise, things won't work as expected.
|
|
||||||
* See unix(7) and other well-hidden documentation.
|
|
||||||
*/
|
|
||||||
iov[0].iov_base = file.name;
|
|
||||||
iov[0].iov_len = TAG_BUFFER;
|
|
||||||
|
|
||||||
msg.msg_name = NULL;
|
|
||||||
msg.msg_namelen = 0;
|
|
||||||
msg.msg_iov = iov;
|
|
||||||
msg.msg_iovlen = 1;
|
|
||||||
msg.msg_control = u.buf;
|
|
||||||
msg.msg_controllen = sizeof(u.buf);
|
|
||||||
|
|
||||||
ssize_t ret = recvmsg(sockfd, &msg, 0);
|
|
||||||
if (ret < 0)
|
|
||||||
goto err;
|
|
||||||
|
|
||||||
cmsg = CMSG_FIRSTHDR(&msg);
|
|
||||||
if (!cmsg)
|
|
||||||
error("recvfd: got NULL from CMSG_FIRSTHDR");
|
|
||||||
if (cmsg->cmsg_level != SOL_SOCKET)
|
|
||||||
error("recvfd: expected SOL_SOCKET in cmsg: %d", cmsg->cmsg_level);
|
|
||||||
if (cmsg->cmsg_type != SCM_RIGHTS)
|
|
||||||
error("recvfd: expected SCM_RIGHTS in cmsg: %d", cmsg->cmsg_type);
|
|
||||||
if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
|
|
||||||
error("recvfd: expected correct CMSG_LEN in cmsg: %lu", cmsg->cmsg_len);
|
|
||||||
|
|
||||||
fdptr = (int *) CMSG_DATA(cmsg);
|
|
||||||
if (!fdptr || *fdptr < 0)
|
|
||||||
error("recvfd: recieved invalid pointer");
|
|
||||||
|
|
||||||
file.fd = *fdptr;
|
|
||||||
return file;
|
|
||||||
|
|
||||||
err:
|
|
||||||
olderrno = errno;
|
|
||||||
free(file.name);
|
|
||||||
errno = olderrno;
|
|
||||||
return (struct file_t){0};
|
|
||||||
}
|
|
76
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
76
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go
generated
vendored
@ -3,7 +3,7 @@
|
|||||||
package utils
|
package utils
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright 2016 SUSE LLC
|
* Copyright 2016, 2017 SUSE LLC
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
* you may not use this file except in compliance with the License.
|
* you may not use this file except in compliance with the License.
|
||||||
@ -18,28 +18,66 @@ package utils
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
#include <errno.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include "cmsg.h"
|
|
||||||
*/
|
|
||||||
import "C"
|
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"unsafe"
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// MaxNameLen is the maximum length of the name of a file descriptor being
|
||||||
|
// sent using SendFd. The name of the file handle returned by RecvFd will never
|
||||||
|
// be larger than this value.
|
||||||
|
const MaxNameLen = 4096
|
||||||
|
|
||||||
|
// oobSpace is the size of the oob slice required to store a single FD. Note
|
||||||
|
// that unix.UnixRights appears to make the assumption that fd is always int32,
|
||||||
|
// so sizeof(fd) = 4.
|
||||||
|
var oobSpace = unix.CmsgSpace(4)
|
||||||
|
|
||||||
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
|
// RecvFd waits for a file descriptor to be sent over the given AF_UNIX
|
||||||
// socket. The file name of the remote file descriptor will be recreated
|
// socket. The file name of the remote file descriptor will be recreated
|
||||||
// locally (it is sent as non-auxiliary data in the same payload).
|
// locally (it is sent as non-auxiliary data in the same payload).
|
||||||
func RecvFd(socket *os.File) (*os.File, error) {
|
func RecvFd(socket *os.File) (*os.File, error) {
|
||||||
file, err := C.recvfd(C.int(socket.Fd()))
|
// For some reason, unix.Recvmsg uses the length rather than the capacity
|
||||||
|
// when passing the msg_controllen and other attributes to recvmsg. So we
|
||||||
|
// have to actually set the length.
|
||||||
|
name := make([]byte, MaxNameLen)
|
||||||
|
oob := make([]byte, oobSpace)
|
||||||
|
|
||||||
|
sockfd := socket.Fd()
|
||||||
|
n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer C.free(unsafe.Pointer(file.name))
|
|
||||||
return os.NewFile(uintptr(file.fd), C.GoString(file.name)), nil
|
if n >= MaxNameLen || oobn != oobSpace {
|
||||||
|
return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Truncate.
|
||||||
|
name = name[:n]
|
||||||
|
oob = oob[:oobn]
|
||||||
|
|
||||||
|
scms, err := unix.ParseSocketControlMessage(oob)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(scms) != 1 {
|
||||||
|
return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
||||||
|
}
|
||||||
|
scm := scms[0]
|
||||||
|
|
||||||
|
fds, err := unix.ParseUnixRights(&scm)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(fds) != 1 {
|
||||||
|
return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds))
|
||||||
|
}
|
||||||
|
fd := uintptr(fds[0])
|
||||||
|
|
||||||
|
return os.NewFile(fd, string(name)), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// SendFd sends a file descriptor over the given AF_UNIX socket. In
|
// SendFd sends a file descriptor over the given AF_UNIX socket. In
|
||||||
@ -47,11 +85,11 @@ func RecvFd(socket *os.File) (*os.File, error) {
|
|||||||
// non-auxiliary data in the same payload (allowing to send contextual
|
// non-auxiliary data in the same payload (allowing to send contextual
|
||||||
// information for a file descriptor).
|
// information for a file descriptor).
|
||||||
func SendFd(socket, file *os.File) error {
|
func SendFd(socket, file *os.File) error {
|
||||||
var cfile C.struct_file_t
|
name := []byte(file.Name())
|
||||||
cfile.fd = C.int(file.Fd())
|
if len(name) >= MaxNameLen {
|
||||||
cfile.name = C.CString(file.Name())
|
return fmt.Errorf("sendfd: filename too long: %s", file.Name())
|
||||||
defer C.free(unsafe.Pointer(cfile.name))
|
}
|
||||||
|
oob := unix.UnixRights(int(file.Fd()))
|
||||||
_, err := C.sendfd(C.int(socket.Fd()), cfile)
|
|
||||||
return err
|
return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0)
|
||||||
}
|
}
|
||||||
|
36
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h
generated
vendored
36
vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.h
generated
vendored
@ -1,36 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2016 SUSE LLC
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#if !defined(CMSG_H)
|
|
||||||
#define CMSG_H
|
|
||||||
|
|
||||||
#include <sys/types.h>
|
|
||||||
|
|
||||||
/* TODO: Implement this properly with MSG_PEEK. */
|
|
||||||
#define TAG_BUFFER 4096
|
|
||||||
|
|
||||||
/* This mirrors Go's (*os.File). */
|
|
||||||
struct file_t {
|
|
||||||
char *name;
|
|
||||||
int fd;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct file_t recvfd(int sockfd);
|
|
||||||
ssize_t sendfd(int sockfd, struct file_t file);
|
|
||||||
|
|
||||||
#endif /* !defined(CMSG_H) */
|
|
10
vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
10
vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go
generated
vendored
@ -4,6 +4,7 @@ package utils
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
"syscall"
|
"syscall"
|
||||||
)
|
)
|
||||||
@ -31,3 +32,12 @@ func CloseExecFrom(minFd int) error {
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewSockPair returns a new unix socket pair
|
||||||
|
func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
|
||||||
|
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user