Merge pull request #1237 from hqhq/fix_sync_race
Fix race condition when sync with child and grandchild
This commit is contained in:
commit
8438b26e9f
|
@ -33,7 +33,8 @@ enum sync_t {
|
||||||
SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
|
SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
|
||||||
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
|
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
|
||||||
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
|
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
|
||||||
SYNC_CHILD_READY = 0x44, /* The grandchild is ready to return. */
|
SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */
|
||||||
|
SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */
|
||||||
|
|
||||||
/* XXX: This doesn't help with segfaults and other such issues. */
|
/* XXX: This doesn't help with segfaults and other such issues. */
|
||||||
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
|
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
|
||||||
|
@ -413,7 +414,7 @@ void nsexec(void)
|
||||||
{
|
{
|
||||||
int pipenum;
|
int pipenum;
|
||||||
jmp_buf env;
|
jmp_buf env;
|
||||||
int syncpipe[2];
|
int sync_child_pipe[2], sync_grandchild_pipe[2];
|
||||||
struct nlconfig_t config = {0};
|
struct nlconfig_t config = {0};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -433,9 +434,16 @@ void nsexec(void)
|
||||||
nl_parse(pipenum, &config);
|
nl_parse(pipenum, &config);
|
||||||
|
|
||||||
/* Pipe so we can tell the child when we've finished setting up. */
|
/* Pipe so we can tell the child when we've finished setting up. */
|
||||||
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, syncpipe) < 0)
|
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
|
||||||
bail("failed to setup sync pipe between parent and child");
|
bail("failed to setup sync pipe between parent and child");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need a new socketpair to sync with grandchild so we don't have
|
||||||
|
* race condition with child.
|
||||||
|
*/
|
||||||
|
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0)
|
||||||
|
bail("failed to setup sync pipe between parent and grandchild");
|
||||||
|
|
||||||
/* TODO: Currently we aren't dealing with child deaths properly. */
|
/* TODO: Currently we aren't dealing with child deaths properly. */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -494,9 +502,10 @@ void nsexec(void)
|
||||||
* process.
|
* process.
|
||||||
*/
|
*/
|
||||||
case JUMP_PARENT: {
|
case JUMP_PARENT: {
|
||||||
int len, ready = 0;
|
int len;
|
||||||
pid_t child;
|
pid_t child;
|
||||||
char buf[JSON_MAX];
|
char buf[JSON_MAX];
|
||||||
|
bool ready = false;
|
||||||
|
|
||||||
/* For debugging. */
|
/* For debugging. */
|
||||||
prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
|
prctl(PR_SET_NAME, (unsigned long) "runc:[0:PARENT]", 0, 0, 0);
|
||||||
|
@ -513,26 +522,23 @@ void nsexec(void)
|
||||||
* ready, so we can receive all possible error codes
|
* ready, so we can receive all possible error codes
|
||||||
* generated by children.
|
* generated by children.
|
||||||
*/
|
*/
|
||||||
while (ready < 2) {
|
while (!ready) {
|
||||||
enum sync_t s;
|
enum sync_t s;
|
||||||
|
int ret;
|
||||||
|
|
||||||
/* This doesn't need to be global, we're in the parent. */
|
syncfd = sync_child_pipe[1];
|
||||||
int syncfd = syncpipe[1];
|
close(sync_child_pipe[0]);
|
||||||
|
|
||||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
bail("failed to sync with child: next state");
|
bail("failed to sync with child: next state");
|
||||||
|
|
||||||
switch (s) {
|
switch (s) {
|
||||||
case SYNC_ERR: {
|
case SYNC_ERR:
|
||||||
/* We have to mirror the error code of the child. */
|
/* We have to mirror the error code of the child. */
|
||||||
int ret;
|
|
||||||
|
|
||||||
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
|
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
|
||||||
bail("failed to sync with child: read(error code)");
|
bail("failed to sync with child: read(error code)");
|
||||||
|
|
||||||
exit(ret);
|
exit(ret);
|
||||||
}
|
|
||||||
break;
|
|
||||||
case SYNC_USERMAP_PLS:
|
case SYNC_USERMAP_PLS:
|
||||||
/* Enable setgroups(2) if we've been asked to. */
|
/* Enable setgroups(2) if we've been asked to. */
|
||||||
if (config.is_setgroup)
|
if (config.is_setgroup)
|
||||||
|
@ -548,11 +554,6 @@ void nsexec(void)
|
||||||
bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
|
bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SYNC_USERMAP_ACK:
|
|
||||||
/* We should _never_ receive acks. */
|
|
||||||
kill(child, SIGKILL);
|
|
||||||
bail("failed to sync with child: unexpected SYNC_USERMAP_ACK");
|
|
||||||
break;
|
|
||||||
case SYNC_RECVPID_PLS: {
|
case SYNC_RECVPID_PLS: {
|
||||||
pid_t old = child;
|
pid_t old = child;
|
||||||
|
|
||||||
|
@ -570,20 +571,46 @@ void nsexec(void)
|
||||||
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
|
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ready++;
|
|
||||||
break;
|
|
||||||
case SYNC_RECVPID_ACK:
|
|
||||||
/* We should _never_ receive acks. */
|
|
||||||
kill(child, SIGKILL);
|
|
||||||
bail("failed to sync with child: unexpected SYNC_RECVPID_ACK");
|
|
||||||
break;
|
break;
|
||||||
case SYNC_CHILD_READY:
|
case SYNC_CHILD_READY:
|
||||||
ready++;
|
ready = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
bail("unexpected sync value");
|
bail("unexpected sync value: %u", s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now sync with grandchild. */
|
||||||
|
|
||||||
|
ready = false;
|
||||||
|
while (!ready) {
|
||||||
|
enum sync_t s;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
syncfd = sync_grandchild_pipe[1];
|
||||||
|
close(sync_grandchild_pipe[0]);
|
||||||
|
|
||||||
|
s = SYNC_GRANDCHILD;
|
||||||
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with child: write(SYNC_GRANDCHILD)");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
|
bail("failed to sync with child: next state");
|
||||||
|
|
||||||
|
switch (s) {
|
||||||
|
case SYNC_ERR:
|
||||||
|
/* We have to mirror the error code of the child. */
|
||||||
|
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
|
||||||
|
bail("failed to sync with child: read(error code)");
|
||||||
|
|
||||||
|
exit(ret);
|
||||||
|
case SYNC_CHILD_READY:
|
||||||
|
ready = true;
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
bail("unexpected sync value: %u", s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -615,7 +642,8 @@ void nsexec(void)
|
||||||
enum sync_t s;
|
enum sync_t s;
|
||||||
|
|
||||||
/* We're in a child and thus need to tell the parent if we die. */
|
/* We're in a child and thus need to tell the parent if we die. */
|
||||||
syncfd = syncpipe[0];
|
syncfd = sync_child_pipe[0];
|
||||||
|
close(sync_child_pipe[1]);
|
||||||
|
|
||||||
/* For debugging. */
|
/* For debugging. */
|
||||||
prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
|
prctl(PR_SET_NAME, (unsigned long) "runc:[1:CHILD]", 0, 0, 0);
|
||||||
|
@ -700,6 +728,12 @@ void nsexec(void)
|
||||||
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
|
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s = SYNC_CHILD_READY;
|
||||||
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
|
kill(child, SIGKILL);
|
||||||
|
bail("failed to sync with parent: write(SYNC_CHILD_READY)");
|
||||||
|
}
|
||||||
|
|
||||||
/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
|
/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
@ -718,11 +752,19 @@ void nsexec(void)
|
||||||
enum sync_t s;
|
enum sync_t s;
|
||||||
|
|
||||||
/* We're in a child and thus need to tell the parent if we die. */
|
/* We're in a child and thus need to tell the parent if we die. */
|
||||||
syncfd = syncpipe[0];
|
syncfd = sync_grandchild_pipe[0];
|
||||||
|
close(sync_grandchild_pipe[1]);
|
||||||
|
close(sync_child_pipe[0]);
|
||||||
|
close(sync_child_pipe[1]);
|
||||||
|
|
||||||
/* For debugging. */
|
/* For debugging. */
|
||||||
prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);
|
prctl(PR_SET_NAME, (unsigned long) "runc:[2:INIT]", 0, 0, 0);
|
||||||
|
|
||||||
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
|
bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
|
||||||
|
if (s != SYNC_GRANDCHILD)
|
||||||
|
bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s);
|
||||||
|
|
||||||
if (setsid() < 0)
|
if (setsid() < 0)
|
||||||
bail("setsid failed");
|
bail("setsid failed");
|
||||||
|
|
||||||
|
@ -740,8 +782,7 @@ void nsexec(void)
|
||||||
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
|
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
|
||||||
|
|
||||||
/* Close sync pipes. */
|
/* Close sync pipes. */
|
||||||
close(syncpipe[0]);
|
close(sync_grandchild_pipe[0]);
|
||||||
close(syncpipe[1]);
|
|
||||||
|
|
||||||
/* Free netlink data. */
|
/* Free netlink data. */
|
||||||
nl_free(&config);
|
nl_free(&config);
|
||||||
|
@ -751,7 +792,6 @@ void nsexec(void)
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
bail("unexpected jump value");
|
bail("unexpected jump value");
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Should never be reached. */
|
/* Should never be reached. */
|
||||||
|
|
Loading…
Reference in New Issue