2016-03-15 03:55:05 +08:00
|
|
|
#!/usr/bin/env bats
|
|
|
|
|
|
|
|
load helpers
|
|
|
|
|
|
|
|
# Bats per-test setup hook: skip unless criu and root are available, then
# start from a fresh busybox bundle.
function setup() {
	# XXX: criu currently requires root containers.
	requires criu root

	# Remove leftovers of a previous test before creating the new bundle.
	teardown_busybox
	setup_busybox
}
|
|
|
|
|
|
|
|
# Bats per-test teardown hook: clean up the busybox bundle/container.
function teardown() {
	teardown_busybox
}
|
|
|
|
|
tests/checkpoint.bats: fix test hang/failure
Commit a9e15e7e0 adds a check that stdin/out/err pipes
are restored correctly. Commit ec260653b7d4e copy/pastes
the same code into one more test.
Problem is (as pointed out in commit 5369f9ade3) these tests
sometimes hang. I have also seen them fail.
Apparently, the code used to create pipes and open them to fds
is racy:
```shell
cat $fifo | cat $fifo &
pid=$!
exec 50</proc/$pid/fd/0
exec 51>/proc/$pid/fd/0
```
Since `cat | cat` is spawned asynchronously, by the time exec is used,
the second cat process (i.e. $pid) is already fork'ed but it might
not be exec'ed yet. As a result, we get this (`ls -l /proc/self/fd`):
```
lr-x------. 1 root root 64 Apr 20 02:39 50 -> /dev/pts/1
l-wx------. 1 root root 64 Apr 20 02:39 51 -> /dev/pts/1
```
or, in some cases:
```
lr-x------. 1 root root 64 Apr 20 02:45 50 -> /dev/pts/1
l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]'
```
instead of expected set of pipes:
```
> lr-x------. 1 root root 64 Apr 20 02:45 50 -> 'pipe:[215791]'
> l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]'
```
One possible workaround is to add `sleep 0.1` or so after cat|cat,
but it is outright ugly (besides, we already have one sleep in
the test code).
The solution is to not use any external processes to create pipes.
I admit this still does not look very comprehensible, but at least it
is simpler than before, and it works.
While at it, remove code duplication, moving the setup and check
code into a pair of functions.
Finally, since the tests are working now, remove the skip.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-20 12:48:11 +08:00
|
|
|
# Prepare config.json and two pipes for tests that run the container with
# plain pipes (no terminal) as stdin/stdout/stderr.
#
# File descriptors left open in the calling shell:
#   50 - read end,  51 - write end of the stdout/stderr pipe
#   60 - read end,  61 - write end of the stdin pipe
function setup_pipes() {
	# The changes to 'terminal' are needed for running in detached mode
	sed -i 's;"terminal": true;"terminal": false;' config.json
	# Container process: read a line from stdin and echo it back prefixed
	# with "ponG" (check_pipes looks for that reply).
	sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json

	# Create two sets of pipes without spawning any long-lived helper
	# process: open the pipe of an empty process substitution read-write on
	# a scratch fd, re-open it via /proc/self/fd as separate read-only and
	# write-only descriptors, then close the scratch fd.

	# for stdout/stderr
	exec 52<> <(:)
	exec 50</proc/self/fd/52
	exec 51>/proc/self/fd/52
	exec 52>&-

	# ... and stdin
	exec 62<> <(:)
	exec 60</proc/self/fd/62
	exec 61>/proc/self/fd/62
	exec 62>&-
}
|
|
|
|
|
|
|
|
# Verify that the pipes created by setup_pipes still work: send "Ping" to
# the container's stdin and expect "ponG Ping" on its stdout.
function check_pipes() {
	echo Ping >&61
	# Close our write ends (stdin pipe and stdout/stderr pipe) so that the
	# reader below is not kept waiting by descriptors held in this shell.
	exec 61>&-
	exec 51>&-

	run cat <&50
	[ "$status" -eq 0 ]
	[[ "${output}" == *"ponG Ping"* ]]
}
|
|
|
|
|
2020-04-21 08:56:08 +08:00
|
|
|
# Run a detached busybox container, then checkpoint and restore it twice,
# verifying the container state after every step.
function simple_cr() {
	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
	[ "$status" -eq 0 ]

	testcontainer test_busybox running

	for i in $(seq 2); do
		# checkpoint the running container
		runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox
		# show errors from the criu log (if any) before we possibly fail
		grep -B 5 Error ./work-dir/dump.log || true
		[ "$status" -eq 0 ]

		# after checkpoint busybox is no longer running
		testcontainer test_busybox checkpointed

		# restore from checkpoint
		runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
		grep -B 5 Error ./work-dir/restore.log || true
		# As for every other `runc` call in this file, the result is in
		# $status. NOTE(review): the `runc` helper presumably wraps bats
		# `run`, in which case $? after it is not runc's exit code —
		# confirm against helpers.
		[ "$status" -eq 0 ]

		# busybox should be back up and running
		testcontainer test_busybox running
	done
}
|
2016-08-24 17:48:56 +08:00
|
|
|
|
2020-04-21 08:56:08 +08:00
|
|
|
@test "checkpoint and restore " {
|
|
|
|
simple_cr
|
|
|
|
}
|
|
|
|
|
|
|
|
@test "checkpoint and restore (cgroupns)" {
|
|
|
|
# cgroupv2 already enables cgroupns so this case was tested above already
|
|
|
|
requires cgroups_v1
|
|
|
|
|
|
|
|
# enable CGROUPNS
|
|
|
|
sed -i 's|\("namespaces": \[\)|\1\n\t\t\t{"type": "cgroup"},|' config.json
|
|
|
|
|
|
|
|
simple_cr
|
|
|
|
}
|
|
|
|
|
2016-05-11 15:45:00 +08:00
|
|
|
@test "checkpoint --pre-dump and restore" {
|
tests/checkpoint.bats: fix test hang/failure
Commit a9e15e7e0 adds a check that stdin/out/err pipes
are restored correctly. Commit ec260653b7d4e copy/pastes
the same code to one more another test.
Problem is (as pointed out in commit 5369f9ade3) these tests
sometimes hang. I have also seen them fail.
Apparently, the code used to create pipes and open them to fds
is racy:
```shell
cat $fifo | cat $fifo &
pid=$!
exec 50</proc/$pid/fd/0
exec 51>/proc/$pid/fd/0
```
Since `cat | cat` is spawned asynchronously, by the time exec is used,
the second cat process (i.e. $pid) is already fork'ed but it might
not be exec'ed yet. As a result, we get this (`ls -l /proc/self/fd`):
```
lr-x------. 1 root root 64 Apr 20 02:39 50 -> /dev/pts/1
l-wx------. 1 root root 64 Apr 20 02:39 51 -> /dev/pts/1
```
or, in some cases:
```
lr-x------. 1 root root 64 Apr 20 02:45 50 -> /dev/pts/1
l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]'
```
instead of expected set of pipes:
```
> lr-x------. 1 root root 64 Apr 20 02:45 50 -> 'pipe:[215791]'
> l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]'
```
One possible workaround is to add `sleep 0.1` or so after cat|cat,
but it is outright ugly (besides, we already have one sleep in
the test code).
The solution is to not use any external processes to create pipes.
I admit this still looks not very comprehensible, but at least it
is easier than before, and it works.
While at it, remove code duplication, moving the setup and check
code into a pair of functions.
Finally, since the tests are working now, remove the skip.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-20 12:48:11 +08:00
|
|
|
setup_pipes
|
2016-08-24 17:48:56 +08:00
|
|
|
|
2018-11-19 22:24:50 +08:00
|
|
|
# run busybox
|
2017-05-03 05:17:31 +08:00
|
|
|
__runc run -d test_busybox <&60 >&51 2>&51
|
|
|
|
[ $? -eq 0 ]
|
|
|
|
|
|
|
|
testcontainer test_busybox running
|
2016-08-24 17:48:56 +08:00
|
|
|
|
|
|
|
#test checkpoint pre-dump
|
|
|
|
mkdir parent-dir
|
|
|
|
runc --criu "$CRIU" checkpoint --pre-dump --image-path ./parent-dir test_busybox
|
|
|
|
[ "$status" -eq 0 ]
|
|
|
|
|
|
|
|
# busybox should still be running
|
2020-04-20 12:28:15 +08:00
|
|
|
testcontainer test_busybox running
|
2016-08-24 17:48:56 +08:00
|
|
|
|
|
|
|
# checkpoint the running container
|
|
|
|
mkdir image-dir
|
2017-05-03 05:17:31 +08:00
|
|
|
mkdir work-dir
|
|
|
|
runc --criu "$CRIU" checkpoint --parent-path ./parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox
|
|
|
|
cat ./work-dir/dump.log | grep -B 5 Error || true
|
2016-08-24 17:48:56 +08:00
|
|
|
[ "$status" -eq 0 ]
|
|
|
|
|
|
|
|
# after checkpoint busybox is no longer running
|
2020-04-20 12:28:15 +08:00
|
|
|
testcontainer test_busybox checkpointed
|
2016-08-24 17:48:56 +08:00
|
|
|
|
|
|
|
# restore from checkpoint
|
2017-05-03 05:17:31 +08:00
|
|
|
__runc --criu "$CRIU" restore -d --work-path ./work-dir --image-path ./image-dir test_busybox <&60 >&51 2>&51
|
|
|
|
ret=$?
|
|
|
|
cat ./work-dir/restore.log | grep -B 5 Error || true
|
|
|
|
[ $ret -eq 0 ]
|
2016-08-24 17:48:56 +08:00
|
|
|
|
|
|
|
# busybox should be back up and running
|
2017-05-03 05:17:31 +08:00
|
|
|
testcontainer test_busybox running
|
|
|
|
|
|
|
|
runc exec --cwd /bin test_busybox echo ok
|
2016-08-24 17:48:56 +08:00
|
|
|
[ "$status" -eq 0 ]
|
2017-05-03 05:17:31 +08:00
|
|
|
[[ ${output} == "ok" ]]
|
|
|
|
|
tests/checkpoint.bats: fix test hang/failure
Commit a9e15e7e0 adds a check that stdin/out/err pipes
are restored correctly. Commit ec260653b7d4e copy/pastes
the same code to one more another test.
Problem is (as pointed out in commit 5369f9ade3) these tests
sometimes hang. I have also seen them fail.
Apparently, the code used to create pipes and open them to fds
is racy:
```shell
cat $fifo | cat $fifo &
pid=$!
exec 50</proc/$pid/fd/0
exec 51>/proc/$pid/fd/0
```
Since `cat | cat` is spawned asynchronously, by the time exec is used,
the second cat process (i.e. $pid) is already fork'ed but it might
not be exec'ed yet. As a result, we get this (`ls -l /proc/self/fd`):
```
lr-x------. 1 root root 64 Apr 20 02:39 50 -> /dev/pts/1
l-wx------. 1 root root 64 Apr 20 02:39 51 -> /dev/pts/1
```
or, in some cases:
```
lr-x------. 1 root root 64 Apr 20 02:45 50 -> /dev/pts/1
l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]'
```
instead of expected set of pipes:
```
> lr-x------. 1 root root 64 Apr 20 02:45 50 -> 'pipe:[215791]'
> l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]'
```
One possible workaround is to add `sleep 0.1` or so after cat|cat,
but it is outright ugly (besides, we already have one sleep in
the test code).
The solution is to not use any external processes to create pipes.
I admit this still looks not very comprehensible, but at least it
is easier than before, and it works.
While at it, remove code duplication, moving the setup and check
code into a pair of functions.
Finally, since the tests are working now, remove the skip.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-20 12:48:11 +08:00
|
|
|
check_pipes
|
2016-08-24 17:48:56 +08:00
|
|
|
}
|
2017-08-11 03:24:31 +08:00
|
|
|
|
|
|
|
@test "checkpoint --lazy-pages and restore" {
|
|
|
|
# check if lazy-pages is supported
|
2018-07-03 22:46:31 +08:00
|
|
|
run ${CRIU} check --feature uffd-noncoop
|
2017-08-11 03:24:31 +08:00
|
|
|
if [ "$status" -eq 1 ]; then
|
|
|
|
skip "this criu does not support lazy migration"
|
|
|
|
fi
|
|
|
|
|
tests/checkpoint.bats: fix test hang/failure
Commit a9e15e7e0 adds a check that stdin/out/err pipes
are restored correctly. Commit ec260653b7d4e copy/pastes
the same code to one more another test.
Problem is (as pointed out in commit 5369f9ade3) these tests
sometimes hang. I have also seen them fail.
Apparently, the code used to create pipes and open them to fds
is racy:
```shell
cat $fifo | cat $fifo &
pid=$!
exec 50</proc/$pid/fd/0
exec 51>/proc/$pid/fd/0
```
Since `cat | cat` is spawned asynchronously, by the time exec is used,
the second cat process (i.e. $pid) is already fork'ed but it might
not be exec'ed yet. As a result, we get this (`ls -l /proc/self/fd`):
```
lr-x------. 1 root root 64 Apr 20 02:39 50 -> /dev/pts/1
l-wx------. 1 root root 64 Apr 20 02:39 51 -> /dev/pts/1
```
or, in some cases:
```
lr-x------. 1 root root 64 Apr 20 02:45 50 -> /dev/pts/1
l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]'
```
instead of expected set of pipes:
```
> lr-x------. 1 root root 64 Apr 20 02:45 50 -> 'pipe:[215791]'
> l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]'
```
One possible workaround is to add `sleep 0.1` or so after cat|cat,
but it is outright ugly (besides, we already have one sleep in
the test code).
The solution is to not use any external processes to create pipes.
I admit this still looks not very comprehensible, but at least it
is easier than before, and it works.
While at it, remove code duplication, moving the setup and check
code into a pair of functions.
Finally, since the tests are working now, remove the skip.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-20 12:48:11 +08:00
|
|
|
setup_pipes
|
|
|
|
|
2018-11-19 22:24:50 +08:00
|
|
|
# This should not be necessary: https://github.com/checkpoint-restore/criu/issues/575
|
2017-08-11 03:24:31 +08:00
|
|
|
sed -i 's;"readonly": true;"readonly": false;' config.json
|
|
|
|
|
|
|
|
# TCP port for lazy migration
|
|
|
|
port=27277
|
|
|
|
|
|
|
|
# run busybox
|
|
|
|
__runc run -d test_busybox <&60 >&51 2>&51
|
|
|
|
[ $? -eq 0 ]
|
|
|
|
|
|
|
|
testcontainer test_busybox running
|
|
|
|
|
|
|
|
# checkpoint the running container
|
|
|
|
mkdir image-dir
|
|
|
|
mkdir work-dir
|
|
|
|
|
runc checkpoint: fix --status-fd to accept fd
1. The command `runc checkpoint --lazy-server --status-fd $FD` actually
accepts a file name as an $FD. Make it accept a file descriptor,
like its name implies and the documentation states.
In addition, since runc itself does not use the result of CRIU status
fd, remove the code which relays it, and pass the FD directly to CRIU.
Note 1: runc should close this file descriptor itself after passing it
to criu, otherwise whoever waits on it might wait forever.
Note 2: due to the way criu swrk consumes the fd (it reopens
/proc/$SENDER_PID/fd/$FD), runc can't close it as soon as criu swrk has
started. There is no good way to know when criu swrk has reopened the
fd, so we assume that as soon as we have received something back, the
fd is already reopened.
2. Since the meaning of --status-fd has changed, the test case using
it needs to be fixed as well.
Modify the lazy migration test to remove "sleep 2", actually waiting
for the the lazy page server to be ready.
While at it,
- remove the double fork (using shell's background process is
sufficient here);
- check the exit code for "runc checkpoint" and "criu lazy-pages";
- remove the check for no errors in dump.log after restore, as we
are already checking its exit code.
[v2: properly close status fd after spawning criu]
[v3: move close status fd to after the first read]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-21 17:43:24 +08:00
|
|
|
# For lazy migration we need to know when CRIU is ready to serve
|
|
|
|
# the memory pages via TCP.
|
|
|
|
exec 72<> <(:)
|
|
|
|
exec 70</proc/self/fd/72 71>/proc/self/fd/72
|
|
|
|
exec 72>&-
|
|
|
|
|
|
|
|
__runc --criu "$CRIU" checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd 71 --work-path ./work-dir --image-path ./image-dir test_busybox &
|
|
|
|
cpt_pid=$!
|
|
|
|
|
|
|
|
# wait for lazy page server to be ready
|
|
|
|
out=$(timeout 2 dd if=/proc/self/fd/70 bs=1 count=1 2>/dev/null | od)
|
|
|
|
exec 71>&-
|
|
|
|
out=$(echo $out) # rm newlines
|
|
|
|
# show errors if there are any before we fail
|
|
|
|
grep -B5 Error ./work-dir/dump.log || true
|
|
|
|
# expecting \0 which od prints as
|
|
|
|
[ "$out" = "0000000 000000 0000001" ]
|
2017-08-11 03:24:31 +08:00
|
|
|
|
runc checkpoint: fix --status-fd to accept fd
1. The command `runc checkpoint --lazy-server --status-fd $FD` actually
accepts a file name as an $FD. Make it accept a file descriptor,
like its name implies and the documentation states.
In addition, since runc itself does not use the result of CRIU status
fd, remove the code which relays it, and pass the FD directly to CRIU.
Note 1: runc should close this file descriptor itself after passing it
to criu, otherwise whoever waits on it might wait forever.
Note 2: due to the way criu swrk consumes the fd (it reopens
/proc/$SENDER_PID/fd/$FD), runc can't close it as soon as criu swrk has
started. There is no good way to know when criu swrk has reopened the
fd, so we assume that as soon as we have received something back, the
fd is already reopened.
2. Since the meaning of --status-fd has changed, the test case using
it needs to be fixed as well.
Modify the lazy migration test to remove "sleep 2", actually waiting
for the the lazy page server to be ready.
While at it,
- remove the double fork (using shell's background process is
sufficient here);
- check the exit code for "runc checkpoint" and "criu lazy-pages";
- remove the check for no errors in dump.log after restore, as we
are already checking its exit code.
[v2: properly close status fd after spawning criu]
[v3: move close status fd to after the first read]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-21 17:43:24 +08:00
|
|
|
# Check if inventory.img was written
|
|
|
|
[ -e image-dir/inventory.img ]
|
2017-08-11 03:24:31 +08:00
|
|
|
|
|
|
|
# Start CRIU in lazy-daemon mode
|
runc checkpoint: fix --status-fd to accept fd
1. The command `runc checkpoint --lazy-server --status-fd $FD` actually
accepts a file name as an $FD. Make it accept a file descriptor,
like its name implies and the documentation states.
In addition, since runc itself does not use the result of CRIU status
fd, remove the code which relays it, and pass the FD directly to CRIU.
Note 1: runc should close this file descriptor itself after passing it
to criu, otherwise whoever waits on it might wait forever.
Note 2: due to the way criu swrk consumes the fd (it reopens
/proc/$SENDER_PID/fd/$FD), runc can't close it as soon as criu swrk has
started. There is no good way to know when criu swrk has reopened the
fd, so we assume that as soon as we have received something back, the
fd is already reopened.
2. Since the meaning of --status-fd has changed, the test case using
it needs to be fixed as well.
Modify the lazy migration test to remove "sleep 2", actually waiting
for the the lazy page server to be ready.
While at it,
- remove the double fork (using shell's background process is
sufficient here);
- check the exit code for "runc checkpoint" and "criu lazy-pages";
- remove the check for no errors in dump.log after restore, as we
are already checking its exit code.
[v2: properly close status fd after spawning criu]
[v3: move close status fd to after the first read]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-21 17:43:24 +08:00
|
|
|
${CRIU} lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir &
|
|
|
|
lp_pid=$!
|
2017-08-11 03:24:31 +08:00
|
|
|
|
|
|
|
# Restore lazily from checkpoint.
|
|
|
|
# The restored container needs a different name as the checkpointed
|
|
|
|
# container is not yet destroyed. It is only destroyed at that point
|
|
|
|
# in time when the last page is lazily transferred to the destination.
|
|
|
|
# Killing the CRIU on the checkpoint side will let the container
|
|
|
|
# continue to run if the migration failed at some point.
|
|
|
|
__runc --criu "$CRIU" restore -d --work-path ./image-dir --image-path ./image-dir --lazy-pages test_busybox_restore <&60 >&51 2>&51
|
2020-04-20 08:14:51 +08:00
|
|
|
[ $? -eq 0 ]
|
2017-08-11 03:24:31 +08:00
|
|
|
|
|
|
|
# busybox should be back up and running
|
|
|
|
testcontainer test_busybox_restore running
|
|
|
|
|
|
|
|
runc exec --cwd /bin test_busybox_restore echo ok
|
|
|
|
[ "$status" -eq 0 ]
|
|
|
|
[[ ${output} == "ok" ]]
|
|
|
|
|
runc checkpoint: fix --status-fd to accept fd
1. The command `runc checkpoint --lazy-server --status-fd $FD` actually
accepts a file name as an $FD. Make it accept a file descriptor,
like its name implies and the documentation states.
In addition, since runc itself does not use the result of CRIU status
fd, remove the code which relays it, and pass the FD directly to CRIU.
Note 1: runc should close this file descriptor itself after passing it
to criu, otherwise whoever waits on it might wait forever.
Note 2: due to the way criu swrk consumes the fd (it reopens
/proc/$SENDER_PID/fd/$FD), runc can't close it as soon as criu swrk has
started. There is no good way to know when criu swrk has reopened the
fd, so we assume that as soon as we have received something back, the
fd is already reopened.
2. Since the meaning of --status-fd has changed, the test case using
it needs to be fixed as well.
Modify the lazy migration test to remove "sleep 2", actually waiting
for the the lazy page server to be ready.
While at it,
- remove the double fork (using shell's background process is
sufficient here);
- check the exit code for "runc checkpoint" and "criu lazy-pages";
- remove the check for no errors in dump.log after restore, as we
are already checking its exit code.
[v2: properly close status fd after spawning criu]
[v3: move close status fd to after the first read]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-21 17:43:24 +08:00
|
|
|
wait $cpt_pid
|
|
|
|
[ $? -eq 0 ]
|
|
|
|
|
|
|
|
wait $lp_pid
|
|
|
|
[ $? -eq 0 ]
|
|
|
|
|
tests/checkpoint.bats: fix test hang/failure
Commit a9e15e7e0 adds a check that stdin/out/err pipes
are restored correctly. Commit ec260653b7d4e copy/pastes
the same code to one more another test.
Problem is (as pointed out in commit 5369f9ade3) these tests
sometimes hang. I have also seen them fail.
Apparently, the code used to create pipes and open them to fds
is racy:
```shell
cat $fifo | cat $fifo &
pid=$!
exec 50</proc/$pid/fd/0
exec 51>/proc/$pid/fd/0
```
Since `cat | cat` is spawned asynchronously, by the time exec is used,
the second cat process (i.e. $pid) is already fork'ed but it might
not be exec'ed yet. As a result, we get this (`ls -l /proc/self/fd`):
```
lr-x------. 1 root root 64 Apr 20 02:39 50 -> /dev/pts/1
l-wx------. 1 root root 64 Apr 20 02:39 51 -> /dev/pts/1
```
or, in some cases:
```
lr-x------. 1 root root 64 Apr 20 02:45 50 -> /dev/pts/1
l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]'
```
instead of expected set of pipes:
```
> lr-x------. 1 root root 64 Apr 20 02:45 50 -> 'pipe:[215791]'
> l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]'
```
One possible workaround is to add `sleep 0.1` or so after cat|cat,
but it is outright ugly (besides, we already have one sleep in
the test code).
The solution is to not use any external processes to create pipes.
I admit this still looks not very comprehensible, but at least it
is easier than before, and it works.
While at it, remove code duplication, moving the setup and check
code into a pair of functions.
Finally, since the tests are working now, remove the skip.
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-20 12:48:11 +08:00
|
|
|
check_pipes
|
2017-08-11 03:24:31 +08:00
|
|
|
}
|
2018-07-07 02:40:18 +08:00
|
|
|
|
|
|
|
@test "checkpoint and restore in external network namespace" {
|
|
|
|
# check if external_net_ns is supported; only with criu 3.10++
|
|
|
|
run ${CRIU} check --feature external_net_ns
|
|
|
|
if [ "$status" -eq 1 ]; then
|
|
|
|
# this criu does not support external_net_ns; skip the test
|
|
|
|
skip "this criu does not support external network namespaces"
|
|
|
|
fi
|
|
|
|
|
|
|
|
# create a temporary name for the test network namespace
|
|
|
|
tmp=`mktemp`
|
|
|
|
rm -f $tmp
|
|
|
|
ns_name=`basename $tmp`
|
|
|
|
# create network namespace
|
|
|
|
ip netns add $ns_name
|
|
|
|
ns_path=`ip netns add $ns_name 2>&1 | sed -e 's/.*"\(.*\)".*/\1/'`
|
|
|
|
|
|
|
|
ns_inode=`ls -iL $ns_path | awk '{ print $1 }'`
|
|
|
|
|
|
|
|
# tell runc which network namespace to use
|
|
|
|
sed -i "s;\"type\": \"network\";\"type\": \"network\",\"path\": \"$ns_path\";" config.json
|
|
|
|
|
|
|
|
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
|
|
|
[ "$status" -eq 0 ]
|
|
|
|
|
|
|
|
testcontainer test_busybox running
|
|
|
|
|
|
|
|
for i in `seq 2`; do
|
|
|
|
# checkpoint the running container; this automatically tells CRIU to
|
|
|
|
# handle the network namespace defined in config.json as an external
|
|
|
|
runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox
|
|
|
|
# if you are having problems getting criu to work uncomment the following dump:
|
|
|
|
#cat /run/opencontainer/containers/test_busybox/criu.work/dump.log
|
|
|
|
cat ./work-dir/dump.log | grep -B 5 Error || true
|
2020-04-20 08:14:51 +08:00
|
|
|
[ "$status" -eq 0 ]
|
2018-07-07 02:40:18 +08:00
|
|
|
|
|
|
|
# after checkpoint busybox is no longer running
|
2020-04-20 12:28:15 +08:00
|
|
|
testcontainer test_busybox checkpointed
|
2018-07-07 02:40:18 +08:00
|
|
|
|
|
|
|
# restore from checkpoint; this should restore the container into the existing network namespace
|
|
|
|
runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket $CONSOLE_SOCKET test_busybox
|
|
|
|
ret=$?
|
|
|
|
cat ./work-dir/restore.log | grep -B 5 Error || true
|
|
|
|
[ "$ret" -eq 0 ]
|
|
|
|
|
|
|
|
# busybox should be back up and running
|
|
|
|
testcontainer test_busybox running
|
|
|
|
|
|
|
|
# container should be running in same network namespace as before
|
|
|
|
pid=`__runc state test_busybox | jq '.pid'`
|
|
|
|
ns_inode_new=`readlink /proc/$pid/ns/net | sed -e 's/.*\[\(.*\)\]/\1/'`
|
|
|
|
echo "old network namespace inode $ns_inode"
|
|
|
|
echo "new network namespace inode $ns_inode_new"
|
|
|
|
[ "$ns_inode" -eq "$ns_inode_new" ]
|
|
|
|
done
|
|
|
|
ip netns del $ns_name
|
|
|
|
}
|
|
|
|
|
2018-12-12 23:54:48 +08:00
|
|
|
@test "checkpoint and restore with container specific CRIU config" {
|
|
|
|
tmp=`mktemp /tmp/runc-criu-XXXXXX.conf`
|
|
|
|
# This is the file we write to /etc/criu/default.conf
|
|
|
|
tmplog1=`mktemp /tmp/runc-criu-log-XXXXXX.log`
|
|
|
|
unlink $tmplog1
|
|
|
|
tmplog1=`basename $tmplog1`
|
|
|
|
# That is the actual configuration file to be used
|
|
|
|
tmplog2=`mktemp /tmp/runc-criu-log-XXXXXX.log`
|
|
|
|
unlink $tmplog2
|
|
|
|
tmplog2=`basename $tmplog2`
|
|
|
|
# This adds the annotation 'org.criu.config' to set a container
|
|
|
|
# specific CRIU config file.
|
|
|
|
sed -i "s;\"process\";\"annotations\":{\"org.criu.config\": \"$tmp\"},\"process\";" config.json
|
|
|
|
# Tell CRIU to use another configuration file
|
|
|
|
mkdir -p /etc/criu
|
|
|
|
echo "log-file=$tmplog1" > /etc/criu/default.conf
|
|
|
|
# Make sure the RPC defined configuration file overwrites the previous
|
|
|
|
echo "log-file=$tmplog2" > $tmp
|
|
|
|
|
|
|
|
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
|
|
|
[ "$status" -eq 0 ]
|
|
|
|
|
|
|
|
testcontainer test_busybox running
|
|
|
|
|
|
|
|
# checkpoint the running container
|
|
|
|
runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox
|
|
|
|
[ "$status" -eq 0 ]
|
|
|
|
! test -f ./work-dir/$tmplog1
|
|
|
|
test -f ./work-dir/$tmplog2
|
|
|
|
|
|
|
|
# after checkpoint busybox is no longer running
|
2020-04-20 12:28:15 +08:00
|
|
|
testcontainer test_busybox checkpointed
|
2018-12-12 23:54:48 +08:00
|
|
|
|
|
|
|
test -f ./work-dir/$tmplog2 && unlink ./work-dir/$tmplog2
|
|
|
|
# restore from checkpoint
|
|
|
|
runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket $CONSOLE_SOCKET test_busybox
|
|
|
|
[ "$status" -eq 0 ]
|
|
|
|
! test -f ./work-dir/$tmplog1
|
|
|
|
test -f ./work-dir/$tmplog2
|
|
|
|
|
|
|
|
# busybox should be back up and running
|
|
|
|
testcontainer test_busybox running
|
|
|
|
unlink $tmp
|
|
|
|
test -f ./work-dir/$tmplog2 && unlink ./work-dir/$tmplog2
|
|
|
|
}
|
|
|
|
|