From 69d599ddbdfe47b9b9ea6994fe2aba24e09ec9b7 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 19 Apr 2020 17:14:51 -0700 Subject: [PATCH 1/4] tests/checkpoint.bats: fix $? checks runc in this file is actually a function that does `run runc ...`, and `run` sets variable `$status` as the exit code, so `$status` is what should be checked. If calling runc directly (as in `__runc ...`), then $? is legit. While at it, remove an obsoleted comment, and an unneeded `ret=$?` assignment (check `$?` directly). Signed-off-by: Kir Kolyshkin --- tests/integration/checkpoint.bats | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats index 6c11d083..4c5c6a03 100644 --- a/tests/integration/checkpoint.bats +++ b/tests/integration/checkpoint.bats @@ -29,11 +29,8 @@ function teardown() { for i in `seq 2`; do # checkpoint the running container runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox - ret=$? - # if you are having problems getting criu to work uncomment the following dump: - #cat /run/opencontainer/containers/test_busybox/criu.work/dump.log cat ./work-dir/dump.log | grep -B 5 Error || true - [ "$ret" -eq 0 ] + [ "$status" -eq 0 ] # after checkpoint busybox is no longer running runc state test_busybox @@ -216,8 +213,7 @@ function teardown() { # Killing the CRIU on the checkpoint side will let the container # continue to run if the migration failed at some point. __runc --criu "$CRIU" restore -d --work-path ./image-dir --image-path ./image-dir --lazy-pages test_busybox_restore <&60 >&51 2>&51 - ret=$? - [ $ret -eq 0 ] + [ $? -eq 0 ] run grep -B 5 Error ./work-dir/dump.log -q [ "$status" -eq 1 ] @@ -269,11 +265,10 @@ function teardown() { # checkpoint the running container; this automatically tells CRIU to # handle the network namespace defined in config.json as an external runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox - ret=$? # if you are having problems getting criu to work uncomment the following dump: #cat /run/opencontainer/containers/test_busybox/criu.work/dump.log cat ./work-dir/dump.log | grep -B 5 Error || true - [ "$ret" -eq 0 ] + [ "$status" -eq 0 ] # after checkpoint busybox is no longer running runc state test_busybox From e216457eea3e9074bd064685ffa78eeea9700883 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 19 Apr 2020 21:28:15 -0700 Subject: [PATCH 2/4] tests/checkpoint.bats: simplify status checks Introduce a special case for `testcontainer` to test for container that is not present (checkpointed), use it. Fix one place where testcontainer was not used. Signed-off-by: Kir Kolyshkin --- tests/integration/checkpoint.bats | 16 +++++----------- tests/integration/helpers.bash | 4 ++++ 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats index 4c5c6a03..0e948406 100644 --- a/tests/integration/checkpoint.bats +++ b/tests/integration/checkpoint.bats @@ -33,8 +33,7 @@ function teardown() { [ "$status" -eq 0 ] # after checkpoint busybox is no longer running - runc state test_busybox - [ "$status" -ne 0 ] + testcontainer test_busybox checkpointed # restore from checkpoint runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket $CONSOLE_SOCKET test_busybox @@ -87,9 +86,7 @@ function teardown() { [ "$status" -eq 0 ] # busybox should still be running - runc state test_busybox - [ "$status" -eq 0 ] - [[ "${output}" == *"running"* ]] + testcontainer test_busybox running # checkpoint the running container mkdir image-dir @@ -99,8 +96,7 @@ function teardown() { [ "$status" -eq 0 ] # after checkpoint busybox is no longer running - runc state test_busybox - [ "$status" -ne 0 ] + testcontainer test_busybox checkpointed # restore from checkpoint __runc --criu "$CRIU" restore -d --work-path ./work-dir --image-path ./image-dir test_busybox <&60 >&51 2>&51 @@ -271,8 +267,7 @@ function teardown() { [ "$status" -eq 0 ] # after checkpoint busybox is no longer running - runc state test_busybox - [ "$status" -ne 0 ] + testcontainer test_busybox checkpointed # restore from checkpoint; this should restore the container into the existing network namespace runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket $CONSOLE_SOCKET test_busybox @@ -327,8 +322,7 @@ function teardown() { test -f ./work-dir/$tmplog2 # after checkpoint busybox is no longer running - runc state test_busybox - [ "$status" -ne 0 ] + testcontainer test_busybox checkpointed test -f ./work-dir/$tmplog2 && unlink ./work-dir/$tmplog2 # restore from checkpoint diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index de8be4ba..df079578 100644 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -310,6 +310,10 @@ function wait_for_container_inroot() { function testcontainer() { # test state of container runc state $1 + if [ $2 == "checkpointed" ]; then + [ "$status" -eq 1 ] + return + fi [ "$status" -eq 0 ] [[ "${output}" == *"$2"* ]] } From bf172ef44f2a266aee726c2f34918b976e20dd8a Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 19 Apr 2020 21:31:09 -0700 Subject: [PATCH 3/4] tests/checkpoint.bats: consolidate requires checks Since all the criu tests have the same requirements, move them to setup(). While at it, remove an obviously redundant comment. Signed-off-by: Kir Kolyshkin --- tests/integration/checkpoint.bats | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats index 0e948406..7a7252f5 100644 --- a/tests/integration/checkpoint.bats +++ b/tests/integration/checkpoint.bats @@ -8,6 +8,8 @@ function setup() { fi # All checkpoint tests are currently failing on v2 requires cgroups_v1 + # XXX: currently criu require root containers. + requires criu root teardown_busybox setup_busybox @@ -18,9 +20,6 @@ function teardown() { } @test "checkpoint and restore" { - # XXX: currently criu require root containers. - requires criu root - runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] @@ -47,9 +46,6 @@ function teardown() { } @test "checkpoint --pre-dump and restore" { - # XXX: currently criu require root containers. - requires criu root - # The changes to 'terminal' are needed for running in detached mode sed -i 's;"terminal": true;"terminal": false;' config.json sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json @@ -120,13 +116,9 @@ function teardown() { } @test "checkpoint --lazy-pages and restore" { - # XXX: currently criu require root containers. - requires criu root - # check if lazy-pages is supported run ${CRIU} check --feature uffd-noncoop if [ "$status" -eq 1 ]; then - # this criu does not support lazy migration; skip the test skip "this criu does not support lazy migration" fi @@ -229,9 +221,6 @@ function teardown() { } @test "checkpoint and restore in external network namespace" { - # XXX: currently criu require root containers. - requires criu root - # check if external_net_ns is supported; only with criu 3.10++ run ${CRIU} check --feature external_net_ns if [ "$status" -eq 1 ]; then @@ -289,9 +278,6 @@ function teardown() { } @test "checkpoint and restore with container specific CRIU config" { - # XXX: currently criu require root containers. - requires criu root - tmp=`mktemp /tmp/runc-criu-XXXXXX.conf` # This is the file we write to /etc/criu/default.conf tmplog1=`mktemp /tmp/runc-criu-log-XXXXXX.log` From d5e68ceb0ccadcc9b49338ae04736537ce2e0dc6 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 19 Apr 2020 21:48:11 -0700 Subject: [PATCH 4/4] tests/checkpoint.bats: fix test hang/failure Commit a9e15e7e0 adds a check that stdin/out/err pipes are restored correctly. Commit ec260653b7d4e copy/pastes the same code to one more another test. Problem is (as pointed out in commit 5369f9ade3) these tests sometimes hang. I have also seen them fail. Apparently, the code used to create pipes and open them to fds is racy: ```shell cat $fifo | cat $fifo & pid=$! exec 50/proc/$pid/fd/0 ``` Since `cat | cat` is spawned asynchronously, by the time exec is used, the second cat process (i.e. $pid) is already fork'ed but it might not be exec'ed yet. As a result, we get this (`ls -l /proc/self/fd`): ``` lr-x------. 1 root root 64 Apr 20 02:39 50 -> /dev/pts/1 l-wx------. 1 root root 64 Apr 20 02:39 51 -> /dev/pts/1 ``` or, in some cases: ``` lr-x------. 1 root root 64 Apr 20 02:45 50 -> /dev/pts/1 l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]' ``` instead of expected set of pipes: ``` > lr-x------. 1 root root 64 Apr 20 02:45 50 -> 'pipe:[215791]' > l-wx------. 1 root root 64 Apr 20 02:45 51 -> 'pipe:[215791]' ``` One possible workaround is to add `sleep 0.1` or so after cat|cat, but it is outright ugly (besides, we already have one sleep in the test code). The solution is to not use any external processes to create pipes. I admit this still looks not very comprehensible, but at least it is easier than before, and it works. While at it, remove code duplication, moving the setup and check code into a pair of functions. Finally, since the tests are working now, remove the skip. Signed-off-by: Kir Kolyshkin --- tests/integration/checkpoint.bats | 93 +++++++++++-------------------- 1 file changed, 32 insertions(+), 61 deletions(-) diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats index 7a7252f5..d9d188bb 100644 --- a/tests/integration/checkpoint.bats +++ b/tests/integration/checkpoint.bats @@ -3,9 +3,6 @@ load helpers function setup() { - if [[ -n "${RUNC_USE_SYSTEMD}" ]] ; then - skip "CRIU test suite is skipped on systemd cgroup driver for now." - fi # All checkpoint tests are currently failing on v2 requires cgroups_v1 # XXX: currently criu require root containers. @@ -19,6 +16,33 @@ function teardown() { teardown_busybox } +function setup_pipes() { + # The changes to 'terminal' are needed for running in detached mode + sed -i 's;"terminal": true;"terminal": false;' config.json + sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json + + # Create two sets of pipes + # for stdout/stderr + exec 52<> <(:) + exec 50/proc/self/fd/52 + exec 52>&- + # ... and stdin + exec 62<> <(:) + exec 60/proc/self/fd/62 + exec 62>&- +} + +function check_pipes() { + echo Ping >&61 + exec 61>&- + exec 51>&- + run cat <&50 + [ "$status" -eq 0 ] + [[ "${output}" == *"ponG Ping"* ]] +} + @test "checkpoint and restore" { runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] @@ -46,29 +70,7 @@ function teardown() { } @test "checkpoint --pre-dump and restore" { - # The changes to 'terminal' are needed for running in detached mode - sed -i 's;"terminal": true;"terminal": false;' config.json - sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json - - # The following code creates pipes for stdin and stdout. - # CRIU can't handle fifo-s, so we need all these tricks. - fifo=`mktemp -u /tmp/runc-fifo-XXXXXX` - mkfifo $fifo - - # stdout - cat $fifo | cat $fifo & - pid=$! - exec 50/proc/$pid/fd/0 - - # stdin - cat $fifo | cat $fifo & - pid=$! - exec 60/proc/$pid/fd/0 - - echo -n > $fifo - unlink $fifo + setup_pipes # run busybox __runc run -d test_busybox <&60 >&51 2>&51 @@ -107,12 +109,7 @@ function teardown() { [ "$status" -eq 0 ] [[ ${output} == "ok" ]] - echo Ping >&61 - exec 61>&- - exec 51>&- - run cat <&50 - [ "$status" -eq 0 ] - [[ "${output}" == *"ponG Ping"* ]] + check_pipes } @test "checkpoint --lazy-pages and restore" { @@ -122,16 +119,10 @@ function teardown() { skip "this criu does not support lazy migration" fi - # The changes to 'terminal' are needed for running in detached mode - sed -i 's;"terminal": true;"terminal": false;' config.json + setup_pipes + # This should not be necessary: https://github.com/checkpoint-restore/criu/issues/575 sed -i 's;"readonly": true;"readonly": false;' config.json - sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json - - # The following code creates pipes for stdin and stdout. - # CRIU can't handle fifo-s, so we need all these tricks. - fifo=`mktemp -u /tmp/runc-fifo-XXXXXX` - mkfifo $fifo # For lazy migration we need to know when CRIU is ready to serve # the memory pages via TCP. @@ -141,21 +132,6 @@ function teardown() { # TCP port for lazy migration port=27277 - # stdout - cat $fifo | cat $fifo & - pid=$! - exec 50/proc/$pid/fd/0 - - # stdin - cat $fifo | cat $fifo & - pid=$! - exec 60/proc/$pid/fd/0 - - echo -n > $fifo - unlink $fifo - # run busybox __runc run -d test_busybox <&60 >&51 2>&51 [ $? -eq 0 ] @@ -212,12 +188,7 @@ function teardown() { [ "$status" -eq 0 ] [[ ${output} == "ok" ]] - echo Ping >&61 - exec 61>&- - exec 51>&- - run cat <&50 - [ "$status" -eq 0 ] - [[ "${output}" == *"ponG Ping"* ]] + check_pipes } @test "checkpoint and restore in external network namespace" {