#!/usr/bin/env bats load helpers function setup() { if [[ -n "${RUNC_USE_SYSTEMD}" ]] ; then skip "CRIU test suite is skipped on systemd cgroup driver for now." fi # All checkpoint tests are currently failing on v2 requires cgroups_v1 teardown_busybox setup_busybox } function teardown() { teardown_busybox } @test "checkpoint and restore" { # XXX: currently criu require root containers. requires criu root runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] testcontainer test_busybox running for i in `seq 2`; do # checkpoint the running container runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox cat ./work-dir/dump.log | grep -B 5 Error || true [ "$status" -eq 0 ] # after checkpoint busybox is no longer running runc state test_busybox [ "$status" -ne 0 ] # restore from checkpoint runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket $CONSOLE_SOCKET test_busybox ret=$? cat ./work-dir/restore.log | grep -B 5 Error || true [ "$ret" -eq 0 ] # busybox should be back up and running testcontainer test_busybox running done } @test "checkpoint --pre-dump and restore" { # XXX: currently criu require root containers. requires criu root # The changes to 'terminal' are needed for running in detached mode sed -i 's;"terminal": true;"terminal": false;' config.json sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json # The following code creates pipes for stdin and stdout. # CRIU can't handle fifo-s, so we need all these tricks. fifo=`mktemp -u /tmp/runc-fifo-XXXXXX` mkfifo $fifo # stdout cat $fifo | cat $fifo & pid=$! exec 50/proc/$pid/fd/0 # stdin cat $fifo | cat $fifo & pid=$! exec 60/proc/$pid/fd/0 echo -n > $fifo unlink $fifo # run busybox __runc run -d test_busybox <&60 >&51 2>&51 [ $? -eq 0 ] testcontainer test_busybox running #test checkpoint pre-dump mkdir parent-dir runc --criu "$CRIU" checkpoint --pre-dump --image-path ./parent-dir test_busybox [ "$status" -eq 0 ] # busybox should still be running runc state test_busybox [ "$status" -eq 0 ] [[ "${output}" == *"running"* ]] # checkpoint the running container mkdir image-dir mkdir work-dir runc --criu "$CRIU" checkpoint --parent-path ./parent-dir --work-path ./work-dir --image-path ./image-dir test_busybox cat ./work-dir/dump.log | grep -B 5 Error || true [ "$status" -eq 0 ] # after checkpoint busybox is no longer running runc state test_busybox [ "$status" -ne 0 ] # restore from checkpoint __runc --criu "$CRIU" restore -d --work-path ./work-dir --image-path ./image-dir test_busybox <&60 >&51 2>&51 ret=$? cat ./work-dir/restore.log | grep -B 5 Error || true [ $ret -eq 0 ] # busybox should be back up and running testcontainer test_busybox running runc exec --cwd /bin test_busybox echo ok [ "$status" -eq 0 ] [[ ${output} == "ok" ]] echo Ping >&61 exec 61>&- exec 51>&- run cat <&50 [ "$status" -eq 0 ] [[ "${output}" == *"ponG Ping"* ]] } @test "checkpoint --lazy-pages and restore" { # XXX: currently criu require root containers. requires criu root # check if lazy-pages is supported run ${CRIU} check --feature uffd-noncoop if [ "$status" -eq 1 ]; then # this criu does not support lazy migration; skip the test skip "this criu does not support lazy migration" fi # The changes to 'terminal' are needed for running in detached mode sed -i 's;"terminal": true;"terminal": false;' config.json # This should not be necessary: https://github.com/checkpoint-restore/criu/issues/575 sed -i 's;"readonly": true;"readonly": false;' config.json sed -i 's/"sh"/"sh","-c","for i in `seq 10`; do read xxx || continue; echo ponG $xxx; done"/' config.json # The following code creates pipes for stdin and stdout. # CRIU can't handle fifo-s, so we need all these tricks. fifo=`mktemp -u /tmp/runc-fifo-XXXXXX` mkfifo $fifo # For lazy migration we need to know when CRIU is ready to serve # the memory pages via TCP. lazy_pipe=`mktemp -u /tmp/lazy-pipe-XXXXXX` mkfifo $lazy_pipe # TCP port for lazy migration port=27277 # stdout cat $fifo | cat $fifo & pid=$! exec 50/proc/$pid/fd/0 # stdin cat $fifo | cat $fifo & pid=$! exec 60/proc/$pid/fd/0 echo -n > $fifo unlink $fifo # run busybox __runc run -d test_busybox <&60 >&51 2>&51 [ $? -eq 0 ] testcontainer test_busybox running # checkpoint the running container mkdir image-dir mkdir work-dir # Double fork taken from helpers.bats # We need to start 'runc checkpoint --lazy-pages' in the background, # so we double fork in the shell. (runc --criu "$CRIU" checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_pipe} --work-path ./work-dir --image-path ./image-dir test_busybox & ) & # Sleeping here. This is ugly, but not sure how else to handle it. # The return code of the in the background running runc is needed, if # there is some basic error. If the lazy migration is ready can # be handled by $lazy_pipe. Which probably will always be ready # after sleeping two seconds. sleep 2 # Check if inventory.img was written [ -e image-dir/inventory.img ] # If the inventory.img exists criu checkpointed some things, let's see # if there were other errors in the log file. run grep -B 5 Error ./work-dir/dump.log -q [ "$status" -eq 1 ] # This will block until CRIU is ready to serve memory pages cat $lazy_pipe [ "$status" -eq 1 ] unlink $lazy_pipe # Double fork taken from helpers.bats # We need to start 'criu lazy-pages' in the background, # so we double fork in the shell. # Start CRIU in lazy-daemon mode $(${CRIU} lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir &) & # Restore lazily from checkpoint. # The restored container needs a different name as the checkpointed # container is not yet destroyed. It is only destroyed at that point # in time when the last page is lazily transferred to the destination. # Killing the CRIU on the checkpoint side will let the container # continue to run if the migration failed at some point. __runc --criu "$CRIU" restore -d --work-path ./image-dir --image-path ./image-dir --lazy-pages test_busybox_restore <&60 >&51 2>&51 [ $? -eq 0 ] run grep -B 5 Error ./work-dir/dump.log -q [ "$status" -eq 1 ] # busybox should be back up and running testcontainer test_busybox_restore running runc exec --cwd /bin test_busybox_restore echo ok [ "$status" -eq 0 ] [[ ${output} == "ok" ]] echo Ping >&61 exec 61>&- exec 51>&- run cat <&50 [ "$status" -eq 0 ] [[ "${output}" == *"ponG Ping"* ]] } @test "checkpoint and restore in external network namespace" { # XXX: currently criu require root containers. requires criu root # check if external_net_ns is supported; only with criu 3.10++ run ${CRIU} check --feature external_net_ns if [ "$status" -eq 1 ]; then # this criu does not support external_net_ns; skip the test skip "this criu does not support external network namespaces" fi # create a temporary name for the test network namespace tmp=`mktemp` rm -f $tmp ns_name=`basename $tmp` # create network namespace ip netns add $ns_name ns_path=`ip netns add $ns_name 2>&1 | sed -e 's/.*"\(.*\)".*/\1/'` ns_inode=`ls -iL $ns_path | awk '{ print $1 }'` # tell runc which network namespace to use sed -i "s;\"type\": \"network\";\"type\": \"network\",\"path\": \"$ns_path\";" config.json runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] testcontainer test_busybox running for i in `seq 2`; do # checkpoint the running container; this automatically tells CRIU to # handle the network namespace defined in config.json as an external runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox # if you are having problems getting criu to work uncomment the following dump: #cat /run/opencontainer/containers/test_busybox/criu.work/dump.log cat ./work-dir/dump.log | grep -B 5 Error || true [ "$status" -eq 0 ] # after checkpoint busybox is no longer running runc state test_busybox [ "$status" -ne 0 ] # restore from checkpoint; this should restore the container into the existing network namespace runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket $CONSOLE_SOCKET test_busybox ret=$? cat ./work-dir/restore.log | grep -B 5 Error || true [ "$ret" -eq 0 ] # busybox should be back up and running testcontainer test_busybox running # container should be running in same network namespace as before pid=`__runc state test_busybox | jq '.pid'` ns_inode_new=`readlink /proc/$pid/ns/net | sed -e 's/.*\[\(.*\)\]/\1/'` echo "old network namespace inode $ns_inode" echo "new network namespace inode $ns_inode_new" [ "$ns_inode" -eq "$ns_inode_new" ] done ip netns del $ns_name } @test "checkpoint and restore with container specific CRIU config" { # XXX: currently criu require root containers. requires criu root tmp=`mktemp /tmp/runc-criu-XXXXXX.conf` # This is the file we write to /etc/criu/default.conf tmplog1=`mktemp /tmp/runc-criu-log-XXXXXX.log` unlink $tmplog1 tmplog1=`basename $tmplog1` # That is the actual configuration file to be used tmplog2=`mktemp /tmp/runc-criu-log-XXXXXX.log` unlink $tmplog2 tmplog2=`basename $tmplog2` # This adds the annotation 'org.criu.config' to set a container # specific CRIU config file. sed -i "s;\"process\";\"annotations\":{\"org.criu.config\": \"$tmp\"},\"process\";" config.json # Tell CRIU to use another configuration file mkdir -p /etc/criu echo "log-file=$tmplog1" > /etc/criu/default.conf # Make sure the RPC defined configuration file overwrites the previous echo "log-file=$tmplog2" > $tmp runc run -d --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] testcontainer test_busybox running # checkpoint the running container runc --criu "$CRIU" checkpoint --work-path ./work-dir test_busybox [ "$status" -eq 0 ] ! test -f ./work-dir/$tmplog1 test -f ./work-dir/$tmplog2 # after checkpoint busybox is no longer running runc state test_busybox [ "$status" -ne 0 ] test -f ./work-dir/$tmplog2 && unlink ./work-dir/$tmplog2 # restore from checkpoint runc --criu "$CRIU" restore -d --work-path ./work-dir --console-socket $CONSOLE_SOCKET test_busybox [ "$status" -eq 0 ] ! test -f ./work-dir/$tmplog1 test -f ./work-dir/$tmplog2 # busybox should be back up and running testcontainer test_busybox running unlink $tmp test -f ./work-dir/$tmplog2 && unlink ./work-dir/$tmplog2 }