fix systemd-notify when using a different PID namespace

The current support of systemd-notify has a race condition as the
message send to the systemd notify socket might be dropped if the sender
process is not running by the time systemd checks for the sender of the
datagram.  A proper fix of this in systemd would require changes to the
kernel to maintain the cgroup of the sender process when it is dead (but
it is not probably going to happen...)
Generally, the solution to this issue is to specify the PID in the
message itself so that systemd has not to guess the sender, but this
wouldn't work when running in a PID namespace as the container will pass
the PID known in its namespace (something like PID=1,2,3..) and systemd
running on the host is not able to map it to the runc service.

The proposed solution is to have a proxy in runc that forwards the
messages to the host systemd.

Example of this issue:

https://github.com/projectatomic/atomic-system-containers/pull/24

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
This commit is contained in:
Giuseppe Scrivano 2017-02-03 14:17:34 +01:00
parent 1c9c074d79
commit 892f2ded6f
5 changed files with 122 additions and 16 deletions

81
notify_socket.go Normal file
View File

@ -0,0 +1,81 @@
// +build linux
package main
import (
"fmt"
"net"
"path/filepath"
"github.com/Sirupsen/logrus"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/urfave/cli"
)
type notifySocket struct {
socket *net.UnixConn
host string
socketPath string
}
func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *notifySocket {
if notifySocketHost == "" {
return nil
}
root := filepath.Join(context.GlobalString("root"), id)
path := filepath.Join(root, "notify.sock")
notifySocket := &notifySocket{
socket: nil,
host: notifySocketHost,
socketPath: path,
}
return notifySocket
}
func (ns *notifySocket) Close() error {
return ns.socket.Close()
}
// If systemd is supporting sd_notify protocol, this function will add support
// for sd_notify protocol from within the container.
func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) {
mount := specs.Mount{Destination: s.host, Type: "bind", Source: s.socketPath, Options: []string{"bind"}}
spec.Mounts = append(spec.Mounts, mount)
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host))
}
func (s *notifySocket) setupSocket() error {
addr := net.UnixAddr{
Name: s.socketPath,
Net: "unixgram",
}
socket, err := net.ListenUnixgram("unixgram", &addr)
if err != nil {
return err
}
s.socket = socket
return nil
}
func (notifySocket *notifySocket) run() {
buf := make([]byte, 512)
notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
if err != nil {
logrus.Error(err)
return
}
for {
r, err := notifySocket.socket.Read(buf)
if err != nil {
break
}
client.Write(buf[0:r])
}
}

View File

@ -165,7 +165,14 @@ func restoreContainer(context *cli.Context, spec *specs.Spec, config *configs.Co
if err != nil {
return -1, err
}
handler := newSignalHandler(!context.Bool("no-subreaper"))
notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
if notifySocket != nil {
notifySocket.setupSpec(context, spec)
notifySocket.setupSocket()
}
handler := newSignalHandler(!context.Bool("no-subreaper"), notifySocket)
if err := container.Restore(process, options); err != nil {
return -1, err
}

View File

@ -17,7 +17,9 @@ const signalBufferSize = 2048
// newSignalHandler returns a signal handler for processing SIGCHLD and SIGWINCH signals
// while still forwarding all other signals to the process.
func newSignalHandler(enableSubreaper bool) *signalHandler {
// If notifySocket is present, use it to read systemd notifications from the container and
// forward them to notifySocketHost.
func newSignalHandler(enableSubreaper bool, notifySocket *notifySocket) *signalHandler {
if enableSubreaper {
// set us as the subreaper before registering the signal handler for the container
if err := system.SetSubreaper(1); err != nil {
@ -30,7 +32,8 @@ func newSignalHandler(enableSubreaper bool) *signalHandler {
// handle all signals for the process.
signal.Notify(s)
return &signalHandler{
signals: s,
signals: s,
notifySocket: notifySocket,
}
}
@ -42,7 +45,8 @@ type exit struct {
}
type signalHandler struct {
signals chan os.Signal
signals chan os.Signal
notifySocket *notifySocket
}
// forward handles the main signal event loop forwarding, resizing, or reaping depending
@ -54,6 +58,11 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty) (int, e
if err != nil {
return -1, err
}
if h.notifySocket != nil {
go h.notifySocket.run()
}
// perform the initial tty resize.
tty.resize()
for s := range h.signals {
@ -75,6 +84,9 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty) (int, e
// status because we must ensure that any of the go specific process
// fun such as flushing pipes are complete before we return.
process.Wait()
if h.notifySocket != nil {
h.notifySocket.Close()
}
return e.status, nil
}
}

View File

@ -58,10 +58,6 @@ func setupSpec(context *cli.Context) (*specs.Spec, error) {
if err != nil {
return nil, err
}
notifySocket := os.Getenv("NOTIFY_SOCKET")
if notifySocket != "" {
setupSdNotify(spec, notifySocket)
}
if os.Geteuid() != 0 {
return nil, fmt.Errorf("runc should be run as root")
}

View File

@ -95,13 +95,6 @@ func newProcess(p specs.Process) (*libcontainer.Process, error) {
return lp, nil
}
// If systemd is supporting sd_notify protocol, this function will add support
// for sd_notify protocol from within the container.
func setupSdNotify(spec *specs.Spec, notifySocket string) {
spec.Mounts = append(spec.Mounts, specs.Mount{Destination: notifySocket, Type: "bind", Source: notifySocket, Options: []string{"bind"}})
spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", notifySocket))
}
func destroy(container libcontainer.Container) {
if err := container.Destroy(); err != nil {
logrus.Error(err)
@ -184,6 +177,7 @@ type runner struct {
consoleSocket string
container libcontainer.Container
create bool
notifySocket *notifySocket
}
func (r *runner) terminalinfo() *libcontainer.TerminalInfo {
@ -225,6 +219,10 @@ func (r *runner) run(config *specs.Process) (int, error) {
r.destroy()
return -1, fmt.Errorf("cannot use console socket if runc will not detach or allocate tty")
}
if detach && r.notifySocket != nil {
r.destroy()
return -1, fmt.Errorf("cannot detach when using NOTIFY_SOCKET")
}
startFn := r.container.Start
if !r.create {
@ -233,7 +231,7 @@ func (r *runner) run(config *specs.Process) (int, error) {
// Setting up IO is a two stage process. We need to modify process to deal
// with detaching containers, and then we get a tty after the container has
// started.
handler := newSignalHandler(r.enableSubreaper)
handler := newSignalHandler(r.enableSubreaper, r.notifySocket)
tty, err := setupIO(process, rootuid, rootgid, config.Terminal, detach)
if err != nil {
r.destroy()
@ -336,10 +334,21 @@ func startContainer(context *cli.Context, spec *specs.Spec, create bool) (int, e
if id == "" {
return -1, errEmptyID
}
notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
if notifySocket != nil {
notifySocket.setupSpec(context, spec)
}
container, err := createContainer(context, id, spec)
if err != nil {
return -1, err
}
if notifySocket != nil {
notifySocket.setupSocket()
}
// Support on-demand socket activation by passing file descriptors into the container init process.
listenFDs := []*os.File{}
if os.Getenv("LISTEN_FDS") != "" {
@ -350,6 +359,7 @@ func startContainer(context *cli.Context, spec *specs.Spec, create bool) (int, e
shouldDestroy: true,
container: container,
listenFDs: listenFDs,
notifySocket: notifySocket,
consoleSocket: context.String("console-socket"),
detach: context.Bool("detach"),
pidFile: context.String("pid-file"),