Merge pull request #613 from crosbymichael/seccomp-args

Golang seccomp package
This commit is contained in:
Mrunal Patel 2015-06-09 10:13:19 -07:00
commit 204502647d
13 changed files with 921 additions and 27 deletions

View File

@ -13,6 +13,40 @@ type IDMap struct {
Size int `json:"size"` Size int `json:"size"`
} }
// Seccomp holds the per-syscall rules configured for a container.
type Seccomp struct {
	// Syscalls is the list of syscall rules, serialized under the "syscalls" key.
	Syscalls []*Syscall `json:"syscalls"`
}
// Action selects what happens when a configured syscall is made.
// The named actions below are negative values; other values are used as
// errno codes by callers (e.g. Action(syscall.EPERM)).
type Action int

const (
	// Kill has the value -3 (iota starts at 0).
	Kill Action = iota - 3
	// Trap has the value -2.
	Trap
	// Allow has the value -1.
	Allow
)
// Operator is the comparison applied to a syscall argument in an Arg rule.
type Operator int

const (
	// EqualTo is the zero value of Operator.
	EqualTo Operator = iota
	NotEqualTo
	// GreatherThan (sic): the spelling is part of the exported API.
	GreatherThan
	LessThan
	MaskEqualTo
)
// Arg describes a comparison against a single syscall argument.
type Arg struct {
	// Index is the position of the syscall argument being compared.
	Index int `json:"index"`
	// Value is the value the argument is compared against.
	Value uint32 `json:"value"`
	// Op is the comparison operator to apply.
	Op Operator `json:"op"`
}
// Syscall is one seccomp rule: a syscall, the action to take for it, and
// optional argument comparisons.
type Syscall struct {
	// Value is the syscall number (e.g. syscall.SYS_CHOWN).
	Value int `json:"value"`
	// Action is the action to take when the syscall is made.
	Action Action `json:"action"`
	// Args optionally lists comparisons on the syscall's arguments.
	Args []*Arg `json:"args"`
}
// TODO Windows. Many of these fields should be factored out into those parts // TODO Windows. Many of these fields should be factored out into those parts
// which are common across platforms, and those which are platform specific. // which are common across platforms, and those which are platform specific.
@ -104,4 +138,9 @@ type Config struct {
// SystemProperties is a map of properties and their values. It is the equivalent of using // SystemProperties is a map of properties and their values. It is the equivalent of using
// sysctl -w my.property.name value in Linux. // sysctl -w my.property.name value in Linux.
SystemProperties map[string]string `json:"system_properties"` SystemProperties map[string]string `json:"system_properties"`
// Seccomp allows actions to be taken whenever a syscall is made within the container.
// By default, all syscalls are allowed; actions to allow, trap, kill, or return an errno
// can be specified on a per syscall basis.
Seccomp *Seccomp `json:"seccomp"`
} }

View File

@ -13,6 +13,7 @@ import (
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/netlink" "github.com/docker/libcontainer/netlink"
"github.com/docker/libcontainer/seccomp"
"github.com/docker/libcontainer/system" "github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/user" "github.com/docker/libcontainer/user"
"github.com/docker/libcontainer/utils" "github.com/docker/libcontainer/utils"
@ -259,3 +260,61 @@ func killCgroupProcesses(m cgroups.Manager) error {
} }
return nil return nil
} }
// finalizeSeccomp translates the container's seccomp configuration into a
// seccomp.Context and loads it. It does nothing and returns nil when the
// configuration has no Seccomp section.
func finalizeSeccomp(config *initConfig) error {
	rules := config.Config.Seccomp
	if rules == nil {
		return nil
	}
	ctx := seccomp.New()
	for _, call := range rules.Syscalls {
		entry := new(seccomp.Syscall)
		entry.Value = uint32(call.Value)
		entry.Action = seccompAction(call.Action)
		// Args stays unset when the rule carries no argument comparisons.
		if len(call.Args) != 0 {
			entry.Args = seccompArgs(call.Args)
		}
		ctx.Add(entry)
	}
	return ctx.Load()
}
// seccompAction maps a configs.Action onto the seccomp package's Action.
// Values other than Kill, Trap and Allow are converted to an errno action.
func seccompAction(a configs.Action) seccomp.Action {
	if a == configs.Kill {
		return seccomp.Kill
	}
	if a == configs.Trap {
		return seccomp.Trap
	}
	if a == configs.Allow {
		return seccomp.Allow
	}
	errno := syscall.Errno(int(a))
	return seccomp.Error(errno)
}
// seccompArgs converts configured argument comparisons into seccomp.Args.
// All converted arguments are placed in a single group.
func seccompArgs(args []*configs.Arg) seccomp.Args {
	var group []seccomp.Arg
	for i := range args {
		src := args[i]
		converted := seccomp.Arg{
			Index: uint32(src.Index),
			Op:    seccompOperator(src.Op),
			Value: uint(src.Value),
		}
		group = append(group, converted)
	}
	return seccomp.Args{group}
}
// seccompOperator maps a configs.Operator onto the seccomp package's Operator.
// An unrecognised operator yields the zero Operator value.
func seccompOperator(o configs.Operator) seccomp.Operator {
	var op seccomp.Operator // stays 0 when o is not recognised
	switch o {
	case configs.EqualTo:
		op = seccomp.EqualTo
	case configs.NotEqualTo:
		op = seccomp.NotEqualTo
	case configs.GreatherThan:
		op = seccomp.GreatherThan
	case configs.LessThan:
		op = seccomp.LessThan
	case configs.MaskEqualTo:
		op = seccomp.MaskEqualTo
	}
	return op
}

View File

@ -714,3 +714,27 @@ func TestSystemProperties(t *testing.T) {
t.Fatalf("kernel.shmmni property expected to be 8192, but is %s", shmmniOutput) t.Fatalf("kernel.shmmni property expected to be 8192, but is %s", shmmniOutput)
} }
} }
// TestSeccompNoChown runs chown inside a container whose seccomp rules make
// the chown syscall return EPERM, and checks that the command fails with a
// "not permitted" message. The test is skipped in short mode.
func TestSeccompNoChown(t *testing.T) {
	if testing.Short() {
		return
	}
	rootfs, err := newRootfs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	config := newTemplateConfig(rootfs)
	denyChown := &configs.Syscall{
		Value:  syscall.SYS_CHOWN,
		Action: configs.Action(syscall.EPERM),
	}
	config.Seccomp = &configs.Seccomp{}
	config.Seccomp.Syscalls = append(config.Seccomp.Syscalls, denyChown)

	buffers, _, err := runContainer(config, "", "/bin/sh", "-c", "chown 1:1 /tmp")
	if err == nil {
		t.Fatal("running chown in a container should fail")
	}
	output := buffers.String()
	if !strings.Contains(output, "not permitted") {
		t.Fatalf("running chown should result in an EPERM but got %q", output)
	}
}

View File

@ -122,11 +122,11 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe
err = container.Start(process) err = container.Start(process)
if err != nil { if err != nil {
return nil, -1, err return buffers, -1, err
} }
ps, err := process.Wait() ps, err := process.Wait()
if err != nil { if err != nil {
return nil, -1, err return buffers, -1, err
} }
status := ps.Sys().(syscall.WaitStatus) status := ps.Sys().(syscall.WaitStatus)
if status.Exited() { if status.Exited() {
@ -134,7 +134,7 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe
} else if status.Signaled() { } else if status.Signaled() {
exitCode = -int(status.Signal()) exitCode = -int(status.Signal())
} else { } else {
return nil, -1, err return buffers, -1, err
} }
return return
} }

View File

@ -19,32 +19,33 @@ import (
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
var createFlags = []cli.Flag{ var createFlags = []cli.Flag{
cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"}, cli.BoolFlag{Name: "cgroup", Usage: "mount the cgroup data for the container"},
cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"}, cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"},
cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"},
cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"},
cli.IntFlag{Name: "cpushares", Usage: "set the cpushares for the container"}, cli.IntFlag{Name: "cpushares", Usage: "set the cpushares for the container"},
cli.IntFlag{Name: "memory-limit", Usage: "set the memory limit for the container"}, cli.IntFlag{Name: "memory-limit", Usage: "set the memory limit for the container"},
cli.IntFlag{Name: "memory-swap", Usage: "set the memory swap limit for the container"}, cli.IntFlag{Name: "memory-swap", Usage: "set the memory swap limit for the container"},
cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"},
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"},
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
cli.StringFlag{Name: "cpuset-cpus", Usage: "set the cpuset cpus"}, cli.StringFlag{Name: "cpuset-cpus", Usage: "set the cpuset cpus"},
cli.StringFlag{Name: "cpuset-mems", Usage: "set the cpuset mems"}, cli.StringFlag{Name: "cpuset-mems", Usage: "set the cpuset mems"},
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
cli.StringFlag{Name: "hostname", Value: "nsinit", Usage: "hostname value for the container"}, cli.StringFlag{Name: "hostname", Value: "nsinit", Usage: "hostname value for the container"},
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"}, cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"},
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"}, cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"},
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"}, cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
cli.StringFlag{Name: "security", Value: "", Usage: "set the security profile (high, medium, low)"},
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
cli.StringFlag{Name: "veth-address", Usage: "veth ip address"}, cli.StringFlag{Name: "veth-address", Usage: "veth ip address"},
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"},
cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"}, cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"},
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"}, cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"},
cli.BoolFlag{Name: "cgroup", Usage: "mount the cgroup data for the container"},
cli.StringSliceFlag{Name: "sysctl", Value: &cli.StringSlice{}, Usage: "set system properties in the container"}, cli.StringSliceFlag{Name: "sysctl", Value: &cli.StringSlice{}, Usage: "set system properties in the container"},
cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"},
} }
var configCommand = cli.Command{ var configCommand = cli.Command{
@ -203,6 +204,24 @@ func modify(config *configs.Config, context *cli.Context) {
Device: "cgroup", Device: "cgroup",
}) })
} }
modifySecurityProfile(context, config)
}
func modifySecurityProfile(context *cli.Context, config *configs.Config) {
profileName := context.String("security")
if profileName == "" {
return
}
profile := profiles[profileName]
if profile == nil {
logrus.Fatalf("invalid profile name %q", profileName)
}
config.Rlimits = profile.Rlimits
config.Capabilities = profile.Capabilities
config.Seccomp = profile.Seccomp
config.AppArmorProfile = profile.ApparmorProfile
config.MountLabel = profile.MountLabel
config.ProcessLabel = profile.ProcessLabel
} }
func getTemplate() *configs.Config { func getTemplate() *configs.Config {
@ -290,13 +309,5 @@ func getTemplate() *configs.Config {
Flags: defaultMountFlags | syscall.MS_RDONLY, Flags: defaultMountFlags | syscall.MS_RDONLY,
}, },
}, },
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: 1024,
Soft: 1024,
},
},
} }
} }

272
nsinit/security.go Normal file
View File

@ -0,0 +1,272 @@
package main
import (
"syscall"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/system"
)
// profiles maps a security profile name ("high", "medium" or "low") to its
// definition.
var profiles = map[string]*securityProfile{
	"high": highProfile,
	"medium": mediumProfile,
	"low": lowProfile,
}
// securityProfile bundles the security-related container settings that a
// named profile provides.
type securityProfile struct {
	// Capabilities is the list of capability names assigned by the profile.
	Capabilities []string `json:"capabilities"`
	// ApparmorProfile is the AppArmor profile name; empty in the built-in profiles.
	ApparmorProfile string `json:"apparmor_profile"`
	// MountLabel is the mount label; empty in the built-in profiles.
	MountLabel string `json:"mount_label"`
	// ProcessLabel is the process label; empty in the built-in profiles.
	ProcessLabel string `json:"process_label"`
	// Rlimits are the resource limits assigned by the profile.
	Rlimits []configs.Rlimit `json:"rlimits"`
	// Seccomp holds the profile's syscall rules.
	Seccomp *configs.Seccomp `json:"seccomp"`
}
// highProfile is the most restrictive built-in profile. It should be a
// runtime config that is not able to do things like apt-get or yum install.
// It keeps three capabilities and makes every syscall listed below return
// EPERM.
var highProfile = &securityProfile{
	Capabilities: []string{
		"NET_BIND_SERVICE",
		"KILL",
		"AUDIT_WRITE",
	},
	Rlimits: []configs.Rlimit{
		{
			Type: syscall.RLIMIT_NOFILE,
			Hard: 1024,
			Soft: 1024,
		},
	},
	// http://man7.org/linux/man-pages/man2/syscalls.2.html
	Seccomp: &configs.Seccomp{
		Syscalls: []*configs.Syscall{
			{
				Value: syscall.SYS_CAPSET, // http://man7.org/linux/man-pages/man2/capset.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				// setns: the syscall number is supplied by system.SysSetns.
				Value: int(system.SysSetns()),
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_CHMOD, // http://man7.org/linux/man-pages/man2/chmod.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_CHOWN, // http://man7.org/linux/man-pages/man2/chown.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_LINK, // http://man7.org/linux/man-pages/man2/link.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_LINKAT, // http://man7.org/linux/man-pages/man2/linkat.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_UNLINK, // http://man7.org/linux/man-pages/man2/unlink.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_UNLINKAT, // http://man7.org/linux/man-pages/man2/unlinkat.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_CHROOT, // http://man7.org/linux/man-pages/man2/chroot.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_SETDOMAINNAME, // http://man7.org/linux/man-pages/man2/setdomainname.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_SETHOSTNAME, // http://man7.org/linux/man-pages/man2/sethostname.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				// clone is only rejected when the flags argument contains CLONE_NEWUSER.
				Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
				Action: configs.Action(syscall.EPERM),
				Args: []*configs.Arg{
					{
						Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
						Value: syscall.CLONE_NEWUSER,
						Op: configs.MaskEqualTo,
					},
				},
			},
		},
	},
}
// mediumProfile is a medium level profile that should be able to do things
// like installing from apt-get or yum. Compared with highProfile it grants
// more capabilities and leaves capset, chmod, chown and the link/unlink
// syscalls unrestricted.
var mediumProfile = &securityProfile{
	Capabilities: []string{
		"CHOWN",
		"DAC_OVERRIDE",
		"FSETID",
		"FOWNER",
		"SETGID",
		"SETUID",
		"SETFCAP",
		"SETPCAP",
		"NET_BIND_SERVICE",
		"KILL",
		"AUDIT_WRITE",
	},
	Rlimits: []configs.Rlimit{
		{
			Type: syscall.RLIMIT_NOFILE,
			Hard: 1024,
			Soft: 1024,
		},
	},
	// http://man7.org/linux/man-pages/man2/syscalls.2.html
	Seccomp: &configs.Seccomp{
		Syscalls: []*configs.Syscall{
			{
				Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				// setns: the syscall number is supplied by system.SysSetns.
				Value: int(system.SysSetns()),
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_CHROOT, // http://man7.org/linux/man-pages/man2/chroot.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_SETDOMAINNAME, // http://man7.org/linux/man-pages/man2/setdomainname.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_SETHOSTNAME, // http://man7.org/linux/man-pages/man2/sethostname.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				// clone is only rejected when the flags argument contains CLONE_NEWUSER.
				Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
				Action: configs.Action(syscall.EPERM),
				Args: []*configs.Arg{
					{
						Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
						Value: syscall.CLONE_NEWUSER,
						Op: configs.MaskEqualTo,
					},
				},
			},
		},
	},
}
// lowProfile is the least restrictive built-in profile. Compared with
// mediumProfile it adds the SYS_CHROOT capability and leaves chroot,
// setdomainname and sethostname unrestricted.
var lowProfile = &securityProfile{
	Capabilities: []string{
		"CHOWN",
		"DAC_OVERRIDE",
		"FSETID",
		"FOWNER",
		"SETGID",
		"SETUID",
		"SYS_CHROOT",
		"SETFCAP",
		"SETPCAP",
		"NET_BIND_SERVICE",
		"KILL",
		"AUDIT_WRITE",
	},
	Rlimits: []configs.Rlimit{
		{
			Type: syscall.RLIMIT_NOFILE,
			Hard: 1024,
			Soft: 1024,
		},
	},
	// http://man7.org/linux/man-pages/man2/syscalls.2.html
	Seccomp: &configs.Seccomp{
		Syscalls: []*configs.Syscall{
			{
				Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				// setns: the syscall number is supplied by system.SysSetns.
				Value: int(system.SysSetns()),
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
				Action: configs.Action(syscall.EPERM),
			},
			{
				// clone is only rejected when the flags argument contains CLONE_NEWUSER.
				Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
				Action: configs.Action(syscall.EPERM),
				Args: []*configs.Arg{
					{
						Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
						Value: syscall.CLONE_NEWUSER,
						Op: configs.MaskEqualTo,
					},
				},
			},
		},
	},
}

32
seccomp/bpf.go Normal file
View File

@ -0,0 +1,32 @@
package seccomp
import "strings"
// bpfLabel is a named position in a BPF program under construction.
type bpfLabel struct {
	label    string
	location uint32 // 0xffffffff until the label's position is known
}

// bpfLabels is the table of labels; a label's index in the slice is its id.
type bpfLabels []bpfLabel

// labelIndex looks lb up in labels (ignoring case) and returns its index.
// A label that is not present yet is appended with an unresolved location
// and the index of the new entry is returned.
func labelIndex(labels *bpfLabels, lb string) uint32 {
	for i, entry := range *labels {
		if strings.EqualFold(lb, entry.label) {
			return uint32(i)
		}
	}
	next := uint32(len(*labels))
	*labels = append(*labels, bpfLabel{label: lb, location: 0xffffffff})
	return next
}
// scmpBpfStmt builds a BPF instruction with opcode code and operand k; both
// jump offsets are zero.
func scmpBpfStmt(code uint16, k uint32) sockFilter {
	return sockFilter{code: code, k: k}
}
// scmpBpfJump builds a BPF jump instruction with opcode code, operand k and
// the jump-if-true (jt) / jump-if-false (jf) offsets.
func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter {
	return sockFilter{code: code, jt: jt, jf: jf, k: k}
}

144
seccomp/context.go Normal file
View File

@ -0,0 +1,144 @@
package seccomp
import (
"errors"
"syscall"
)
// labelTemplate is the format of the label naming a syscall's argument check;
// it is filled in with the syscall number and the argument index.
const labelTemplate = "lb-%d-%d"
// Action is the type of action that will be taken when a
// syscall is performed. Values other than the named constants below are
// treated as an errno to return to the caller (see Error).
type Action int

const (
	Kill Action = iota - 3 // Kill the calling process of the syscall (value -3).
	Trap // Trap and coredump the calling process of the syscall (value -2).
	Allow // Allow the syscall to be completed (value -1).
)
// Syscall is the specified syscall, action, and any type of arguments
// to filter on.
type Syscall struct {
	// Value is the syscall number.
	Value uint32
	// Action is the action to perform when the specified syscall is made.
	Action Action
	// Args are filters that can be specified on the arguments to the syscall.
	// When Args is empty the action applies to every invocation of the syscall.
	Args Args
}
// scmpAction returns the seccomp filter return value for the syscall's
// Action. Actions other than Allow, Trap and Kill are encoded as an errno
// return via actionErrno.
func (s *Syscall) scmpAction() uint32 {
	if s.Action == Allow {
		return retAllow
	}
	if s.Action == Trap {
		return retTrap
	}
	if s.Action == Kill {
		return retKill
	}
	return actionErrno(uint32(s.Action))
}
// Arg represents an argument to the syscall with the argument's index,
// the operator to apply when matching, and the argument's value at that time.
type Arg struct {
	Index uint32 // index of args which start from zero
	Op Operator // operation, such as EQ/NE/GE/LE
	Value uint // the value of arg
}

// Args is a list of argument groups. When the filter is built, each group
// loads the syscall argument selected by its first element's Index and then
// applies the comparisons of every element in the group.
type Args [][]Arg
var (
	// ErrUnresolvedLabel is returned when a jump refers to a label whose
	// position was never recorded.
	ErrUnresolvedLabel = errors.New("seccomp: unresolved label")
	// ErrDuplicateLabel is returned when the same label is defined more than once.
	ErrDuplicateLabel = errors.New("seccomp: duplicate label use")
	// ErrUnsupportedOperation is returned for an Operator the filter cannot encode.
	ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument")
)
// Error converts an errno into an Action. When a syscall carrying that
// Action is made, the calling process receives the errno instead.
func Error(code syscall.Errno) Action {
	action := Action(code)
	return action
}
// New creates an empty Context that is ready to have syscalls added to it.
func New() *Context {
	c := new(Context)
	c.syscalls = map[uint32]*Syscall{}
	return c
}
// Context holds syscalls for the current process to limit the type of
// actions the calling process can make.
type Context struct {
	// syscalls maps a syscall number to its rule; at most one rule is kept
	// per syscall number.
	syscalls map[uint32]*Syscall
}
// Add will add the specified syscall, action, and arguments to the seccomp
// Context. A rule previously added for the same syscall number is replaced.
//
// Add also works on a zero-value Context: the internal map is created on
// first use instead of panicking on a nil-map write.
func (c *Context) Add(s *Syscall) {
	if c.syscalls == nil {
		c.syscalls = make(map[uint32]*Syscall)
	}
	c.syscalls[s.Value] = s
}
// Remove drops the rule registered for syscall number call, if any.
func (c *Context) Remove(call uint32) {
	rules := c.syscalls
	delete(rules, call)
}
// Load applies the Context to the calling process, so that the seccomp
// restrictions take effect once the context is loaded. It builds the filter,
// sets the no-new-privileges flag via prctl, and then installs the filter.
func (c *Context) Load() error {
	filter, err := c.newFilter()
	if err != nil {
		return err
	}
	err = prctl(prSetNoNewPrivileges, 1, 0, 0, 0)
	if err != nil {
		return err
	}
	return newSockFprog(filter).set()
}
// newFilter assembles the BPF program for the Context's syscalls and resolves
// the symbolic labels used while emitting it. It returns ErrUnresolvedLabel
// when a jump targets a label that was never defined, ErrDuplicateLabel when
// a label is defined twice, and any error reported by addArguments.
func (c *Context) newFilter() ([]sockFilter, error) {
	var (
		labels bpfLabels
		f = newFilter()
	)
	// first pass: one syscall-number check per configured syscall
	for _, s := range c.syscalls {
		f.addSyscall(s, &labels)
	}
	// syscalls that matched none of the checks above are allowed
	f.allow()
	// process args for the syscalls
	for _, s := range c.syscalls {
		if err := f.addArguments(s, &labels); err != nil {
			return nil, err
		}
	}
	// apply labels for arguments
	// The program is walked from the end to the start so that a label
	// definition placed after a jump has its location recorded before the
	// jump that refers to it is visited.
	idx := int32(len(*f) - 1)
	for ; idx >= 0; idx-- {
		lf := &(*f)[idx]
		// only BPF_JA instructions can be jump or label placeholders
		if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) {
			continue
		}
		// jt/jf carry a marker that tells placeholders apart
		rel := int32(lf.jt)<<8 | int32(lf.jf)
		if ((jumpJT << 8) | jumpJF) == rel {
			// jump placeholder: k holds the label index
			if labels[lf.k].location == 0xffffffff {
				return nil, ErrUnresolvedLabel
			}
			// rewrite k as an offset relative to the instruction after the jump
			lf.k = labels[lf.k].location - uint32(idx+1)
			lf.jt = 0
			lf.jf = 0
		} else if ((labelJT << 8) | labelJF) == rel {
			// label definition: record its position once
			if labels[lf.k].location != 0xffffffff {
				return nil, ErrDuplicateLabel
			}
			labels[lf.k].location = uint32(idx)
			// the marker becomes a BPF_JA with a zero offset
			lf.k = 0
			lf.jt = 0
			lf.jf = 0
		}
	}
	return *f, nil
}

116
seccomp/filter.go Normal file
View File

@ -0,0 +1,116 @@
package seccomp
import (
"fmt"
"syscall"
"unsafe"
)
// sockFilter is a single BPF instruction.
type sockFilter struct {
	code uint16 // opcode
	jt uint8 // offset to jump when the condition is true
	jf uint8 // offset to jump when the condition is false
	k uint32 // generic operand (constant, offset or label index)
}
// newFilter starts a BPF program with a single instruction that loads the
// word found at the offset of secData.nr.
func newFilter() *filter {
	load := sockFilter{
		code: pfLD + syscall.BPF_W + syscall.BPF_ABS,
		k:    uint32(unsafe.Offsetof(secData.nr)),
	}
	return &filter{load}
}
// filter is a BPF program under construction: an ordered list of instructions.
type filter []sockFilter
// addSyscall appends the syscall-number check for s. Without argument rules
// the check returns the syscall's action directly; with argument rules it
// jumps to the label of the first argument group. Nothing is emitted when
// the first argument group is empty.
func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) {
	if len(s.Args) == 0 {
		f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()))
		return
	}
	first := s.Args[0]
	if len(first) == 0 {
		return
	}
	lb := fmt.Sprintf(labelTemplate, s.Value, first[0].Index)
	target := scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), jumpJT, jumpJF)
	f.call(s.Value, target)
}
// addArguments appends the argument checks for s, one section per argument
// group. It returns ErrUnsupportedOperation (via op) when a comparison uses
// an unknown operator.
func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error {
	for i := 0; len(s.Args) > i; i++ {
		if len(s.Args[i]) > 0 {
			// define the group's label and load the argument selected by the
			// group's first element
			lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index)
			f.label(labels, lb)
			f.arg(s.Args[i][0].Index)
		}
		for j := 0; j < len(s.Args[i]); j++ {
			// jf is the instruction executed when the comparison matches
			var jf sockFilter
			if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 {
				// a further non-empty group follows: continue with its label
				lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index)
				jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA,
					labelIndex(labels, lbj), jumpJT, jumpJF)
			} else {
				// last group: return the syscall's action
				jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())
			}
			if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil {
				return err
			}
		}
		// no comparison in this group matched: allow the syscall
		f.allow()
	}
	return nil
}
// label appends a label-definition placeholder for lb: a BPF_JA instruction
// tagged with labelJT/labelJF whose operand is the label's index.
func (f *filter) label(labels *bpfLabels, lb string) {
	id := labelIndex(labels, lb)
	marker := scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, id, labelJT, labelJF)
	*f = append(*f, marker)
}
// call appends a check for syscall number nr followed by jt. jt is executed
// when the number matches and skipped otherwise.
func (f *filter) call(nr uint32, jt sockFilter) {
	match := scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1)
	*f = append(*f, match, jt)
}
// allow appends an instruction that returns retAllow.
func (f *filter) allow() {
	ret := scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow)
	*f = append(*f, ret)
}
// deny appends an instruction that returns retTrap.
func (f *filter) deny() {
	ret := scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap)
	*f = append(*f, ret)
}
// arg appends the instructions that load syscall argument index: the word at
// endian.low(index) is stored in scratch slot 0 and the word at
// endian.hi(index) in scratch slot 1.
func (f *filter) arg(index uint32) {
	*f = append(*f,
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(index)),
		scmpBpfStmt(syscall.BPF_ST, 0),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(index)),
		scmpBpfStmt(syscall.BPF_ST, 1),
	)
}
// op appends the comparison for operation against v; jf is the instruction
// handed to the comparison helper. It returns ErrUnsupportedOperation, and
// appends nothing, for an operator it does not know.
func (f *filter) op(operation Operator, v uint, jf sockFilter) error {
	var emit func(*filter, uint, sockFilter)
	switch operation {
	case EqualTo:
		emit = jumpEqualTo
	case NotEqualTo:
		emit = jumpNotEqualTo
	case GreatherThan:
		emit = jumpGreaterThan
	case LessThan:
		emit = jumpLessThan
	case MaskEqualTo:
		emit = jumpMaskEqualTo
	default:
		return ErrUnsupportedOperation
	}
	emit(f, v, jf)
	return nil
}
// arg appends four instructions that load syscall argument idx: the word at
// endian.low(idx) is stored in scratch slot 0, then the word at
// endian.hi(idx) is stored in scratch slot 1 (and remains in the accumulator).
func arg(f *filter, idx uint32) {
	*f = append(*f,
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx)),
		scmpBpfStmt(syscall.BPF_ST, 0),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx)),
		scmpBpfStmt(syscall.BPF_ST, 1),
	)
}
// jump appends a jump placeholder to label lb: a BPF_JA instruction tagged
// with jumpJT/jumpJF whose operand is the label's index.
func jump(f *filter, labels *bpfLabels, lb string) {
	id := labelIndex(labels, lb)
	placeholder := scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, id, jumpJT, jumpJF)
	*f = append(*f, placeholder)
}

68
seccomp/jump_amd64.go Normal file
View File

@ -0,0 +1,68 @@
// +build linux,amd64
package seccomp
// Using BPF filters
//
// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf
import "syscall"
// jumpGreaterThan appends a seven-instruction BPF sequence that executes jt
// when the 64-bit syscall argument is strictly greater than v.
//
// The sequence relies on the state left by arg: the accumulator and scratch
// slot 1 hold the argument's high 32 bits, scratch slot 0 its low 32 bits.
// When the comparison does not match, control continues after the sequence
// with the high word back in the accumulator.
//
// Despite its name, jt is an instruction (typically a return or a jump), not
// an offset.
//
// The low-word comparison uses BPF_JGT; the previous BPF_JGE also matched an
// argument equal to v.
func jumpGreaterThan(f *filter, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	*f = append(*f,
		// arg.hi > hi: match, jump straight to jt
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, hi, 4, 0),
		// arg.hi != hi (i.e. smaller): no match, leave the sequence
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 5),
		// high words are equal: compare the low words
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0),
		// arg.lo > lo: match; otherwise skip jt
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, lo, 0, 2),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
		jt,
		// restore the high word for whatever follows
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
	)
}
// jumpEqualTo appends a six-instruction BPF sequence that executes jt when
// the 64-bit syscall argument equals v.
//
// The sequence relies on the state left by arg: the accumulator and scratch
// slot 1 hold the argument's high 32 bits, scratch slot 0 its low 32 bits.
// Despite its name, jt is an instruction, not an offset.
func jumpEqualTo(f *filter, v uint, jt sockFilter) {
	value := uint64(v)
	lo, hi := uint32(value&0xffffffff), uint32(value>>32)
	*f = append(*f,
		// high words differ: no match, leave the sequence
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 5),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0),
		// low words differ: no match, skip jt
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
		jt,
		// restore the high word for whatever follows
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
	)
}
// jumpLessThan appends a seven-instruction BPF sequence that executes jt
// when the 64-bit syscall argument is strictly less than v.
//
// The sequence relies on the state left by arg: the accumulator and scratch
// slot 1 hold the argument's high 32 bits, scratch slot 0 its low 32 bits.
// When the comparison does not match, control continues after the sequence
// with the high word back in the accumulator.
//
// Despite its name, jt is an instruction (typically a return or a jump), not
// an offset.
//
// The low-word comparison rejects with BPF_JGE; the previous BPF_JGT also
// matched an argument equal to v.
func jumpLessThan(f *filter, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	*f = append(*f,
		// arg.hi > hi: no match, leave the sequence
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, hi, 6, 0),
		// arg.hi != hi (i.e. smaller): match, jump straight to jt
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 3),
		// high words are equal: compare the low words
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0),
		// arg.lo >= lo: no match, skip jt
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, lo, 2, 0),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
		jt,
		// restore the high word for whatever follows
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
	)
}
// jumpNotEqualTo appends a six-instruction BPF sequence that executes jt
// when the 64-bit syscall argument differs from v.
//
// The sequence relies on the state left by arg: the accumulator and scratch
// slot 1 hold the argument's high 32 bits, scratch slot 0 its low 32 bits.
// When the comparison does not match, control continues after the sequence
// with the high word back in the accumulator.
//
// Despite its name, jt is an instruction (typically a return or a jump), not
// an offset.
//
// The argument differs from v when either word differs. The previous
// high-word jump was inverted: equal high words skipped the whole sequence,
// so values differing only in the low word never matched.
func jumpNotEqualTo(f *filter, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	*f = append(*f,
		// high words differ: match, jump straight to jt
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 3),
		// high words are equal: compare the low words
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0),
		// low words are equal too: no match, skip jt
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
		jt,
		// restore the high word for whatever follows
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
	)
}
// jumpMaskEqualTo appends a seven-instruction BPF sequence that executes jt
// when the syscall argument contains every bit of v. The evaluation is
// equivalent to, for example, syscallArg & CLONE_NEWUSER == CLONE_NEWUSER.
//
// Only the low 32 bits are masked; the argument's high word must equal the
// high word of v exactly. The sequence relies on the state left by arg: the
// accumulator and scratch slot 1 hold the argument's high 32 bits, scratch
// slot 0 its low 32 bits. Despite its name, jt is an instruction, not an
// offset.
func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) {
	value := uint64(v)
	lo, hi := uint32(value&0xffffffff), uint32(value>>32)
	*f = append(*f,
		// high words differ: no match, leave the sequence
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0),
		// keep only the bits of the low word that are set in v
		scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v)),
		// masked low word differs: no match, skip jt
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
		jt,
		// restore the high word for whatever follows
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
	)
}

122
seccomp/seccomp.go Normal file
View File

@ -0,0 +1,122 @@
// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go.
package seccomp
import (
"syscall"
"unsafe"
)
// Operator that is used for argument comparison.
type Operator int

const (
	// EqualTo is the zero value of Operator.
	EqualTo Operator = iota
	NotEqualTo
	// GreatherThan (sic): the spelling is part of the exported API.
	GreatherThan
	LessThan
	// MaskEqualTo matches when the argument contains the bits of the value.
	MaskEqualTo
)
// Marker values stored in the jt/jf fields of a BPF_JA instruction while the
// program is being assembled. Context.newFilter replaces them once label
// positions are known.
const (
	// jumpJT/jumpJF tag a jump whose operand is a label index.
	jumpJT = 0xff
	jumpJF = 0xff
	// labelJT/labelJF tag the definition of a label.
	labelJT = 0xfe
	labelJF = 0xfe
)
// Filter return values and prctl constants.
// NOTE(review): the values appear to mirror the kernel's SECCOMP_RET_*,
// SECCOMP_MODE_FILTER and PR_SET_NO_NEW_PRIVS definitions - confirm against
// linux/seccomp.h and linux/prctl.h.
const (
	pfLD = 0x0 // load class added to the opcode of the program's first instruction
	retKill = 0x00000000 // filter return value for the Kill action
	retTrap = 0x00030000 // filter return value for the Trap action
	retAllow = 0x7fff0000 // filter return value for the Allow action
	modeFilter = 0x2 // mode argument passed to prctl(PR_SET_SECCOMP)
	prSetNoNewPrivileges = 0x26 // prctl option issued by Context.Load before installing the filter
)
// actionErrno encodes errno as a filter return value: the low 16 bits of
// errno combined with the 0x00050000 action bits.
func actionErrno(errno uint32) uint32 {
	const (
		retErrno  = 0x00050000
		errnoMask = 0x0000ffff
	)
	return retErrno | (errno & errnoMask)
}
var (
	// secData is only used to compute field offsets and sizes.
	// NOTE(review): the layout appears to mirror the kernel's
	// struct seccomp_data - confirm against linux/seccomp.h.
	secData = struct {
		nr         int32
		arch       uint32
		insPointer uint64
		args       [6]uint64
	}{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}}
)

// isLittle reports whether the host stores integers least significant byte
// first. It is computed once, at package initialisation.
var isLittle = func() bool {
	var (
		x  = 0x1234
		p  = unsafe.Pointer(&x)
		p2 = (*[unsafe.Sizeof(0)]byte)(p)
	)
	// On a little-endian host the first byte is 0x34; on a big-endian host
	// it is one of the zero high-order bytes.
	return p2[0] != 0
}()

// endian yields the offsets of the two 32-bit halves of a syscall argument.
var endian endianSupport

// endianSupport computes byte offsets into secData.args for the host's byte
// order.
type endianSupport struct {
}

// hi returns the offset of the most significant 32 bits of argument i.
func (e endianSupport) hi(i uint32) uint32 {
	if isLittle {
		return e.little(i)
	}
	return e.big(i)
}

// low returns the offset of the least significant 32 bits of argument i.
func (e endianSupport) low(i uint32) uint32 {
	if isLittle {
		return e.big(i)
	}
	return e.little(i)
}

// big returns the offset of the first 32-bit word of argument idx, or 0 when
// idx is out of range.
func (endianSupport) big(idx uint32) uint32 {
	if idx >= 6 {
		return 0
	}
	return uint32(unsafe.Offsetof(secData.args)) + 8*idx
}

// little returns the offset of the second 32-bit word of argument idx, or 0
// when idx is out of range.
//
// The stride between arguments is the element size (8 bytes). Alignof must
// not be used for it: uint64 is only 4-byte aligned on 32-bit x86, which
// would disagree with big.
func (endianSupport) little(idx uint32) uint32 {
	if idx >= 6 {
		return 0
	}
	return uint32(unsafe.Offsetof(secData.args)) +
		uint32(unsafe.Sizeof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch))
}
// prctl invokes the prctl system call with the given option and arguments.
// It returns the syscall's errno as an error, or nil on success.
func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error {
	if _, _, errno := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0); errno != 0 {
		return errno
	}
	return nil
}
// newSockFprog wraps filter in a sockFprog. The instruction count is stored
// as a uint16.
func newSockFprog(filter []sockFilter) *sockFprog {
	prog := new(sockFprog)
	prog.len = uint16(len(filter))
	prog.filt = filter
	return prog
}
// sockFprog describes a BPF program handed to prctl(PR_SET_SECCOMP).
// NOTE(review): filt is a Go slice rather than a raw pointer; passing this
// struct to the kernel presumably relies on the slice header starting with
// its data pointer - confirm.
type sockFprog struct {
	len uint16 // number of instructions in filt
	filt []sockFilter // the program's instructions
}
// set installs the program for the calling process by issuing
// prctl(PR_SET_SECCOMP, modeFilter, s). It returns the syscall's errno as an
// error, or nil on success.
func (s *sockFprog) set() error {
	_, _, errno := syscall.Syscall(
		syscall.SYS_PRCTL,
		uintptr(syscall.PR_SET_SECCOMP),
		uintptr(modeFilter),
		uintptr(unsafe.Pointer(s)),
	)
	if errno == 0 {
		return nil
	}
	return errno
}

View File

@ -99,5 +99,8 @@ func (l *linuxStandardInit) Init() error {
if syscall.Getppid() != l.parentPid { if syscall.Getppid() != l.parentPid {
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
} }
if err := finalizeSeccomp(l.config); err != nil {
return err
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
} }

View File

@ -21,16 +21,20 @@ var setNsMap = map[string]uintptr{
"linux/s390x": 339, "linux/s390x": 339,
} }
// sysSetns is the setNsMap entry for the running GOOS/GOARCH pair, looked up
// once at package initialisation. It is 0 when the platform has no entry.
var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
// SysSetns returns the setns syscall number recorded for the current
// platform, or 0 when the platform has no entry in setNsMap.
func SysSetns() uint32 {
	nr := uint32(sysSetns)
	return nr
}
func Setns(fd uintptr, flags uintptr) error { func Setns(fd uintptr, flags uintptr) error {
ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
if !exists { if !exists {
return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
} }
_, _, err := syscall.RawSyscall(ns, fd, flags, 0) _, _, err := syscall.RawSyscall(ns, fd, flags, 0)
if err != 0 { if err != 0 {
return err return err
} }
return nil return nil
} }