Merge pull request #613 from crosbymichael/seccomp-args
Golang seccomp package
This commit is contained in:
commit
204502647d
|
@ -13,6 +13,40 @@ type IDMap struct {
|
|||
Size int `json:"size"`
|
||||
}
|
||||
|
||||
type Seccomp struct {
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
}
|
||||
|
||||
// Action is the outcome applied to a filtered syscall. The named actions use
// negative values; seccompAction converts any other value into an errno.
type Action int

const (
	// Kill is the first named action.
	Kill Action = -3
	// Trap is the second named action.
	Trap Action = -2
	// Allow is the third named action.
	Allow Action = -1
)
|
||||
|
||||
// Operator selects how a syscall argument is compared against a rule value.
type Operator int

// The numbering starts at zero and increases by one in declaration order.
const (
	EqualTo      Operator = 0
	NotEqualTo   Operator = 1
	GreatherThan Operator = 2
	LessThan     Operator = 3
	MaskEqualTo  Operator = 4
)
|
||||
|
||||
type Arg struct {
|
||||
Index int `json:"index"`
|
||||
Value uint32 `json:"value"`
|
||||
Op Operator `json:"op"`
|
||||
}
|
||||
|
||||
type Syscall struct {
|
||||
Value int `json:"value"`
|
||||
Action Action `json:"action"`
|
||||
Args []*Arg `json:"args"`
|
||||
}
|
||||
|
||||
// TODO Windows. Many of these fields should be factored out into those parts
|
||||
// which are common across platforms, and those which are platform specific.
|
||||
|
||||
|
@ -104,4 +138,9 @@ type Config struct {
|
|||
// SystemProperties is a map of properties and their values. It is the equivalent of using
|
||||
// sysctl -w my.property.name value in Linux.
|
||||
SystemProperties map[string]string `json:"system_properties"`
|
||||
|
||||
// Seccomp allows actions to be taken whenever a syscall is made within the container.
|
||||
// By default, all syscalls are allowed; actions to allow, trap, kill, or return an errno
|
||||
// can be specified on a per syscall basis.
|
||||
Seccomp *Seccomp `json:"seccomp"`
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@ import (
|
|||
"github.com/docker/libcontainer/cgroups"
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/netlink"
|
||||
"github.com/docker/libcontainer/seccomp"
|
||||
"github.com/docker/libcontainer/system"
|
||||
"github.com/docker/libcontainer/user"
|
||||
"github.com/docker/libcontainer/utils"
|
||||
|
@ -259,3 +260,61 @@ func killCgroupProcesses(m cgroups.Manager) error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func finalizeSeccomp(config *initConfig) error {
|
||||
if config.Config.Seccomp == nil {
|
||||
return nil
|
||||
}
|
||||
context := seccomp.New()
|
||||
for _, s := range config.Config.Seccomp.Syscalls {
|
||||
ss := &seccomp.Syscall{
|
||||
Value: uint32(s.Value),
|
||||
Action: seccompAction(s.Action),
|
||||
}
|
||||
if len(s.Args) > 0 {
|
||||
ss.Args = seccompArgs(s.Args)
|
||||
}
|
||||
context.Add(ss)
|
||||
}
|
||||
return context.Load()
|
||||
}
|
||||
|
||||
func seccompAction(a configs.Action) seccomp.Action {
|
||||
switch a {
|
||||
case configs.Kill:
|
||||
return seccomp.Kill
|
||||
case configs.Trap:
|
||||
return seccomp.Trap
|
||||
case configs.Allow:
|
||||
return seccomp.Allow
|
||||
}
|
||||
return seccomp.Error(syscall.Errno(int(a)))
|
||||
}
|
||||
|
||||
func seccompArgs(args []*configs.Arg) seccomp.Args {
|
||||
var sa []seccomp.Arg
|
||||
for _, a := range args {
|
||||
sa = append(sa, seccomp.Arg{
|
||||
Index: uint32(a.Index),
|
||||
Op: seccompOperator(a.Op),
|
||||
Value: uint(a.Value),
|
||||
})
|
||||
}
|
||||
return seccomp.Args{sa}
|
||||
}
|
||||
|
||||
func seccompOperator(o configs.Operator) seccomp.Operator {
|
||||
switch o {
|
||||
case configs.EqualTo:
|
||||
return seccomp.EqualTo
|
||||
case configs.NotEqualTo:
|
||||
return seccomp.NotEqualTo
|
||||
case configs.GreatherThan:
|
||||
return seccomp.GreatherThan
|
||||
case configs.LessThan:
|
||||
return seccomp.LessThan
|
||||
case configs.MaskEqualTo:
|
||||
return seccomp.MaskEqualTo
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
|
|
@ -714,3 +714,27 @@ func TestSystemProperties(t *testing.T) {
|
|||
t.Fatalf("kernel.shmmni property expected to be 8192, but is %s", shmmniOutput)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSeccompNoChown(t *testing.T) {
|
||||
if testing.Short() {
|
||||
return
|
||||
}
|
||||
rootfs, err := newRootfs()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer remove(rootfs)
|
||||
config := newTemplateConfig(rootfs)
|
||||
config.Seccomp = &configs.Seccomp{}
|
||||
config.Seccomp.Syscalls = append(config.Seccomp.Syscalls, &configs.Syscall{
|
||||
Value: syscall.SYS_CHOWN,
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
})
|
||||
buffers, _, err := runContainer(config, "", "/bin/sh", "-c", "chown 1:1 /tmp")
|
||||
if err == nil {
|
||||
t.Fatal("running chown in a container should fail")
|
||||
}
|
||||
if s := buffers.String(); !strings.Contains(s, "not permitted") {
|
||||
t.Fatalf("running chown should result in an EPERM but got %q", s)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,11 +122,11 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe
|
|||
|
||||
err = container.Start(process)
|
||||
if err != nil {
|
||||
return nil, -1, err
|
||||
return buffers, -1, err
|
||||
}
|
||||
ps, err := process.Wait()
|
||||
if err != nil {
|
||||
return nil, -1, err
|
||||
return buffers, -1, err
|
||||
}
|
||||
status := ps.Sys().(syscall.WaitStatus)
|
||||
if status.Exited() {
|
||||
|
@ -134,7 +134,7 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe
|
|||
} else if status.Signaled() {
|
||||
exitCode = -int(status.Signal())
|
||||
} else {
|
||||
return nil, -1, err
|
||||
return buffers, -1, err
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
|
@ -19,32 +19,33 @@ import (
|
|||
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||
|
||||
var createFlags = []cli.Flag{
|
||||
cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"},
|
||||
cli.BoolFlag{Name: "cgroup", Usage: "mount the cgroup data for the container"},
|
||||
cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"},
|
||||
cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"},
|
||||
cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"},
|
||||
cli.IntFlag{Name: "cpushares", Usage: "set the cpushares for the container"},
|
||||
cli.IntFlag{Name: "memory-limit", Usage: "set the memory limit for the container"},
|
||||
cli.IntFlag{Name: "memory-swap", Usage: "set the memory swap limit for the container"},
|
||||
cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"},
|
||||
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
|
||||
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"},
|
||||
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
|
||||
cli.StringFlag{Name: "cpuset-cpus", Usage: "set the cpuset cpus"},
|
||||
cli.StringFlag{Name: "cpuset-mems", Usage: "set the cpuset mems"},
|
||||
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
|
||||
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
|
||||
cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
|
||||
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
|
||||
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
|
||||
cli.StringFlag{Name: "hostname", Value: "nsinit", Usage: "hostname value for the container"},
|
||||
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
|
||||
cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"},
|
||||
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
|
||||
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
|
||||
cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"},
|
||||
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"},
|
||||
cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
|
||||
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
|
||||
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
|
||||
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
|
||||
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
|
||||
cli.StringFlag{Name: "security", Value: "", Usage: "set the security profile (high, medium, low)"},
|
||||
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
|
||||
cli.StringFlag{Name: "veth-address", Usage: "veth ip address"},
|
||||
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"},
|
||||
cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"},
|
||||
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"},
|
||||
cli.BoolFlag{Name: "cgroup", Usage: "mount the cgroup data for the container"},
|
||||
cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"},
|
||||
cli.StringSliceFlag{Name: "sysctl", Value: &cli.StringSlice{}, Usage: "set system properties in the container"},
|
||||
cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"},
|
||||
}
|
||||
|
||||
var configCommand = cli.Command{
|
||||
|
@ -203,6 +204,24 @@ func modify(config *configs.Config, context *cli.Context) {
|
|||
Device: "cgroup",
|
||||
})
|
||||
}
|
||||
modifySecurityProfile(context, config)
|
||||
}
|
||||
|
||||
func modifySecurityProfile(context *cli.Context, config *configs.Config) {
|
||||
profileName := context.String("security")
|
||||
if profileName == "" {
|
||||
return
|
||||
}
|
||||
profile := profiles[profileName]
|
||||
if profile == nil {
|
||||
logrus.Fatalf("invalid profile name %q", profileName)
|
||||
}
|
||||
config.Rlimits = profile.Rlimits
|
||||
config.Capabilities = profile.Capabilities
|
||||
config.Seccomp = profile.Seccomp
|
||||
config.AppArmorProfile = profile.ApparmorProfile
|
||||
config.MountLabel = profile.MountLabel
|
||||
config.ProcessLabel = profile.ProcessLabel
|
||||
}
|
||||
|
||||
func getTemplate() *configs.Config {
|
||||
|
@ -290,13 +309,5 @@ func getTemplate() *configs.Config {
|
|||
Flags: defaultMountFlags | syscall.MS_RDONLY,
|
||||
},
|
||||
},
|
||||
Rlimits: []configs.Rlimit{
|
||||
{
|
||||
Type: syscall.RLIMIT_NOFILE,
|
||||
Hard: 1024,
|
||||
Soft: 1024,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,272 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
|
||||
"github.com/docker/libcontainer/configs"
|
||||
"github.com/docker/libcontainer/system"
|
||||
)
|
||||
|
||||
var profiles = map[string]*securityProfile{
|
||||
"high": highProfile,
|
||||
"medium": mediumProfile,
|
||||
"low": lowProfile,
|
||||
}
|
||||
|
||||
type securityProfile struct {
|
||||
Capabilities []string `json:"capabilities"`
|
||||
ApparmorProfile string `json:"apparmor_profile"`
|
||||
MountLabel string `json:"mount_label"`
|
||||
ProcessLabel string `json:"process_label"`
|
||||
Rlimits []configs.Rlimit `json:"rlimits"`
|
||||
Seccomp *configs.Seccomp `json:"seccomp"`
|
||||
}
|
||||
|
||||
// this should be a runtime config that is not able to do things like apt-get or yum install.
|
||||
var highProfile = &securityProfile{
|
||||
Capabilities: []string{
|
||||
"NET_BIND_SERVICE",
|
||||
"KILL",
|
||||
"AUDIT_WRITE",
|
||||
},
|
||||
Rlimits: []configs.Rlimit{
|
||||
{
|
||||
Type: syscall.RLIMIT_NOFILE,
|
||||
Hard: 1024,
|
||||
Soft: 1024,
|
||||
},
|
||||
},
|
||||
// http://man7.org/linux/man-pages/man2/syscalls.2.html
|
||||
Seccomp: &configs.Seccomp{
|
||||
Syscalls: []*configs.Syscall{
|
||||
{
|
||||
Value: syscall.SYS_CAPSET, // http://man7.org/linux/man-pages/man2/capset.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: int(system.SysSetns()),
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_CHMOD, // http://man7.org/linux/man-pages/man2/chmod.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_CHOWN, // http://man7.org/linux/man-pages/man2/chown.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_LINK, // http://man7.org/linux/man-pages/man2/link.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_LINKAT, // http://man7.org/linux/man-pages/man2/linkat.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_UNLINK, // http://man7.org/linux/man-pages/man2/unlink.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_UNLINKAT, // http://man7.org/linux/man-pages/man2/unlinkat.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_CHROOT, // http://man7.org/linux/man-pages/man2/chroot.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_SETDOMAINNAME, // http://man7.org/linux/man-pages/man2/setdomainname.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_SETHOSTNAME, // http://man7.org/linux/man-pages/man2/sethostname.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
Args: []*configs.Arg{
|
||||
{
|
||||
Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
|
||||
Value: syscall.CLONE_NEWUSER,
|
||||
Op: configs.MaskEqualTo,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// This is a medium level profile that should be able to do things like installing from
|
||||
// apt-get or yum.
|
||||
var mediumProfile = &securityProfile{
|
||||
Capabilities: []string{
|
||||
"CHOWN",
|
||||
"DAC_OVERRIDE",
|
||||
"FSETID",
|
||||
"FOWNER",
|
||||
"SETGID",
|
||||
"SETUID",
|
||||
"SETFCAP",
|
||||
"SETPCAP",
|
||||
"NET_BIND_SERVICE",
|
||||
"KILL",
|
||||
"AUDIT_WRITE",
|
||||
},
|
||||
Rlimits: []configs.Rlimit{
|
||||
{
|
||||
Type: syscall.RLIMIT_NOFILE,
|
||||
Hard: 1024,
|
||||
Soft: 1024,
|
||||
},
|
||||
},
|
||||
// http://man7.org/linux/man-pages/man2/syscalls.2.html
|
||||
Seccomp: &configs.Seccomp{
|
||||
Syscalls: []*configs.Syscall{
|
||||
{
|
||||
Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: int(system.SysSetns()),
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_CHROOT, // http://man7.org/linux/man-pages/man2/chroot.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_SETDOMAINNAME, // http://man7.org/linux/man-pages/man2/setdomainname.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_SETHOSTNAME, // http://man7.org/linux/man-pages/man2/sethostname.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
Args: []*configs.Arg{
|
||||
{
|
||||
Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
|
||||
Value: syscall.CLONE_NEWUSER,
|
||||
Op: configs.MaskEqualTo,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var lowProfile = &securityProfile{
|
||||
Capabilities: []string{
|
||||
"CHOWN",
|
||||
"DAC_OVERRIDE",
|
||||
"FSETID",
|
||||
"FOWNER",
|
||||
"SETGID",
|
||||
"SETUID",
|
||||
"SYS_CHROOT",
|
||||
"SETFCAP",
|
||||
"SETPCAP",
|
||||
"NET_BIND_SERVICE",
|
||||
"KILL",
|
||||
"AUDIT_WRITE",
|
||||
},
|
||||
Rlimits: []configs.Rlimit{
|
||||
{
|
||||
Type: syscall.RLIMIT_NOFILE,
|
||||
Hard: 1024,
|
||||
Soft: 1024,
|
||||
},
|
||||
},
|
||||
// http://man7.org/linux/man-pages/man2/syscalls.2.html
|
||||
Seccomp: &configs.Seccomp{
|
||||
Syscalls: []*configs.Syscall{
|
||||
{
|
||||
Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: int(system.SysSetns()),
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
},
|
||||
{
|
||||
Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
|
||||
Action: configs.Action(syscall.EPERM),
|
||||
Args: []*configs.Arg{
|
||||
{
|
||||
Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
|
||||
Value: syscall.CLONE_NEWUSER,
|
||||
Op: configs.MaskEqualTo,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
package seccomp
|
||||
|
||||
import "strings"
|
||||
|
||||
// bpfLabel pairs a symbolic label name with the position of the filter
// instruction it marks.
type bpfLabel struct {
	label    string
	location uint32
}

// bpfLabels is an ordered table of labels; labelIndex hands out positions in
// this table.
type bpfLabels []bpfLabel

// labelIndex returns the table position of the label named lb, comparing
// names case-insensitively. When no entry matches, a new entry with the
// location 0xffffffff is appended and its position is returned.
func labelIndex(labels *bpfLabels, lb string) uint32 {
	for i, known := range *labels {
		if strings.EqualFold(lb, known.label) {
			return uint32(i)
		}
	}
	*labels = append(*labels, bpfLabel{label: lb, location: 0xffffffff})
	return uint32(len(*labels) - 1)
}
|
||||
|
||||
func scmpBpfStmt(code uint16, k uint32) sockFilter {
|
||||
return sockFilter{code, 0, 0, k}
|
||||
}
|
||||
|
||||
func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter {
|
||||
return sockFilter{code, jt, jf, k}
|
||||
}
|
|
@ -0,0 +1,144 @@
|
|||
package seccomp
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// labelTemplate is the format of argument-check label names; it is filled in
// with a syscall number and an argument index.
const labelTemplate = "lb-%d-%d"
|
||||
|
||||
// Action describes what happens to the calling process when a filtered
// syscall is performed. Values other than the named constants are treated as
// an errno (see Error).
type Action int

const (
	// Kill the calling process of the syscall.
	Kill Action = -3
	// Trap and coredump the calling process of the syscall.
	Trap Action = -2
	// Allow the syscall to be completed.
	Allow Action = -1
)
|
||||
|
||||
// Syscall is the specified syscall, action, and any type of arguments
|
||||
// to filter on.
|
||||
type Syscall struct {
|
||||
// Value is the syscall number.
|
||||
Value uint32
|
||||
// Action is the action to perform when the specified syscall is made.
|
||||
Action Action
|
||||
// Args are filters that can be specified on the arguments to the syscall.
|
||||
Args Args
|
||||
}
|
||||
|
||||
func (s *Syscall) scmpAction() uint32 {
|
||||
switch s.Action {
|
||||
case Allow:
|
||||
return retAllow
|
||||
case Trap:
|
||||
return retTrap
|
||||
case Kill:
|
||||
return retKill
|
||||
}
|
||||
return actionErrno(uint32(s.Action))
|
||||
}
|
||||
|
||||
// Arg represents an argument to the syscall with the argument's index,
|
||||
// the operator to apply when matching, and the argument's value at that time.
|
||||
type Arg struct {
|
||||
Index uint32 // index of args which start from zero
|
||||
Op Operator // operation, such as EQ/NE/GE/LE
|
||||
Value uint // the value of arg
|
||||
}
|
||||
|
||||
type Args [][]Arg
|
||||
|
||||
// Errors returned while building a filter.
var (
	// ErrUnresolvedLabel is returned when a jump refers to a label that has
	// no recorded location.
	ErrUnresolvedLabel = errors.New("seccomp: unresolved label")
	// ErrDuplicateLabel is returned when the same label is placed twice.
	ErrDuplicateLabel = errors.New("seccomp: duplicate label use")
	// ErrUnsupportedOperation is returned for an unknown argument operator.
	ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument")
)
|
||||
|
||||
// Error returns an Action that will be used to send the calling
|
||||
// process the specified errno when the syscall is made.
|
||||
func Error(code syscall.Errno) Action {
|
||||
return Action(code)
|
||||
}
|
||||
|
||||
// New returns a new syscall context for use.
|
||||
func New() *Context {
|
||||
return &Context{
|
||||
syscalls: make(map[uint32]*Syscall),
|
||||
}
|
||||
}
|
||||
|
||||
// Context holds syscalls for the current process to limit the type of
|
||||
// actions the calling process can make.
|
||||
type Context struct {
|
||||
syscalls map[uint32]*Syscall
|
||||
}
|
||||
|
||||
// Add will add the specified syscall, action, and arguments to the seccomp
|
||||
// Context.
|
||||
func (c *Context) Add(s *Syscall) {
|
||||
c.syscalls[s.Value] = s
|
||||
}
|
||||
|
||||
// Remove removes the specified syscall configuration from the Context.
|
||||
func (c *Context) Remove(call uint32) {
|
||||
delete(c.syscalls, call)
|
||||
}
|
||||
|
||||
// Load will apply the Context to the calling process makeing any secccomp process changes
|
||||
// apply after the context is loaded.
|
||||
func (c *Context) Load() error {
|
||||
filter, err := c.newFilter()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
prog := newSockFprog(filter)
|
||||
return prog.set()
|
||||
}
|
||||
|
||||
func (c *Context) newFilter() ([]sockFilter, error) {
|
||||
var (
|
||||
labels bpfLabels
|
||||
f = newFilter()
|
||||
)
|
||||
for _, s := range c.syscalls {
|
||||
f.addSyscall(s, &labels)
|
||||
}
|
||||
f.allow()
|
||||
// process args for the syscalls
|
||||
for _, s := range c.syscalls {
|
||||
if err := f.addArguments(s, &labels); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
// apply labels for arguments
|
||||
idx := int32(len(*f) - 1)
|
||||
for ; idx >= 0; idx-- {
|
||||
lf := &(*f)[idx]
|
||||
if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) {
|
||||
continue
|
||||
}
|
||||
rel := int32(lf.jt)<<8 | int32(lf.jf)
|
||||
if ((jumpJT << 8) | jumpJF) == rel {
|
||||
if labels[lf.k].location == 0xffffffff {
|
||||
return nil, ErrUnresolvedLabel
|
||||
}
|
||||
lf.k = labels[lf.k].location - uint32(idx+1)
|
||||
lf.jt = 0
|
||||
lf.jf = 0
|
||||
} else if ((labelJT << 8) | labelJF) == rel {
|
||||
if labels[lf.k].location != 0xffffffff {
|
||||
return nil, ErrDuplicateLabel
|
||||
}
|
||||
labels[lf.k].location = uint32(idx)
|
||||
lf.k = 0
|
||||
lf.jt = 0
|
||||
lf.jf = 0
|
||||
}
|
||||
}
|
||||
return *f, nil
|
||||
}
|
|
@ -0,0 +1,116 @@
|
|||
package seccomp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// sockFilter is a single filter instruction.
type sockFilter struct {
	code   uint16 // operation code
	jt, jf uint8  // jump offsets (see scmpBpfJump)
	k      uint32 // operand
}
|
||||
|
||||
func newFilter() *filter {
|
||||
var f filter
|
||||
f = append(f, sockFilter{
|
||||
pfLD + syscall.BPF_W + syscall.BPF_ABS,
|
||||
0,
|
||||
0,
|
||||
uint32(unsafe.Offsetof(secData.nr)),
|
||||
})
|
||||
return &f
|
||||
}
|
||||
|
||||
// filter is a filter program under construction: an ordered list of
// instructions that the builder methods append to.
type filter []sockFilter
|
||||
|
||||
func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) {
|
||||
if len(s.Args) == 0 {
|
||||
f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()))
|
||||
} else {
|
||||
if len(s.Args[0]) > 0 {
|
||||
lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index)
|
||||
f.call(s.Value,
|
||||
scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
|
||||
jumpJT, jumpJF))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error {
|
||||
for i := 0; len(s.Args) > i; i++ {
|
||||
if len(s.Args[i]) > 0 {
|
||||
lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index)
|
||||
f.label(labels, lb)
|
||||
f.arg(s.Args[i][0].Index)
|
||||
}
|
||||
for j := 0; j < len(s.Args[i]); j++ {
|
||||
var jf sockFilter
|
||||
if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 {
|
||||
lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index)
|
||||
jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA,
|
||||
labelIndex(labels, lbj), jumpJT, jumpJF)
|
||||
} else {
|
||||
jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())
|
||||
}
|
||||
if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
f.allow()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *filter) label(labels *bpfLabels, lb string) {
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF))
|
||||
}
|
||||
|
||||
func (f *filter) call(nr uint32, jt sockFilter) {
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1))
|
||||
*f = append(*f, jt)
|
||||
}
|
||||
|
||||
func (f *filter) allow() {
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow))
|
||||
}
|
||||
|
||||
func (f *filter) deny() {
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap))
|
||||
}
|
||||
|
||||
func (f *filter) arg(index uint32) {
|
||||
arg(f, index)
|
||||
}
|
||||
|
||||
func (f *filter) op(operation Operator, v uint, jf sockFilter) error {
|
||||
switch operation {
|
||||
case EqualTo:
|
||||
jumpEqualTo(f, v, jf)
|
||||
case NotEqualTo:
|
||||
jumpNotEqualTo(f, v, jf)
|
||||
case GreatherThan:
|
||||
jumpGreaterThan(f, v, jf)
|
||||
case LessThan:
|
||||
jumpLessThan(f, v, jf)
|
||||
case MaskEqualTo:
|
||||
jumpMaskEqualTo(f, v, jf)
|
||||
default:
|
||||
return ErrUnsupportedOperation
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func arg(f *filter, idx uint32) {
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx)))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx)))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1))
|
||||
}
|
||||
|
||||
func jump(f *filter, labels *bpfLabels, lb string) {
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
|
||||
jumpJT, jumpJF))
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
// +build linux,amd64
|
||||
|
||||
package seccomp
|
||||
|
||||
// Using BPF filters
|
||||
//
|
||||
// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf
|
||||
import "syscall"
|
||||
|
||||
func jumpGreaterThan(f *filter, v uint, jt sockFilter) {
|
||||
lo := uint32(uint64(v) % 0x100000000)
|
||||
hi := uint32(uint64(v) / 0x100000000)
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0))
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||
*f = append(*f, jt)
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||
}
|
||||
|
||||
func jumpEqualTo(f *filter, v uint, jt sockFilter) {
|
||||
lo := uint32(uint64(v) % 0x100000000)
|
||||
hi := uint32(uint64(v) / 0x100000000)
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||
*f = append(*f, jt)
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||
}
|
||||
|
||||
func jumpLessThan(f *filter, v uint, jt sockFilter) {
|
||||
lo := uint32(uint64(v) % 0x100000000)
|
||||
hi := uint32(uint64(v) / 0x100000000)
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0))
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||
*f = append(*f, jt)
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||
}
|
||||
|
||||
func jumpNotEqualTo(f *filter, v uint, jt sockFilter) {
|
||||
lo := uint32(uint64(v) % 0x100000000)
|
||||
hi := uint32(uint64(v) / 0x100000000)
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||
*f = append(*f, jt)
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||
}
|
||||
|
||||
// this checks for a value inside a mask. The evalusation is equal to doing
|
||||
// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER
|
||||
func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) {
|
||||
lo := uint32(uint64(v) % 0x100000000)
|
||||
hi := uint32(uint64(v) / 0x100000000)
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v)))
|
||||
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2))
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||
*f = append(*f, jt)
|
||||
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||
}
|
|
@ -0,0 +1,122 @@
|
|||
// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go.
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Operator is the comparison applied between a syscall argument and a rule's
// value.
type Operator int

// The numbering starts at zero and increases by one in declaration order.
const (
	EqualTo      Operator = 0
	NotEqualTo   Operator = 1
	GreatherThan Operator = 2
	LessThan     Operator = 3
	MaskEqualTo  Operator = 4
)
|
||||
|
||||
// Marker values stored in the jt/jf fields of unresolved instructions:
// jump* tags a symbolic jump to a label, label* tags the label's own
// placeholder.
const (
	jumpJT, jumpJF   = 0xff, 0xff
	labelJT, labelJF = 0xfe, 0xfe
)
|
||||
|
||||
// Numeric constants used when building and installing a filter.
const (
	// pfLD is the load class added to the first filter instruction's code.
	pfLD = 0x0

	// Filter return values for the kill, trap and allow actions.
	retKill  = 0x00000000
	retTrap  = 0x00030000
	retAllow = 0x7fff0000

	// modeFilter is the mode argument passed when installing the filter.
	modeFilter = 0x2
	// prSetNoNewPrivileges is the prctl option issued before installing.
	prSetNoNewPrivileges = 0x26
)
|
||||
|
||||
// actionErrno encodes errno as a filter return value: the low 16 bits of
// errno combined with the 0x00050000 action code.
func actionErrno(errno uint32) uint32 {
	const (
		retErrno = 0x00050000
		dataMask = 0x0000ffff
	)
	return retErrno | errno&dataMask
}
|
||||
|
||||
// secData is a zero-valued struct kept only for its layout: the filter
// builders take field offsets from it with unsafe.Offsetof.
var secData struct {
	nr         int32
	arch       uint32
	insPointer uint64
	args       [6]uint64
}
|
||||
|
||||
// isLittle reports whether the host is little-endian. It is computed once at
// start-up by checking whether the first byte in memory of the integer 0x1234
// is non-zero.
var isLittle = func() bool {
	probe := 0x1234
	firstByte := *(*byte)(unsafe.Pointer(&probe))
	return firstByte != 0
}()
|
||||
|
||||
// endianSupport computes the offsets of the two 32-bit halves of a syscall
// argument inside secData, taking the host byte order into account.
type endianSupport struct{}

// endian is the shared endianSupport value used by the filter builders.
var endian endianSupport
|
||||
|
||||
func (e endianSupport) hi(i uint32) uint32 {
|
||||
if isLittle {
|
||||
return e.little(i)
|
||||
}
|
||||
return e.big(i)
|
||||
}
|
||||
|
||||
func (e endianSupport) low(i uint32) uint32 {
|
||||
if isLittle {
|
||||
return e.big(i)
|
||||
}
|
||||
return e.little(i)
|
||||
}
|
||||
|
||||
func (endianSupport) big(idx uint32) uint32 {
|
||||
if idx >= 6 {
|
||||
return 0
|
||||
}
|
||||
return uint32(unsafe.Offsetof(secData.args)) + 8*idx
|
||||
}
|
||||
|
||||
func (endianSupport) little(idx uint32) uint32 {
|
||||
if idx < 0 || idx >= 6 {
|
||||
return 0
|
||||
}
|
||||
return uint32(unsafe.Offsetof(secData.args)) +
|
||||
uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch))
|
||||
}
|
||||
|
||||
// prctl issues the prctl syscall with the given option and arguments. It
// returns the errno as an error when the call fails and nil otherwise.
func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error {
	if _, _, errno := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0); errno != 0 {
		return errno
	}
	return nil
}
|
||||
|
||||
func newSockFprog(filter []sockFilter) *sockFprog {
|
||||
return &sockFprog{
|
||||
len: uint16(len(filter)),
|
||||
filt: filter,
|
||||
}
|
||||
}
|
||||
|
||||
type sockFprog struct {
|
||||
len uint16
|
||||
filt []sockFilter
|
||||
}
|
||||
|
||||
func (s *sockFprog) set() error {
|
||||
_, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP),
|
||||
uintptr(modeFilter), uintptr(unsafe.Pointer(s)))
|
||||
if err != 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -99,5 +99,8 @@ func (l *linuxStandardInit) Init() error {
|
|||
if syscall.Getppid() != l.parentPid {
|
||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||
}
|
||||
if err := finalizeSeccomp(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||||
}
|
||||
|
|
|
@ -21,16 +21,20 @@ var setNsMap = map[string]uintptr{
|
|||
"linux/s390x": 339,
|
||||
}
|
||||
|
||||
// sysSetns is the setns syscall number for the running GOOS/GOARCH, or zero
// when setNsMap has no entry for the platform.
var sysSetns = setNsMap[runtime.GOOS+"/"+runtime.GOARCH]
|
||||
|
||||
func SysSetns() uint32 {
|
||||
return uint32(sysSetns)
|
||||
}
|
||||
|
||||
func Setns(fd uintptr, flags uintptr) error {
|
||||
ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
|
||||
if !exists {
|
||||
return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
|
||||
}
|
||||
|
||||
_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
|
||||
if err != 0 {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue