Merge pull request #613 from crosbymichael/seccomp-args
Golang seccomp package
This commit is contained in:
commit
204502647d
|
@ -13,6 +13,40 @@ type IDMap struct {
|
||||||
Size int `json:"size"`
|
Size int `json:"size"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Seccomp struct {
|
||||||
|
Syscalls []*Syscall `json:"syscalls"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Action is the action taken when a configured syscall is made.
//
// The named constants occupy the negative values -3..-1. Any other value is
// passed through as an errno number when the configuration is converted for
// the seccomp package (e.g. Action(syscall.EPERM)).
type Action int

const (
	// Kill is the action with value -3.
	Kill Action = -3
	// Trap is the action with value -2.
	Trap Action = -2
	// Allow is the action with value -1.
	Allow Action = -1
)
|
||||||
|
|
||||||
|
// Operator selects the comparison applied to a syscall argument in an Arg.
type Operator int

// The operators are numbered consecutively starting at zero, so the zero
// value of Operator is EqualTo.
const (
	// EqualTo is converted to seccomp.EqualTo.
	EqualTo Operator = iota
	// NotEqualTo is converted to seccomp.NotEqualTo.
	NotEqualTo
	// GreatherThan is converted to seccomp.GreatherThan. The spelling is part
	// of the exported name and is kept as is.
	GreatherThan
	// LessThan is converted to seccomp.LessThan.
	LessThan
	// MaskEqualTo is converted to seccomp.MaskEqualTo.
	MaskEqualTo
)
|
||||||
|
|
||||||
|
type Arg struct {
|
||||||
|
Index int `json:"index"`
|
||||||
|
Value uint32 `json:"value"`
|
||||||
|
Op Operator `json:"op"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Syscall struct {
|
||||||
|
Value int `json:"value"`
|
||||||
|
Action Action `json:"action"`
|
||||||
|
Args []*Arg `json:"args"`
|
||||||
|
}
|
||||||
|
|
||||||
// TODO Windows. Many of these fields should be factored out into those parts
|
// TODO Windows. Many of these fields should be factored out into those parts
|
||||||
// which are common across platforms, and those which are platform specific.
|
// which are common across platforms, and those which are platform specific.
|
||||||
|
|
||||||
|
@ -104,4 +138,9 @@ type Config struct {
|
||||||
// SystemProperties is a map of properties and their values. It is the equivalent of using
|
// SystemProperties is a map of properties and their values. It is the equivalent of using
|
||||||
// sysctl -w my.property.name value in Linux.
|
// sysctl -w my.property.name value in Linux.
|
||||||
SystemProperties map[string]string `json:"system_properties"`
|
SystemProperties map[string]string `json:"system_properties"`
|
||||||
|
|
||||||
|
// Seccomp allows actions to be taken whenever a syscall is made within the container.
|
||||||
|
// By default, all syscalls are allowed; actions to allow, trap, kill, or return an errno
|
||||||
|
// can be specified on a per syscall basis.
|
||||||
|
Seccomp *Seccomp `json:"seccomp"`
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,7 @@ import (
|
||||||
"github.com/docker/libcontainer/cgroups"
|
"github.com/docker/libcontainer/cgroups"
|
||||||
"github.com/docker/libcontainer/configs"
|
"github.com/docker/libcontainer/configs"
|
||||||
"github.com/docker/libcontainer/netlink"
|
"github.com/docker/libcontainer/netlink"
|
||||||
|
"github.com/docker/libcontainer/seccomp"
|
||||||
"github.com/docker/libcontainer/system"
|
"github.com/docker/libcontainer/system"
|
||||||
"github.com/docker/libcontainer/user"
|
"github.com/docker/libcontainer/user"
|
||||||
"github.com/docker/libcontainer/utils"
|
"github.com/docker/libcontainer/utils"
|
||||||
|
@ -259,3 +260,61 @@ func killCgroupProcesses(m cgroups.Manager) error {
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func finalizeSeccomp(config *initConfig) error {
|
||||||
|
if config.Config.Seccomp == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
context := seccomp.New()
|
||||||
|
for _, s := range config.Config.Seccomp.Syscalls {
|
||||||
|
ss := &seccomp.Syscall{
|
||||||
|
Value: uint32(s.Value),
|
||||||
|
Action: seccompAction(s.Action),
|
||||||
|
}
|
||||||
|
if len(s.Args) > 0 {
|
||||||
|
ss.Args = seccompArgs(s.Args)
|
||||||
|
}
|
||||||
|
context.Add(ss)
|
||||||
|
}
|
||||||
|
return context.Load()
|
||||||
|
}
|
||||||
|
|
||||||
|
func seccompAction(a configs.Action) seccomp.Action {
|
||||||
|
switch a {
|
||||||
|
case configs.Kill:
|
||||||
|
return seccomp.Kill
|
||||||
|
case configs.Trap:
|
||||||
|
return seccomp.Trap
|
||||||
|
case configs.Allow:
|
||||||
|
return seccomp.Allow
|
||||||
|
}
|
||||||
|
return seccomp.Error(syscall.Errno(int(a)))
|
||||||
|
}
|
||||||
|
|
||||||
|
func seccompArgs(args []*configs.Arg) seccomp.Args {
|
||||||
|
var sa []seccomp.Arg
|
||||||
|
for _, a := range args {
|
||||||
|
sa = append(sa, seccomp.Arg{
|
||||||
|
Index: uint32(a.Index),
|
||||||
|
Op: seccompOperator(a.Op),
|
||||||
|
Value: uint(a.Value),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return seccomp.Args{sa}
|
||||||
|
}
|
||||||
|
|
||||||
|
func seccompOperator(o configs.Operator) seccomp.Operator {
|
||||||
|
switch o {
|
||||||
|
case configs.EqualTo:
|
||||||
|
return seccomp.EqualTo
|
||||||
|
case configs.NotEqualTo:
|
||||||
|
return seccomp.NotEqualTo
|
||||||
|
case configs.GreatherThan:
|
||||||
|
return seccomp.GreatherThan
|
||||||
|
case configs.LessThan:
|
||||||
|
return seccomp.LessThan
|
||||||
|
case configs.MaskEqualTo:
|
||||||
|
return seccomp.MaskEqualTo
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
|
@ -714,3 +714,27 @@ func TestSystemProperties(t *testing.T) {
|
||||||
t.Fatalf("kernel.shmmni property expected to be 8192, but is %s", shmmniOutput)
|
t.Fatalf("kernel.shmmni property expected to be 8192, but is %s", shmmniOutput)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSeccompNoChown(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rootfs, err := newRootfs()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer remove(rootfs)
|
||||||
|
config := newTemplateConfig(rootfs)
|
||||||
|
config.Seccomp = &configs.Seccomp{}
|
||||||
|
config.Seccomp.Syscalls = append(config.Seccomp.Syscalls, &configs.Syscall{
|
||||||
|
Value: syscall.SYS_CHOWN,
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
})
|
||||||
|
buffers, _, err := runContainer(config, "", "/bin/sh", "-c", "chown 1:1 /tmp")
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("running chown in a container should fail")
|
||||||
|
}
|
||||||
|
if s := buffers.String(); !strings.Contains(s, "not permitted") {
|
||||||
|
t.Fatalf("running chown should result in an EPERM but got %q", s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -122,11 +122,11 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe
|
||||||
|
|
||||||
err = container.Start(process)
|
err = container.Start(process)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, -1, err
|
return buffers, -1, err
|
||||||
}
|
}
|
||||||
ps, err := process.Wait()
|
ps, err := process.Wait()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, -1, err
|
return buffers, -1, err
|
||||||
}
|
}
|
||||||
status := ps.Sys().(syscall.WaitStatus)
|
status := ps.Sys().(syscall.WaitStatus)
|
||||||
if status.Exited() {
|
if status.Exited() {
|
||||||
|
@ -134,7 +134,7 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe
|
||||||
} else if status.Signaled() {
|
} else if status.Signaled() {
|
||||||
exitCode = -int(status.Signal())
|
exitCode = -int(status.Signal())
|
||||||
} else {
|
} else {
|
||||||
return nil, -1, err
|
return buffers, -1, err
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,32 +19,33 @@ import (
|
||||||
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
|
||||||
|
|
||||||
var createFlags = []cli.Flag{
|
var createFlags = []cli.Flag{
|
||||||
cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"},
|
cli.BoolFlag{Name: "cgroup", Usage: "mount the cgroup data for the container"},
|
||||||
cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"},
|
cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"},
|
||||||
cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"},
|
|
||||||
cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"},
|
|
||||||
cli.IntFlag{Name: "cpushares", Usage: "set the cpushares for the container"},
|
cli.IntFlag{Name: "cpushares", Usage: "set the cpushares for the container"},
|
||||||
cli.IntFlag{Name: "memory-limit", Usage: "set the memory limit for the container"},
|
cli.IntFlag{Name: "memory-limit", Usage: "set the memory limit for the container"},
|
||||||
cli.IntFlag{Name: "memory-swap", Usage: "set the memory swap limit for the container"},
|
cli.IntFlag{Name: "memory-swap", Usage: "set the memory swap limit for the container"},
|
||||||
|
cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"},
|
||||||
|
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
|
||||||
|
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"},
|
||||||
|
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
|
||||||
cli.StringFlag{Name: "cpuset-cpus", Usage: "set the cpuset cpus"},
|
cli.StringFlag{Name: "cpuset-cpus", Usage: "set the cpuset cpus"},
|
||||||
cli.StringFlag{Name: "cpuset-mems", Usage: "set the cpuset mems"},
|
cli.StringFlag{Name: "cpuset-mems", Usage: "set the cpuset mems"},
|
||||||
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
|
|
||||||
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
|
|
||||||
cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
|
|
||||||
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
|
|
||||||
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
|
|
||||||
cli.StringFlag{Name: "hostname", Value: "nsinit", Usage: "hostname value for the container"},
|
cli.StringFlag{Name: "hostname", Value: "nsinit", Usage: "hostname value for the container"},
|
||||||
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
|
|
||||||
cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"},
|
cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"},
|
||||||
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
|
|
||||||
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
|
|
||||||
cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"},
|
cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"},
|
||||||
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"},
|
cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
|
||||||
|
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
|
||||||
|
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
|
||||||
|
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
|
||||||
|
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
|
||||||
|
cli.StringFlag{Name: "security", Value: "", Usage: "set the security profile (high, medium, low)"},
|
||||||
|
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
|
||||||
cli.StringFlag{Name: "veth-address", Usage: "veth ip address"},
|
cli.StringFlag{Name: "veth-address", Usage: "veth ip address"},
|
||||||
|
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"},
|
||||||
cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"},
|
cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"},
|
||||||
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"},
|
cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"},
|
||||||
cli.BoolFlag{Name: "cgroup", Usage: "mount the cgroup data for the container"},
|
|
||||||
cli.StringSliceFlag{Name: "sysctl", Value: &cli.StringSlice{}, Usage: "set system properties in the container"},
|
cli.StringSliceFlag{Name: "sysctl", Value: &cli.StringSlice{}, Usage: "set system properties in the container"},
|
||||||
|
cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"},
|
||||||
}
|
}
|
||||||
|
|
||||||
var configCommand = cli.Command{
|
var configCommand = cli.Command{
|
||||||
|
@ -203,6 +204,24 @@ func modify(config *configs.Config, context *cli.Context) {
|
||||||
Device: "cgroup",
|
Device: "cgroup",
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
modifySecurityProfile(context, config)
|
||||||
|
}
|
||||||
|
|
||||||
|
func modifySecurityProfile(context *cli.Context, config *configs.Config) {
|
||||||
|
profileName := context.String("security")
|
||||||
|
if profileName == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
profile := profiles[profileName]
|
||||||
|
if profile == nil {
|
||||||
|
logrus.Fatalf("invalid profile name %q", profileName)
|
||||||
|
}
|
||||||
|
config.Rlimits = profile.Rlimits
|
||||||
|
config.Capabilities = profile.Capabilities
|
||||||
|
config.Seccomp = profile.Seccomp
|
||||||
|
config.AppArmorProfile = profile.ApparmorProfile
|
||||||
|
config.MountLabel = profile.MountLabel
|
||||||
|
config.ProcessLabel = profile.ProcessLabel
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTemplate() *configs.Config {
|
func getTemplate() *configs.Config {
|
||||||
|
@ -290,13 +309,5 @@ func getTemplate() *configs.Config {
|
||||||
Flags: defaultMountFlags | syscall.MS_RDONLY,
|
Flags: defaultMountFlags | syscall.MS_RDONLY,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Rlimits: []configs.Rlimit{
|
|
||||||
{
|
|
||||||
Type: syscall.RLIMIT_NOFILE,
|
|
||||||
Hard: 1024,
|
|
||||||
Soft: 1024,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,272 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"syscall"
|
||||||
|
|
||||||
|
"github.com/docker/libcontainer/configs"
|
||||||
|
"github.com/docker/libcontainer/system"
|
||||||
|
)
|
||||||
|
|
||||||
|
var profiles = map[string]*securityProfile{
|
||||||
|
"high": highProfile,
|
||||||
|
"medium": mediumProfile,
|
||||||
|
"low": lowProfile,
|
||||||
|
}
|
||||||
|
|
||||||
|
type securityProfile struct {
|
||||||
|
Capabilities []string `json:"capabilities"`
|
||||||
|
ApparmorProfile string `json:"apparmor_profile"`
|
||||||
|
MountLabel string `json:"mount_label"`
|
||||||
|
ProcessLabel string `json:"process_label"`
|
||||||
|
Rlimits []configs.Rlimit `json:"rlimits"`
|
||||||
|
Seccomp *configs.Seccomp `json:"seccomp"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// this should be a runtime config that is not able to do things like apt-get or yum install.
|
||||||
|
var highProfile = &securityProfile{
|
||||||
|
Capabilities: []string{
|
||||||
|
"NET_BIND_SERVICE",
|
||||||
|
"KILL",
|
||||||
|
"AUDIT_WRITE",
|
||||||
|
},
|
||||||
|
Rlimits: []configs.Rlimit{
|
||||||
|
{
|
||||||
|
Type: syscall.RLIMIT_NOFILE,
|
||||||
|
Hard: 1024,
|
||||||
|
Soft: 1024,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
// http://man7.org/linux/man-pages/man2/syscalls.2.html
|
||||||
|
Seccomp: &configs.Seccomp{
|
||||||
|
Syscalls: []*configs.Syscall{
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CAPSET, // http://man7.org/linux/man-pages/man2/capset.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: int(system.SysSetns()),
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CHMOD, // http://man7.org/linux/man-pages/man2/chmod.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CHOWN, // http://man7.org/linux/man-pages/man2/chown.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_LINK, // http://man7.org/linux/man-pages/man2/link.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_LINKAT, // http://man7.org/linux/man-pages/man2/linkat.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_UNLINK, // http://man7.org/linux/man-pages/man2/unlink.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_UNLINKAT, // http://man7.org/linux/man-pages/man2/unlinkat.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CHROOT, // http://man7.org/linux/man-pages/man2/chroot.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_SETDOMAINNAME, // http://man7.org/linux/man-pages/man2/setdomainname.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_SETHOSTNAME, // http://man7.org/linux/man-pages/man2/sethostname.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
Args: []*configs.Arg{
|
||||||
|
{
|
||||||
|
Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
|
||||||
|
Value: syscall.CLONE_NEWUSER,
|
||||||
|
Op: configs.MaskEqualTo,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is a medium level profile that should be able to do things like installing from
|
||||||
|
// apt-get or yum.
|
||||||
|
var mediumProfile = &securityProfile{
|
||||||
|
Capabilities: []string{
|
||||||
|
"CHOWN",
|
||||||
|
"DAC_OVERRIDE",
|
||||||
|
"FSETID",
|
||||||
|
"FOWNER",
|
||||||
|
"SETGID",
|
||||||
|
"SETUID",
|
||||||
|
"SETFCAP",
|
||||||
|
"SETPCAP",
|
||||||
|
"NET_BIND_SERVICE",
|
||||||
|
"KILL",
|
||||||
|
"AUDIT_WRITE",
|
||||||
|
},
|
||||||
|
Rlimits: []configs.Rlimit{
|
||||||
|
{
|
||||||
|
Type: syscall.RLIMIT_NOFILE,
|
||||||
|
Hard: 1024,
|
||||||
|
Soft: 1024,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
// http://man7.org/linux/man-pages/man2/syscalls.2.html
|
||||||
|
Seccomp: &configs.Seccomp{
|
||||||
|
Syscalls: []*configs.Syscall{
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: int(system.SysSetns()),
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CHROOT, // http://man7.org/linux/man-pages/man2/chroot.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_SETDOMAINNAME, // http://man7.org/linux/man-pages/man2/setdomainname.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_SETHOSTNAME, // http://man7.org/linux/man-pages/man2/sethostname.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
Args: []*configs.Arg{
|
||||||
|
{
|
||||||
|
Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
|
||||||
|
Value: syscall.CLONE_NEWUSER,
|
||||||
|
Op: configs.MaskEqualTo,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
var lowProfile = &securityProfile{
|
||||||
|
Capabilities: []string{
|
||||||
|
"CHOWN",
|
||||||
|
"DAC_OVERRIDE",
|
||||||
|
"FSETID",
|
||||||
|
"FOWNER",
|
||||||
|
"SETGID",
|
||||||
|
"SETUID",
|
||||||
|
"SYS_CHROOT",
|
||||||
|
"SETFCAP",
|
||||||
|
"SETPCAP",
|
||||||
|
"NET_BIND_SERVICE",
|
||||||
|
"KILL",
|
||||||
|
"AUDIT_WRITE",
|
||||||
|
},
|
||||||
|
Rlimits: []configs.Rlimit{
|
||||||
|
{
|
||||||
|
Type: syscall.RLIMIT_NOFILE,
|
||||||
|
Hard: 1024,
|
||||||
|
Soft: 1024,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
// http://man7.org/linux/man-pages/man2/syscalls.2.html
|
||||||
|
Seccomp: &configs.Seccomp{
|
||||||
|
Syscalls: []*configs.Syscall{
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: int(system.SysSetns()),
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
|
||||||
|
Action: configs.Action(syscall.EPERM),
|
||||||
|
Args: []*configs.Arg{
|
||||||
|
{
|
||||||
|
Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
|
||||||
|
Value: syscall.CLONE_NEWUSER,
|
||||||
|
Op: configs.MaskEqualTo,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
package seccomp
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
|
// bpfLabel is a named position in a filter program. The field order is
// relied upon by positional literals.
type bpfLabel struct {
	// label is the label's name.
	label string
	// location is the instruction index of the label; labelIndex creates new
	// labels with 0xffffffff, which newFilter treats as "not yet resolved".
	location uint32
}

// bpfLabels is the table of labels used while a filter is assembled; a
// label's index in the table identifies it in jump instructions.
type bpfLabels []bpfLabel
|
||||||
|
|
||||||
|
// labelIndex returns the index for the label if it exists in the slice.
|
||||||
|
// if it does not exist in the slice it appends the label lb to the end
|
||||||
|
// of the slice and returns the index.
|
||||||
|
func labelIndex(labels *bpfLabels, lb string) uint32 {
|
||||||
|
var id uint32
|
||||||
|
for id = 0; id < uint32(len(*labels)); id++ {
|
||||||
|
if strings.EqualFold(lb, (*labels)[id].label) {
|
||||||
|
return id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*labels = append(*labels, bpfLabel{lb, 0xffffffff})
|
||||||
|
return id
|
||||||
|
}
|
||||||
|
|
||||||
|
func scmpBpfStmt(code uint16, k uint32) sockFilter {
|
||||||
|
return sockFilter{code, 0, 0, k}
|
||||||
|
}
|
||||||
|
|
||||||
|
func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter {
|
||||||
|
return sockFilter{code, jt, jf, k}
|
||||||
|
}
|
|
@ -0,0 +1,144 @@
|
||||||
|
package seccomp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"syscall"
|
||||||
|
)
|
||||||
|
|
||||||
|
// labelTemplate is the format used to name the label of an argument group; it
// is filled with the syscall number and the Index of the group's first Arg.
const labelTemplate = "lb-%d-%d"
|
||||||
|
|
||||||
|
// Action is the type of action that will be taken when a syscall is
// performed. The named constants use the negative values -3..-1; any other
// value is turned into an errno return by Syscall.scmpAction.
type Action int

const (
	// Kill kills the calling process of the syscall.
	Kill Action = -3
	// Trap traps and coredumps the calling process of the syscall.
	Trap Action = -2
	// Allow lets the syscall complete.
	Allow Action = -1
)
|
||||||
|
|
||||||
|
// Syscall is the specified syscall, action, and any type of arguments
|
||||||
|
// to filter on.
|
||||||
|
type Syscall struct {
|
||||||
|
// Value is the syscall number.
|
||||||
|
Value uint32
|
||||||
|
// Action is the action to perform when the specified syscall is made.
|
||||||
|
Action Action
|
||||||
|
// Args are filters that can be specified on the arguments to the syscall.
|
||||||
|
Args Args
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Syscall) scmpAction() uint32 {
|
||||||
|
switch s.Action {
|
||||||
|
case Allow:
|
||||||
|
return retAllow
|
||||||
|
case Trap:
|
||||||
|
return retTrap
|
||||||
|
case Kill:
|
||||||
|
return retKill
|
||||||
|
}
|
||||||
|
return actionErrno(uint32(s.Action))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Arg represents an argument to the syscall with the argument's index,
|
||||||
|
// the operator to apply when matching, and the argument's value at that time.
|
||||||
|
type Arg struct {
|
||||||
|
Index uint32 // index of args which start from zero
|
||||||
|
Op Operator // operation, such as EQ/NE/GE/LE
|
||||||
|
Value uint // the value of arg
|
||||||
|
}
|
||||||
|
|
||||||
|
type Args [][]Arg
|
||||||
|
|
||||||
|
// Errors returned while a filter is being built.
var (
	// ErrUnresolvedLabel is returned when a jump refers to a label whose
	// location was never recorded.
	ErrUnresolvedLabel = errors.New("seccomp: unresolved label")
	// ErrDuplicateLabel is returned when the same label is placed more than
	// once.
	ErrDuplicateLabel = errors.New("seccomp: duplicate label use")
	// ErrUnsupportedOperation is returned for an Arg whose Op is not a known
	// Operator.
	ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument")
)
|
||||||
|
|
||||||
|
// Error returns an Action that will be used to send the calling
|
||||||
|
// process the specified errno when the syscall is made.
|
||||||
|
func Error(code syscall.Errno) Action {
|
||||||
|
return Action(code)
|
||||||
|
}
|
||||||
|
|
||||||
|
// New returns a new syscall context for use.
|
||||||
|
func New() *Context {
|
||||||
|
return &Context{
|
||||||
|
syscalls: make(map[uint32]*Syscall),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Context holds syscalls for the current process to limit the type of
|
||||||
|
// actions the calling process can make.
|
||||||
|
type Context struct {
|
||||||
|
syscalls map[uint32]*Syscall
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add will add the specified syscall, action, and arguments to the seccomp
|
||||||
|
// Context.
|
||||||
|
func (c *Context) Add(s *Syscall) {
|
||||||
|
c.syscalls[s.Value] = s
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove removes the specified syscall configuration from the Context.
|
||||||
|
func (c *Context) Remove(call uint32) {
|
||||||
|
delete(c.syscalls, call)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load will apply the Context to the calling process makeing any secccomp process changes
|
||||||
|
// apply after the context is loaded.
|
||||||
|
func (c *Context) Load() error {
|
||||||
|
filter, err := c.newFilter()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
prog := newSockFprog(filter)
|
||||||
|
return prog.set()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Context) newFilter() ([]sockFilter, error) {
|
||||||
|
var (
|
||||||
|
labels bpfLabels
|
||||||
|
f = newFilter()
|
||||||
|
)
|
||||||
|
for _, s := range c.syscalls {
|
||||||
|
f.addSyscall(s, &labels)
|
||||||
|
}
|
||||||
|
f.allow()
|
||||||
|
// process args for the syscalls
|
||||||
|
for _, s := range c.syscalls {
|
||||||
|
if err := f.addArguments(s, &labels); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// apply labels for arguments
|
||||||
|
idx := int32(len(*f) - 1)
|
||||||
|
for ; idx >= 0; idx-- {
|
||||||
|
lf := &(*f)[idx]
|
||||||
|
if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
rel := int32(lf.jt)<<8 | int32(lf.jf)
|
||||||
|
if ((jumpJT << 8) | jumpJF) == rel {
|
||||||
|
if labels[lf.k].location == 0xffffffff {
|
||||||
|
return nil, ErrUnresolvedLabel
|
||||||
|
}
|
||||||
|
lf.k = labels[lf.k].location - uint32(idx+1)
|
||||||
|
lf.jt = 0
|
||||||
|
lf.jf = 0
|
||||||
|
} else if ((labelJT << 8) | labelJF) == rel {
|
||||||
|
if labels[lf.k].location != 0xffffffff {
|
||||||
|
return nil, ErrDuplicateLabel
|
||||||
|
}
|
||||||
|
labels[lf.k].location = uint32(idx)
|
||||||
|
lf.k = 0
|
||||||
|
lf.jt = 0
|
||||||
|
lf.jf = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return *f, nil
|
||||||
|
}
|
|
@ -0,0 +1,116 @@
|
||||||
|
package seccomp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"syscall"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// sockFilter is a single filter instruction. The field order is relied upon
// by positional literals.
type sockFilter struct {
	code uint16 // instruction code
	jt   uint8  // jump offset taken when the condition is true
	jf   uint8  // jump offset taken when the condition is false
	k    uint32 // generic operand
}
|
||||||
|
|
||||||
|
func newFilter() *filter {
|
||||||
|
var f filter
|
||||||
|
f = append(f, sockFilter{
|
||||||
|
pfLD + syscall.BPF_W + syscall.BPF_ABS,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
uint32(unsafe.Offsetof(secData.nr)),
|
||||||
|
})
|
||||||
|
return &f
|
||||||
|
}
|
||||||
|
|
||||||
|
// filter is a filter program under construction: an ordered list of
// instructions that the builder methods append to.
type filter []sockFilter
|
||||||
|
|
||||||
|
func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) {
|
||||||
|
if len(s.Args) == 0 {
|
||||||
|
f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()))
|
||||||
|
} else {
|
||||||
|
if len(s.Args[0]) > 0 {
|
||||||
|
lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index)
|
||||||
|
f.call(s.Value,
|
||||||
|
scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
|
||||||
|
jumpJT, jumpJF))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error {
|
||||||
|
for i := 0; len(s.Args) > i; i++ {
|
||||||
|
if len(s.Args[i]) > 0 {
|
||||||
|
lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index)
|
||||||
|
f.label(labels, lb)
|
||||||
|
f.arg(s.Args[i][0].Index)
|
||||||
|
}
|
||||||
|
for j := 0; j < len(s.Args[i]); j++ {
|
||||||
|
var jf sockFilter
|
||||||
|
if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 {
|
||||||
|
lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index)
|
||||||
|
jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA,
|
||||||
|
labelIndex(labels, lbj), jumpJT, jumpJF)
|
||||||
|
} else {
|
||||||
|
jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())
|
||||||
|
}
|
||||||
|
if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
f.allow()
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *filter) label(labels *bpfLabels, lb string) {
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *filter) call(nr uint32, jt sockFilter) {
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1))
|
||||||
|
*f = append(*f, jt)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *filter) allow() {
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *filter) deny() {
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *filter) arg(index uint32) {
|
||||||
|
arg(f, index)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *filter) op(operation Operator, v uint, jf sockFilter) error {
|
||||||
|
switch operation {
|
||||||
|
case EqualTo:
|
||||||
|
jumpEqualTo(f, v, jf)
|
||||||
|
case NotEqualTo:
|
||||||
|
jumpNotEqualTo(f, v, jf)
|
||||||
|
case GreatherThan:
|
||||||
|
jumpGreaterThan(f, v, jf)
|
||||||
|
case LessThan:
|
||||||
|
jumpLessThan(f, v, jf)
|
||||||
|
case MaskEqualTo:
|
||||||
|
jumpMaskEqualTo(f, v, jf)
|
||||||
|
default:
|
||||||
|
return ErrUnsupportedOperation
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func arg(f *filter, idx uint32) {
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx)))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx)))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1))
|
||||||
|
}
|
||||||
|
|
||||||
|
func jump(f *filter, labels *bpfLabels, lb string) {
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
|
||||||
|
jumpJT, jumpJF))
|
||||||
|
}
|
|
@ -0,0 +1,68 @@
|
||||||
|
// +build linux,amd64
|
||||||
|
|
||||||
|
package seccomp
|
||||||
|
|
||||||
|
// Using BPF filters
|
||||||
|
//
|
||||||
|
// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf
|
||||||
|
import "syscall"
|
||||||
|
|
||||||
|
func jumpGreaterThan(f *filter, v uint, jt sockFilter) {
|
||||||
|
lo := uint32(uint64(v) % 0x100000000)
|
||||||
|
hi := uint32(uint64(v) / 0x100000000)
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0))
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||||
|
*f = append(*f, jt)
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||||
|
}
|
||||||
|
|
||||||
|
func jumpEqualTo(f *filter, v uint, jt sockFilter) {
|
||||||
|
lo := uint32(uint64(v) % 0x100000000)
|
||||||
|
hi := uint32(uint64(v) / 0x100000000)
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||||
|
*f = append(*f, jt)
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||||
|
}
|
||||||
|
|
||||||
|
func jumpLessThan(f *filter, v uint, jt sockFilter) {
|
||||||
|
lo := uint32(uint64(v) % 0x100000000)
|
||||||
|
hi := uint32(uint64(v) / 0x100000000)
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0))
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||||
|
*f = append(*f, jt)
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||||
|
}
|
||||||
|
|
||||||
|
func jumpNotEqualTo(f *filter, v uint, jt sockFilter) {
|
||||||
|
lo := uint32(uint64(v) % 0x100000000)
|
||||||
|
hi := uint32(uint64(v) / 0x100000000)
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||||
|
*f = append(*f, jt)
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||||
|
}
|
||||||
|
|
||||||
|
// this checks for a value inside a mask. The evalusation is equal to doing
|
||||||
|
// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER
|
||||||
|
func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) {
|
||||||
|
lo := uint32(uint64(v) % 0x100000000)
|
||||||
|
hi := uint32(uint64(v) / 0x100000000)
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v)))
|
||||||
|
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2))
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||||
|
*f = append(*f, jt)
|
||||||
|
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
|
||||||
|
}
|
|
@ -0,0 +1,122 @@
|
||||||
|
// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for Go.
|
||||||
|
package seccomp
|
||||||
|
|
||||||
|
import (
|
||||||
|
"syscall"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Operator selects the comparison that is applied to a syscall argument.
type Operator int

// Comparison operators understood by the filter builder. The numeric values
// follow declaration order starting at zero.
const (
	// EqualTo matches when the argument equals the value.
	EqualTo Operator = iota
	// NotEqualTo matches when the argument differs from the value.
	NotEqualTo
	// GreatherThan matches when the argument is greater than the value.
	// The misspelling is part of the exported name and is kept so existing
	// callers keep compiling.
	GreatherThan
	// LessThan matches when the argument is less than the value.
	LessThan
	// MaskEqualTo matches when the argument ANDed with the value equals the
	// value.
	MaskEqualTo
)
|
||||||
|
|
||||||
|
// Sentinel jt/jf values carried by BPF_JA instructions emitted while the
// filter is being built.
const (
	// jumpJT and jumpJF tag a jump placeholder whose k field holds a label
	// index.
	jumpJT = 0xff
	jumpJF = 0xff

	// labelJT and labelJF tag a label marker whose k field holds a label
	// index.
	labelJT = 0xfe
	labelJF = 0xfe
)
|
||||||
|
|
||||||
|
// Numeric constants handed to the kernel by this package.
const (
	pfLD = 0x0

	// Filter return values; these match the kernel's SECCOMP_RET_KILL,
	// SECCOMP_RET_TRAP and SECCOMP_RET_ALLOW.
	retKill  = 0x00000000
	retTrap  = 0x00030000
	retAllow = 0x7fff0000

	// modeFilter is passed with PR_SET_SECCOMP to select filter mode
	// (the kernel's SECCOMP_MODE_FILTER).
	modeFilter = 0x2

	// prSetNoNewPrivileges is the prctl option number of
	// PR_SET_NO_NEW_PRIVS (38).
	prSetNoNewPrivileges = 0x26
)
|
||||||
|
|
||||||
|
// actionErrno builds the filter return value that makes a syscall fail with
// errno: the 0x00050000 action code ORed with the low 16 bits of errno. Any
// higher bits of errno are discarded.
func actionErrno(errno uint32) uint32 {
	const (
		retErrno  = 0x00050000
		errnoMask = 0x0000ffff
	)
	return retErrno | (errno & errnoMask)
}
|
||||||
|
|
||||||
|
// secData is a zero-valued template with a syscall number, an architecture,
// an instruction pointer and six 64-bit arguments. Code in this package takes
// unsafe.Offsetof of its fields to compute load offsets, so the field order
// and types must not change.
var secData struct {
	nr         int32
	arch       uint32
	insPointer uint64
	args       [6]uint64
}
|
||||||
|
|
||||||
|
// isLittle reports whether the host stores integers least-significant byte
// first. It is computed once at package initialisation by inspecting the
// first byte in memory of an int holding 0x1234: that byte is zero only when
// the most significant byte comes first.
var isLittle = func() bool {
	probe := 0x1234
	raw := (*[unsafe.Sizeof(0)]byte)(unsafe.Pointer(&probe))
	return raw[0] != 0
}()
|
||||||
|
|
||||||
|
// endian is the package-wide endianSupport value through which the offsets of
// the low and high 32-bit words of a syscall argument are looked up.
var endian endianSupport
|
||||||
|
|
||||||
|
// endianSupport is a stateless type whose methods compute the byte offsets of
// the 32-bit halves of a syscall argument within secData.
type endianSupport struct{}
|
||||||
|
|
||||||
|
func (e endianSupport) hi(i uint32) uint32 {
|
||||||
|
if isLittle {
|
||||||
|
return e.little(i)
|
||||||
|
}
|
||||||
|
return e.big(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e endianSupport) low(i uint32) uint32 {
|
||||||
|
if isLittle {
|
||||||
|
return e.big(i)
|
||||||
|
}
|
||||||
|
return e.little(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (endianSupport) big(idx uint32) uint32 {
|
||||||
|
if idx >= 6 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return uint32(unsafe.Offsetof(secData.args)) + 8*idx
|
||||||
|
}
|
||||||
|
|
||||||
|
func (endianSupport) little(idx uint32) uint32 {
|
||||||
|
if idx < 0 || idx >= 6 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return uint32(unsafe.Offsetof(secData.args)) +
|
||||||
|
uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch))
|
||||||
|
}
|
||||||
|
|
||||||
|
// prctl invokes the prctl system call with the given option and four
// arguments. It returns the syscall.Errno reported by the call, or nil when
// the errno is zero.
func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error {
	if _, _, errno := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0); errno != 0 {
		return errno
	}
	return nil
}
|
||||||
|
|
||||||
|
func newSockFprog(filter []sockFilter) *sockFprog {
|
||||||
|
return &sockFprog{
|
||||||
|
len: uint16(len(filter)),
|
||||||
|
filt: filter,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type sockFprog struct {
|
||||||
|
len uint16
|
||||||
|
filt []sockFilter
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *sockFprog) set() error {
|
||||||
|
_, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP),
|
||||||
|
uintptr(modeFilter), uintptr(unsafe.Pointer(s)))
|
||||||
|
if err != 0 {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
|
@ -99,5 +99,8 @@ func (l *linuxStandardInit) Init() error {
|
||||||
if syscall.Getppid() != l.parentPid {
|
if syscall.Getppid() != l.parentPid {
|
||||||
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
|
||||||
}
|
}
|
||||||
|
if err := finalizeSeccomp(l.config); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,16 +21,20 @@ var setNsMap = map[string]uintptr{
|
||||||
"linux/s390x": 339,
|
"linux/s390x": 339,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sysSetns is the setNsMap entry for the running GOOS/GOARCH pair, looked up
// once at package initialisation. It is zero when the map has no entry for
// the platform.
var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
|
||||||
|
|
||||||
|
func SysSetns() uint32 {
|
||||||
|
return uint32(sysSetns)
|
||||||
|
}
|
||||||
|
|
||||||
func Setns(fd uintptr, flags uintptr) error {
|
func Setns(fd uintptr, flags uintptr) error {
|
||||||
ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
|
ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
|
||||||
if !exists {
|
if !exists {
|
||||||
return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
|
return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
|
||||||
}
|
}
|
||||||
|
|
||||||
_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
|
_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
|
||||||
if err != 0 {
|
if err != 0 {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue