Improve seccomp API

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>

Conflicts:
	configs/config.go
	container_linux.go
	seccomp/seccomp.go
	seccomp/seccomp.test
This commit is contained in:
Michael Crosby 2015-05-29 15:24:18 -07:00
parent 4a99434e8b
commit 5edcda910e
24 changed files with 882 additions and 1809 deletions

View File

@ -18,8 +18,6 @@ direct-test-short:
go test $(TEST_TAGS) -cover -test.short -v $(GO_PACKAGES) go test $(TEST_TAGS) -cover -test.short -v $(GO_PACKAGES)
direct-build: direct-build:
chmod 755 hack/seccomp.sh
hack/seccomp.sh
go build -v $(GO_PACKAGES) go build -v $(GO_PACKAGES)
direct-install: direct-install:

View File

@ -13,8 +13,38 @@ type IDMap struct {
Size int `json:"size"` Size int `json:"size"`
} }
type SeccompConf struct { type Seccomp struct {
SysCalls []int `json:"syscalls"` Syscalls []*Syscall `json:"syscalls"`
}
// Action is the action taken when a configured syscall is made inside the
// container. The three named actions below are negative; any other value is
// treated as an errno by the code that builds the filter, which is why callers
// write configs.Action(syscall.EPERM).
type Action int
const (
// iota starts at 0, so Kill is -3, Trap is -2 and Allow is -1.
Kill Action = iota - 3
Trap
Allow
)
// Operator selects the comparison applied between a syscall argument and
// Arg.Value.
type Operator int
const (
// The values run from EqualTo (0) through MaskEqualTo (4).
EqualTo Operator = iota
NotEqualTo
// NOTE(review): "GreatherThan" is misspelled, but the same spelling is used by
// the seccomp package and its callers, so it cannot be corrected here alone.
GreatherThan
LessThan
MaskEqualTo
)
// Arg describes one condition on an argument of a syscall.
type Arg struct {
// Index is the zero-based position of the syscall argument to inspect.
Index int `json:"index"`
// Value is the operand the argument is compared with.
Value uint32 `json:"value"`
// Op is the comparison to perform.
Op Operator `json:"op"`
}
type Syscall struct {
Value int `json:"value"`
Action Action `json:"action"`
Args []*Arg `json:"args"`
} }
// TODO Windows. Many of these fields should be factored out into those parts // TODO Windows. Many of these fields should be factored out into those parts
@ -109,6 +139,8 @@ type Config struct {
// sysctl -w my.property.name value in Linux. // sysctl -w my.property.name value in Linux.
SystemProperties map[string]string `json:"system_properties"` SystemProperties map[string]string `json:"system_properties"`
// SysCalls specify the system calls to keep when executing the process inside the container // Seccomp allows actions to be taken whenever a syscall is made within the container.
Seccomps SeccompConf `json:"seccomp"` // By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno
// can be specified on a per syscall basis.
Seccomp *Seccomp `json:"seccomp"`
} }

View File

@ -4,10 +4,6 @@ package configs
import "syscall" import "syscall"
var (
CLONE_SECCOMP = 0x10000 //diffrent from other flag, hard code
)
func (n *Namespace) Syscall() int { func (n *Namespace) Syscall() int {
return namespaceInfo[n.Type] return namespaceInfo[n.Type]
} }
@ -19,7 +15,6 @@ var namespaceInfo = map[NamespaceType]int{
NEWIPC: syscall.CLONE_NEWIPC, NEWIPC: syscall.CLONE_NEWIPC,
NEWUTS: syscall.CLONE_NEWUTS, NEWUTS: syscall.CLONE_NEWUTS,
NEWPID: syscall.CLONE_NEWPID, NEWPID: syscall.CLONE_NEWPID,
NEWSECCOMP: CLONE_SECCOMP,
} }
// CloneFlags parses the container's Namespaces options to set the correct // CloneFlags parses the container's Namespaces options to set the correct

View File

@ -11,7 +11,6 @@ const (
NEWUTS NamespaceType = "NEWUTS" NEWUTS NamespaceType = "NEWUTS"
NEWIPC NamespaceType = "NEWIPC" NEWIPC NamespaceType = "NEWIPC"
NEWUSER NamespaceType = "NEWUSER" NEWUSER NamespaceType = "NEWUSER"
NEWSECCOMP NamespaceType = "NEWSECCOMP"
) )
func NamespaceTypes() []NamespaceType { func NamespaceTypes() []NamespaceType {

View File

@ -169,13 +169,6 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
cmd.SysProcAttr.Credential = &syscall.Credential{} cmd.SysProcAttr.Credential = &syscall.Credential{}
} }
} }
if cloneFlags&uintptr(configs.CLONE_SECCOMP) != 0 {
//os don't surport for CLONE_SECCOMP, remote it
c.config.Namespaces.Remove(configs.NEWSECCOMP)
cloneFlags = c.config.Namespaces.CloneFlags()
} else {
c.config.Seccomps.SysCalls = []int{}
}
cmd.Env = append(cmd.Env, t) cmd.Env = append(cmd.Env, t)
cmd.SysProcAttr.Cloneflags = cloneFlags cmd.SysProcAttr.Cloneflags = cloneFlags
return &initProcess{ return &initProcess{

View File

@ -1,58 +0,0 @@
#!/usr/bin/perl
# ./seccomp.pl < syscall.sample > seccompsyscall.go
use strict;
use warnings;
my $pid = open(my $in, "-|") // die "Couldn't fork1 ($!)\n";
if($pid == 0) {
$pid = open(my $out, "|-") // die "Couldn't fork2 ($!)\n";
if($pid == 0) {
exec "cpp" or die "Couldn't exec cpp ($!)\n";
exit 1;
}
print $out "#include <sys/syscall.h>\n";
while(<>) {
if(/^\w/) {
my $name="$_";
chomp($name);
print $out $name;
print $out " = ";
print $out "__NR_$_";
}
}
close $out;
exit 0;
}
print "//";
system("uname -m");
print "package seccomp\r\n\r\n";
print "var syscallMap = map[string] int {\n";
while(<$in>) {
my $line=$_;
if($line =~ /^[\da-z_]/)
{
my @personal=split(/=/);
$personal[0] =~ s/[ ]//;
$personal[1] =~ s/[\r\n]//;
print " \"";
print $personal[0];
print "\"";
print " : ";
if (($personal[1] !~ /[0-9]/) || length($personal[1]) > 4)
{
print "-1,\r\n";
}else{
print $personal[1];
print ",\r\n";
}
}
}
print "}\r\n";

View File

@ -1,4 +0,0 @@
#/bin/bash
chmod 755 hack/seccomp.pl
hack/seccomp.pl < hack/syscall.sample > seccomp/seccompsyscall.go

View File

@ -1,405 +0,0 @@
access
chdir
chmod
chown
chown32
close
creat
dup
dup2
dup3
epoll_create
epoll_create1
epoll_ctl
epoll_ctl_old
epoll_pwait
epoll_wait
epoll_wait_old
eventfd
eventfd2
faccessat
fadvise64
fadvise64_64
fallocate
fanotify_init
fanotify_mark
ioctl
fchdir
fchmod
fchmodat
fchown
fchown32
fchownat
fcntl
fcntl64
fdatasync
fgetxattr
flistxattr
flock
fremovexattr
fsetxattr
fstat
fstat64
fstatat64
fstatfs
fstatfs64
fsync
ftruncate
ftruncate64
getcwd
getdents
getdents64
getxattr
inotify_add_watch
inotify_init
inotify_init1
inotify_rm_watch
io_cancel
io_destroy
io_getevents
io_setup
io_submit
lchown
lchown32
lgetxattr
link
linkat
listxattr
llistxattr
llseek
_llseek
lremovexattr
lseek
lsetxattr
lstat
lstat64
mkdir
mkdirat
mknod
mknodat
newfstatat
_newselect
oldfstat
oldlstat
oldolduname
oldstat
olduname
oldwait4
open
openat
pipe
pipe2
poll
ppoll
pread64
preadv
futimesat
pselect6
pwrite64
pwritev
read
readahead
readdir
readlink
readlinkat
readv
removexattr
rename
renameat
rmdir
select
sendfile
sendfile64
setxattr
splice
stat
stat64
statfs
statfs64
symlink
symlinkat
sync
sync_file_range
sync_file_range2
syncfs
tee
truncate
truncate64
umask
unlink
unlinkat
ustat
utime
utimensat
utimes
write
writev
// Network related
accept
accept4
bind
connect
getpeername
getsockname
getsockopt
listen
recv
recvfrom
recvmmsg
recvmsg
send
sendmmsg
sendmsg
sendto
setsockopt
shutdown
socket
socketcall
socketpair
sethostname
// Signal related
pause
rt_sigaction
rt_sigpending
rt_sigprocmask
rt_sigqueueinfo
rt_sigreturn
rt_sigsuspend
rt_sigtimedwait
rt_tgsigqueueinfo
sigaction
sigaltstack
signal
signalfd
signalfd4
sigpending
sigprocmask
sigreturn
sigsuspend
// Other needed POSIX
alarm
brk
clock_adjtime
clock_getres
clock_gettime
clock_nanosleep
clock_settime
gettimeofday
nanosleep
nice
sysinfo
syslog
time
timer_create
timer_delete
timerfd_create
timerfd_gettime
timerfd_settime
timer_getoverrun
timer_gettime
timer_settime
times
uname
// Memory control
madvise
mbind
mincore
mlock
mlockall
mmap
mmap2
mprotect
mremap
msync
munlock
munlockall
munmap
remap_file_pages
set_mempolicy
vmsplice
// Process control
capget
capset
clone
execve
exit
exit_group
fork
getcpu
getpgid
getpgrp
getpid
getppid
getpriority
getresgid
getresgid32
getresuid
getresuid32
getrlimit
getrusage
getsid
getuid
getuid32
getegid
getegid32
geteuid
geteuid32
getgid
getgid32
getgroups
getgroups32
getitimer
get_mempolicy
kill
prctl
prlimit64
sched_getaffinity
sched_getparam
sched_get_priority_max
sched_get_priority_min
sched_getscheduler
sched_rr_get_interval
sched_setaffinity
sched_setparam
sched_setscheduler
sched_yield
setfsgid
setfsgid32
setfsuid
setfsuid32
setgid
setgid32
setgroups
setgroups32
setitimer
setpgid
setpriority
setregid
setregid32
setresgid
setresgid32
setresuid
setresuid32
setreuid
setreuid32
setrlimit
setsid
setuid
setuid32
ugetrlimit
vfork
wait4
waitid
waitpid
// IPC
ipc
mq_getsetattr
mq_notify
mq_open
mq_timedreceive
mq_timedsend
mq_unlink
msgctl
msgget
msgrcv
msgsnd
semctl
semget
semop
semtimedop
shmat
shmctl
shmdt
shmget
// Linux specific, mostly needed for thread-related stuff
arch_prctl
get_robust_list
get_thread_area
gettid
futex
restart_syscall
set_robust_list
set_thread_area
set_tid_address
tgkill
tkill
// Admin syscalls, these are blocked
acct
adjtimex
bdflush
chroot
create_module
delete_module
get_kernel_syms
idle
init_module
ioperm
iopl
ioprio_get
ioprio_set
kexec_load
lookup_dcookie
migrate_pages
modify_ldt
mount
move_pages
name_to_handle_at
nfsservctl
open_by_handle_at
perf_event_open
pivot_root
process_vm_readv
process_vm_writev
ptrace
query_module
quotactl
reboot
setdomainname
setns
settimeofday
sgetmask
ssetmask
stime
swapoff
swapon
_sysctl
sysfs
sys_setaltroot
umount
umount2
unshare
uselib
vhangup
vm86
vm86old
// Kernel key management
add_key
keyctl
request_key
// Unimplemented
afs_syscall
break
ftime
getpmsg
gtty
lock
madvise1
mpx
prof
profil
putpmsg
security
stty
tuxcall
ulimit
vserver

View File

@ -262,13 +262,59 @@ func killCgroupProcesses(m cgroups.Manager) error {
} }
func finalizeSeccomp(config *initConfig) error { func finalizeSeccomp(config *initConfig) error {
if len(config.Config.Seccomps.SysCalls) > 0 { if config.Config.Seccomp == nil {
scmpCtx, _ := seccomp.ScmpInit(seccomp.ScmpActAllow)
for _, key := range config.Config.Seccomps.SysCalls {
seccomp.ScmpAdd(scmpCtx, key, seccomp.ScmpActAllow)
}
return seccomp.ScmpLoad(scmpCtx)
}
return nil return nil
}
context := seccomp.New()
for _, s := range config.Config.Seccomp.Syscalls {
ss := &seccomp.Syscall{
Value: uint32(s.Value),
Action: seccompAction(s.Action),
}
if len(s.Args) > 0 {
ss.Args = seccompArgs(s.Args)
}
context.Add(ss)
}
return context.Load()
}
// seccompAction converts an action from the container configuration into the
// seccomp package's representation. Kill, Trap and Allow map to their
// namesakes; every other value is interpreted as an errno to hand back to the
// caller of the syscall.
func seccompAction(a configs.Action) seccomp.Action {
	if a == configs.Kill {
		return seccomp.Kill
	}
	if a == configs.Trap {
		return seccomp.Trap
	}
	if a == configs.Allow {
		return seccomp.Allow
	}
	// Not one of the named actions: pass the raw number through as an errno.
	errno := syscall.Errno(int(a))
	return seccomp.Error(errno)
}
// seccompArgs converts the configured argument conditions into seccomp.Args.
// All conditions end up in a single group, so the result always has exactly
// one inner slice (nil when args is empty).
func seccompArgs(args []*configs.Arg) seccomp.Args {
	var group []seccomp.Arg
	for i := range args {
		src := args[i]
		converted := seccomp.Arg{
			Index: uint32(src.Index),
			Op:    seccompOperator(src.Op),
			Value: uint(src.Value),
		}
		group = append(group, converted)
	}
	return seccomp.Args{group}
}
func seccompOperator(o configs.Operator) seccomp.Operator {
switch o {
case configs.EqualTo:
return seccomp.EqualTo
case configs.NotEqualTo:
return seccomp.NotEqualTo
case configs.GreatherThan:
return seccomp.GreatherThan
case configs.LessThan:
return seccomp.LessThan
case configs.MaskEqualTo:
return seccomp.MaskEqualTo
}
return 0
} }

View File

@ -1,15 +1,10 @@
package integration package integration
import ( import (
"bufio"
"bytes" "bytes"
"errors"
"fmt"
"io"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"runtime"
"strconv" "strconv"
"strings" "strings"
"syscall" "syscall"
@ -720,103 +715,7 @@ func TestSystemProperties(t *testing.T) {
} }
} }
func genSeccompConfigFile(file string, calls []int) error { func TestSeccompNoChown(t *testing.T) {
callBegin := 0
callEnd := 0
if runtime.GOARCH == "386" {
callEnd = 340
} else if runtime.GOARCH == "amd64" {
callEnd = 302
} else if runtime.GOARCH == "arm" {
callEnd = 377
} else if runtime.GOARCH == "arm64" {
callEnd = 281
} else if runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" {
callEnd = 354
}
conf := fmt.Sprintf("%d\nwhitelist\n", 1)
i := 0
nr := callBegin
for nr <= callEnd {
j := 0
for _, key := range calls {
if nr == key {
break
}
j++
}
if j == len(calls) {
callfilter := fmt.Sprintf("%d\n", nr)
conf += callfilter
i++
}
nr++
}
fout, err := os.Create(file)
defer fout.Close()
if err == nil {
fout.WriteString(conf)
}
return nil
}
func genSeccompSyscall(configFile string, Seccomps *configs.SeccompConf) error {
f, err := os.Open(configFile)
defer f.Close()
if nil == err {
buff := bufio.NewReader(f)
firstl, err := buff.ReadString('\n')
if err != nil || io.EOF == err {
return errors.New("initSeccomp ReadString, firstl")
}
ver := 0
fmt.Sscanf(firstl, "%d\n", &ver)
if err != nil || 1 != ver {
return errors.New("initSeccomp Sscanf")
}
secondl, err := buff.ReadString('\n')
if err != nil || io.EOF == err || strings.EqualFold(secondl, "whitelist") {
return errors.New("initSeccomp ReadString, secondl")
}
nr := 0
for {
line, err := buff.ReadString('\n')
if err != nil || io.EOF == err {
break
}
fmt.Sscanf(line, "%d\n", &nr)
Seccomps.SysCalls = append(Seccomps.SysCalls, nr)
}
return nil
}
return nil
}
func TestSeccompNotStat(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
exceptCall := []int{syscall.SYS_STAT}
genSeccompConfigFile("seccomp.conf", exceptCall)
genSeccompSyscall("seccomp.conf", &config.Seccomps)
out, _, err := runContainer(config, "", "/bin/sh", "-c", "ls / -l")
if err == nil {
t.Fatal("runontainer[ls without SYS_STAT] should be failed")
} else {
fmt.Println(out)
}
}
func TestSeccompStat(t *testing.T) {
if testing.Short() { if testing.Short() {
return return
} }
@ -825,14 +724,17 @@ func TestSeccompStat(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
defer remove(rootfs) defer remove(rootfs)
config := newTemplateConfig(rootfs) config := newTemplateConfig(rootfs)
exceptCall := []int{} config.Seccomp = &configs.Seccomp{}
genSeccompConfigFile("seccomp.conf", exceptCall) config.Seccomp.Syscalls = append(config.Seccomp.Syscalls, &configs.Syscall{
genSeccompSyscall("seccomp.conf", &config.Seccomps) Value: syscall.SYS_CHOWN,
out, _, err := runContainer(config, "", "/bin/sh", "-c", "ls / -l") Action: configs.Action(syscall.EPERM),
if err != nil { })
t.Fatal(err) buffers, _, err := runContainer(config, "", "/bin/sh", "-c", "chown 1:1 /tmp")
if err == nil {
t.Fatal("running chown in a container should fail")
}
if s := buffers.String(); !strings.Contains(s, "not permitted") {
t.Fatalf("running chown should result in an EPERM but got %q", s)
} }
fmt.Println(out)
} }

View File

@ -44,7 +44,6 @@ func newTemplateConfig(rootfs string) *configs.Config {
{Type: configs.NEWIPC}, {Type: configs.NEWIPC},
{Type: configs.NEWPID}, {Type: configs.NEWPID},
{Type: configs.NEWNET}, {Type: configs.NEWNET},
{Type: configs.NEWSECCOMP},
}), }),
Cgroups: &configs.Cgroup{ Cgroups: &configs.Cgroup{
Name: "test", Name: "test",
@ -115,8 +114,5 @@ func newTemplateConfig(rootfs string) *configs.Config {
Soft: uint64(1025), Soft: uint64(1025),
}, },
}, },
Seccomps: configs.SeccompConf{
SysCalls: make([]int, 0, 512),
},
} }
} }

View File

@ -122,11 +122,11 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe
err = container.Start(process) err = container.Start(process)
if err != nil { if err != nil {
return nil, -1, err return buffers, -1, err
} }
ps, err := process.Wait() ps, err := process.Wait()
if err != nil { if err != nil {
return nil, -1, err return buffers, -1, err
} }
status := ps.Sys().(syscall.WaitStatus) status := ps.Sys().(syscall.WaitStatus)
if status.Exited() { if status.Exited() {
@ -134,7 +134,7 @@ func runContainer(config *configs.Config, console string, args ...string) (buffe
} else if status.Signaled() { } else if status.Signaled() {
exitCode = -int(status.Signal()) exitCode = -int(status.Signal())
} else { } else {
return nil, -1, err return buffers, -1, err
} }
return return
} }

View File

@ -19,32 +19,33 @@ import (
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
var createFlags = []cli.Flag{ var createFlags = []cli.Flag{
cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"}, cli.BoolFlag{Name: "cgroup", Usage: "mount the cgroup data for the container"},
cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"}, cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"},
cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"},
cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"},
cli.IntFlag{Name: "cpushares", Usage: "set the cpushares for the container"}, cli.IntFlag{Name: "cpushares", Usage: "set the cpushares for the container"},
cli.IntFlag{Name: "memory-limit", Usage: "set the memory limit for the container"}, cli.IntFlag{Name: "memory-limit", Usage: "set the memory limit for the container"},
cli.IntFlag{Name: "memory-swap", Usage: "set the memory swap limit for the container"}, cli.IntFlag{Name: "memory-swap", Usage: "set the memory swap limit for the container"},
cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"},
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"},
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
cli.StringFlag{Name: "cpuset-cpus", Usage: "set the cpuset cpus"}, cli.StringFlag{Name: "cpuset-cpus", Usage: "set the cpuset cpus"},
cli.StringFlag{Name: "cpuset-mems", Usage: "set the cpuset mems"}, cli.StringFlag{Name: "cpuset-mems", Usage: "set the cpuset mems"},
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
cli.StringFlag{Name: "hostname", Value: "nsinit", Usage: "hostname value for the container"}, cli.StringFlag{Name: "hostname", Value: "nsinit", Usage: "hostname value for the container"},
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"}, cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"},
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"}, cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"},
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"}, cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
cli.StringFlag{Name: "security", Value: "", Usage: "set the security profile (high, medium, low)"},
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
cli.StringFlag{Name: "veth-address", Usage: "veth ip address"}, cli.StringFlag{Name: "veth-address", Usage: "veth ip address"},
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"},
cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"}, cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"},
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"}, cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"},
cli.BoolFlag{Name: "cgroup", Usage: "mount the cgroup data for the container"},
cli.StringSliceFlag{Name: "sysctl", Value: &cli.StringSlice{}, Usage: "set system properties in the container"}, cli.StringSliceFlag{Name: "sysctl", Value: &cli.StringSlice{}, Usage: "set system properties in the container"},
cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"},
} }
var configCommand = cli.Command{ var configCommand = cli.Command{
@ -203,6 +204,24 @@ func modify(config *configs.Config, context *cli.Context) {
Device: "cgroup", Device: "cgroup",
}) })
} }
modifySecurityProfile(context, config)
}
func modifySecurityProfile(context *cli.Context, config *configs.Config) {
profileName := context.String("security")
if profileName == "" {
return
}
profile := profiles[profileName]
if profile == nil {
logrus.Fatalf("invalid profile name %q", profileName)
}
config.Rlimits = profile.Rlimits
config.Capabilities = profile.Capabilities
config.Seccomp = profile.Seccomp
config.AppArmorProfile = profile.ApparmorProfile
config.MountLabel = profile.MountLabel
config.ProcessLabel = profile.ProcessLabel
} }
func getTemplate() *configs.Config { func getTemplate() *configs.Config {
@ -290,13 +309,5 @@ func getTemplate() *configs.Config {
Flags: defaultMountFlags | syscall.MS_RDONLY, Flags: defaultMountFlags | syscall.MS_RDONLY,
}, },
}, },
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: 1024,
Soft: 1024,
},
},
} }
} }

272
nsinit/security.go Normal file
View File

@ -0,0 +1,272 @@
package main
import (
"syscall"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/system"
)
// profiles maps a security profile name ("high", "medium" or "low") to the
// settings that profile applies.
var profiles = map[string]*securityProfile{
"high": highProfile,
"medium": mediumProfile,
"low": lowProfile,
}
// securityProfile bundles the security related settings that are selected
// together under one profile name.
type securityProfile struct {
// Capabilities is the list of capability names for the container.
Capabilities []string `json:"capabilities"`
// ApparmorProfile, MountLabel and ProcessLabel are left empty by the
// profiles defined in this file.
ApparmorProfile string `json:"apparmor_profile"`
MountLabel string `json:"mount_label"`
ProcessLabel string `json:"process_label"`
// Rlimits are the resource limits for the container.
Rlimits []configs.Rlimit `json:"rlimits"`
// Seccomp lists the syscalls that get a non-default action.
Seccomp *configs.Seccomp `json:"seccomp"`
}
// highProfile is the most restrictive profile. It is meant as a runtime
// configuration that is not able to do things like apt-get or yum install.
//
// It keeps only three capabilities, caps RLIMIT_NOFILE at 1024 and makes every
// syscall listed below return EPERM. clone is only rejected when the
// CLONE_NEWUSER condition on argument 0 matches.
var highProfile = &securityProfile{
Capabilities: []string{
"NET_BIND_SERVICE",
"KILL",
"AUDIT_WRITE",
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: 1024,
Soft: 1024,
},
},
// http://man7.org/linux/man-pages/man2/syscalls.2.html
Seccomp: &configs.Seccomp{
Syscalls: []*configs.Syscall{
{
Value: syscall.SYS_CAPSET, // http://man7.org/linux/man-pages/man2/capset.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
Action: configs.Action(syscall.EPERM),
},
{
// setns: the number comes from system.SysSetns() rather than a syscall constant.
Value: int(system.SysSetns()),
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_CHMOD, // http://man7.org/linux/man-pages/man2/chmod.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_CHOWN, // http://man7.org/linux/man-pages/man2/chown.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_LINK, // http://man7.org/linux/man-pages/man2/link.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_LINKAT, // http://man7.org/linux/man-pages/man2/linkat.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_UNLINK, // http://man7.org/linux/man-pages/man2/unlink.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_UNLINKAT, // http://man7.org/linux/man-pages/man2/unlinkat.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_CHROOT, // http://man7.org/linux/man-pages/man2/chroot.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_SETDOMAINNAME, // http://man7.org/linux/man-pages/man2/setdomainname.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_SETHOSTNAME, // http://man7.org/linux/man-pages/man2/sethostname.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
Action: configs.Action(syscall.EPERM),
Args: []*configs.Arg{
{
Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
Value: syscall.CLONE_NEWUSER,
Op: configs.MaskEqualTo,
},
},
},
},
},
}
// mediumProfile is a medium level profile that should be able to do things
// like installing from apt-get or yum.
//
// Compared with highProfile it grants more capabilities (CHOWN, DAC_OVERRIDE,
// FSETID, FOWNER, SETGID, SETUID, SETFCAP, SETPCAP) and does not list capset,
// chmod, chown, link, linkat, unlink or unlinkat in its seccomp rules.
var mediumProfile = &securityProfile{
Capabilities: []string{
"CHOWN",
"DAC_OVERRIDE",
"FSETID",
"FOWNER",
"SETGID",
"SETUID",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"KILL",
"AUDIT_WRITE",
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: 1024,
Soft: 1024,
},
},
// http://man7.org/linux/man-pages/man2/syscalls.2.html
Seccomp: &configs.Seccomp{
Syscalls: []*configs.Syscall{
{
Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
Action: configs.Action(syscall.EPERM),
},
{
// setns: the number comes from system.SysSetns() rather than a syscall constant.
Value: int(system.SysSetns()),
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_CHROOT, // http://man7.org/linux/man-pages/man2/chroot.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_SETDOMAINNAME, // http://man7.org/linux/man-pages/man2/setdomainname.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_SETHOSTNAME, // http://man7.org/linux/man-pages/man2/sethostname.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
Action: configs.Action(syscall.EPERM),
Args: []*configs.Arg{
{
Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
Value: syscall.CLONE_NEWUSER,
Op: configs.MaskEqualTo,
},
},
},
},
},
}
// lowProfile is the least restrictive of the three profiles. Compared with
// mediumProfile it additionally grants the SYS_CHROOT capability and does not
// list chroot, setdomainname or sethostname in its seccomp rules.
var lowProfile = &securityProfile{
Capabilities: []string{
"CHOWN",
"DAC_OVERRIDE",
"FSETID",
"FOWNER",
"SETGID",
"SETUID",
"SYS_CHROOT",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"KILL",
"AUDIT_WRITE",
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: 1024,
Soft: 1024,
},
},
// http://man7.org/linux/man-pages/man2/syscalls.2.html
Seccomp: &configs.Seccomp{
Syscalls: []*configs.Syscall{
{
Value: syscall.SYS_UNSHARE, // http://man7.org/linux/man-pages/man2/unshare.2.html
Action: configs.Action(syscall.EPERM),
},
{
// setns: the number comes from system.SysSetns() rather than a syscall constant.
Value: int(system.SysSetns()),
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_MOUNT, // http://man7.org/linux/man-pages/man2/mount.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_UMOUNT2, // http://man7.org/linux/man-pages/man2/umount.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_CREATE_MODULE, // http://man7.org/linux/man-pages/man2/create_module.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_DELETE_MODULE, // http://man7.org/linux/man-pages/man2/delete_module.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_KEXEC_LOAD, // http://man7.org/linux/man-pages/man2/kexec_load.2.html
Action: configs.Action(syscall.EPERM),
},
{
Value: syscall.SYS_CLONE, // http://man7.org/linux/man-pages/man2/clone.2.html
Action: configs.Action(syscall.EPERM),
Args: []*configs.Arg{
{
Index: 0, // the glibc wrapper has the flags at arg2 but the raw syscall has flags at arg0
Value: syscall.CLONE_NEWUSER,
Op: configs.MaskEqualTo,
},
},
},
},
},
}

32
seccomp/bpf.go Normal file
View File

@ -0,0 +1,32 @@
package seccomp
import "strings"
// bpfLabel is a named position inside a filter program.
type bpfLabel struct {
	label    string // name of the label
	location uint32 // instruction index; 0xffffffff until it has been set
}

// bpfLabels is the table of labels known so far; a label is referred to by
// its position in this slice.
type bpfLabels []bpfLabel

// labelIndex looks lb up in labels (ignoring case) and returns its position.
// When lb is not present yet it is appended with location 0xffffffff and the
// position of the new entry is returned.
func labelIndex(labels *bpfLabels, lb string) uint32 {
	for pos, known := range *labels {
		if strings.EqualFold(known.label, lb) {
			return uint32(pos)
		}
	}
	next := uint32(len(*labels))
	*labels = append(*labels, bpfLabel{label: lb, location: 0xffffffff})
	return next
}
// scmpBpfStmt builds a filter instruction from an opcode and its operand k,
// leaving both jump offsets at zero.
func scmpBpfStmt(code uint16, k uint32) sockFilter {
	return sockFilter{
		code: code,
		jt:   0,
		jf:   0,
		k:    k,
	}
}
// scmpBpfJump builds a filter instruction from an opcode, its operand k and
// the jt and jf jump fields.
func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter {
	return sockFilter{
		code: code,
		jt:   jt,
		jf:   jf,
		k:    k,
	}
}

144
seccomp/context.go Normal file
View File

@ -0,0 +1,144 @@
package seccomp
import (
"errors"
"syscall"
)
// labelTemplate is the format of a jump label name. It is filled in with the
// syscall number and the Index of the first Arg of an argument group.
const labelTemplate = "lb-%d-%d"
// Action is the type of action that will be taken when a
// syscall is performed. The named actions below are negative; any other
// value is handled as an errno (see Error and Syscall.scmpAction).
type Action int
const (
// iota starts at 0, so Kill is -3, Trap is -2 and Allow is -1.
Kill Action = iota - 3 // Kill the calling process of the syscall.
Trap // Trap and coredump the calling process of the syscall.
Allow // Allow the syscall to be completed.
)
// Syscall pairs a syscall number with the action to take when that syscall is
// made, optionally restricted by conditions on the syscall's arguments.
type Syscall struct {
	// Value is the syscall number.
	Value uint32
	// Action is what happens when the syscall is made.
	Action Action
	// Args holds optional filters on the arguments of the syscall.
	Args Args
}

// scmpAction returns the filter return value for s.Action: one of the fixed
// values for Kill, Trap and Allow, or an errno value built by actionErrno for
// anything else.
func (s *Syscall) scmpAction() uint32 {
	if s.Action == Kill {
		return retKill
	}
	if s.Action == Trap {
		return retTrap
	}
	if s.Action == Allow {
		return retAllow
	}
	code := uint32(s.Action)
	return actionErrno(code)
}
// Arg represents an argument to the syscall with the argument's index,
// the operator to apply when matching, and the argument's value at that time.
type Arg struct {
Index uint32 // index of args which start from zero
Op Operator // comparison to apply, one of the Operator constants
Value uint // the value of arg
}
// Args is a list of argument groups. When the filter is built, each non-empty
// group gets one label and one argument load, both taken from the Index of the
// group's first Arg; every Arg in the group then contributes one comparison.
// NOTE(review): the Index of the later Args in a group is not read by
// filter.addArguments - confirm that this is intended.
type Args [][]Arg
// Errors returned while building a filter.
var (
// ErrUnresolvedLabel is returned when a jump refers to a label whose
// location has not been set.
ErrUnresolvedLabel = errors.New("seccomp: unresolved label")
// ErrDuplicateLabel is returned when a label is placed a second time.
ErrDuplicateLabel = errors.New("seccomp: duplicate label use")
// ErrUnsupportedOperation is presumably returned for an unknown Operator;
// its use is not visible in this part of the package - TODO confirm.
ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument")
)
// Error returns an Action that will be used to send the calling
// process the specified errno when the syscall is made.
//
// The errno is stored unchanged as the numeric value of the Action.
func Error(code syscall.Errno) Action {
return Action(code)
}
// New creates an empty Context that is ready to have syscalls added to it.
func New() *Context {
	ctx := new(Context)
	ctx.syscalls = map[uint32]*Syscall{}
	return ctx
}
// Context holds syscalls for the current process to limit the type of
// actions the calling process can make.
type Context struct {
// syscalls is keyed by syscall number, so there is at most one
// configuration per syscall.
syscalls map[uint32]*Syscall
}
// Add registers the syscall s, with its action and argument filters, in the
// Context. Entries are keyed by syscall number, so adding a second Syscall
// with the same Value replaces the earlier one.
//
// The map is created on first use so that a zero-value Context (one that was
// not obtained from New) does not panic on the write.
func (c *Context) Add(s *Syscall) {
	if c.syscalls == nil {
		c.syscalls = make(map[uint32]*Syscall)
	}
	c.syscalls[s.Value] = s
}
// Remove removes the specified syscall configuration from the Context.
// call is the syscall number; removing a number that was never added does
// nothing.
func (c *Context) Remove(call uint32) {
delete(c.syscalls, call)
}
// Load applies the Context to the calling process, so the configured seccomp
// rules take effect once Load has returned successfully.
//
// It builds the filter program, sets the no-new-privileges flag through prctl
// and then installs the program. The first error encountered is returned.
func (c *Context) Load() error {
	program, err := c.newFilter()
	if err != nil {
		return err
	}
	if perr := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); perr != nil {
		return perr
	}
	fprog := newSockFprog(program)
	return fprog.set()
}
// newFilter builds the filter program for the syscalls in the Context.
//
// The program is laid out in three steps: one check per syscall, a return of
// the allow value for everything that matched none of them, and then the
// argument checks of each syscall. Jumps and labels are first emitted as
// placeholder instructions and are resolved in a final backward pass.
//
// It returns ErrUnresolvedLabel when a jump refers to a label that has no
// location, ErrDuplicateLabel when a label is placed twice, and any error
// reported by addArguments.
func (c *Context) newFilter() ([]sockFilter, error) {
var (
labels bpfLabels
f = newFilter()
)
// one entry per syscall; syscalls with arguments jump to a label that is
// placed by addArguments further down
for _, s := range c.syscalls {
f.addSyscall(s, &labels)
}
// reached when no syscall entry matched
f.allow()
// process args for the syscalls
for _, s := range c.syscalls {
if err := f.addArguments(s, &labels); err != nil {
return nil, err
}
}
// apply labels for arguments: walk the program from the last instruction to
// the first, so a label's location is recorded before the jumps in front of
// it that refer to it are visited
idx := int32(len(*f) - 1)
for ; idx >= 0; idx-- {
lf := &(*f)[idx]
// only BPF_JMP+BPF_JA instructions can be jump or label placeholders
if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) {
continue
}
// jt and jf together carry the marker that tells jumps and labels apart
rel := int32(lf.jt)<<8 | int32(lf.jf)
if ((jumpJT << 8) | jumpJF) == rel {
// jump placeholder: k holds the label's position in labels
if labels[lf.k].location == 0xffffffff {
return nil, ErrUnresolvedLabel
}
// replace it with the distance from the next instruction to the label
lf.k = labels[lf.k].location - uint32(idx+1)
lf.jt = 0
lf.jf = 0
} else if ((labelJT << 8) | labelJF) == rel {
// label placeholder: record where it sits and clear the instruction's
// operands
if labels[lf.k].location != 0xffffffff {
return nil, ErrDuplicateLabel
}
labels[lf.k].location = uint32(idx)
lf.k = 0
lf.jt = 0
lf.jf = 0
}
}
return *f, nil
}

116
seccomp/filter.go Normal file
View File

@ -0,0 +1,116 @@
package seccomp
import (
"fmt"
"syscall"
"unsafe"
)
// sockFilter is a single instruction of a filter program.
// NOTE(review): the field layout looks like the kernel's struct sock_filter -
// confirm before changing field order or sizes.
type sockFilter struct {
code uint16 // opcode
jt uint8 // jump field (doubles as a placeholder marker while building)
jf uint8 // jump field (doubles as a placeholder marker while building)
k uint32 // operand; its meaning depends on code
}
// newFilter returns a filter whose only instruction loads the syscall number
// (the nr field of the seccomp data) into the accumulator.
func newFilter() *filter {
	loadNr := sockFilter{
		code: pfLD + syscall.BPF_W + syscall.BPF_ABS,
		jt:   0,
		jf:   0,
		k:    uint32(unsafe.Offsetof(secData.nr)),
	}
	f := filter{loadNr}
	return &f
}
// filter is a BPF program under construction; its methods append
// instructions to it.
type filter []sockFilter
// addSyscall appends the dispatch entry for s: a comparison on the syscall
// number followed by what to do on a match.
//
// Without argument conditions the match returns the syscall's action
// directly. Otherwise the match jumps to the label of the first argument
// group, which addArguments emits later. Nothing is emitted when s has
// argument groups but the first one is empty.
func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) {
	if len(s.Args) == 0 {
		f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()))
		return
	}
	first := s.Args[0]
	if len(first) == 0 {
		return
	}
	name := fmt.Sprintf(labelTemplate, s.Value, first[0].Index)
	target := scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, name),
		jumpJT, jumpJF)
	f.call(s.Value, target)
}
// addArguments appends the argument checks for s.
//
// Each non-empty argument group gets a label and a load of the argument
// named by the group's first condition. Every condition in the group is then
// emitted; when a condition matches it either jumps to the label of the next
// group (if there is a non-empty one) or returns the syscall's action. Each
// group is terminated with an "allow" for the case where no condition fired.
//
// An error is returned when a condition uses an unsupported operator.
func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error {
	last := len(s.Args) - 1
	for i, group := range s.Args {
		if len(group) > 0 {
			name := fmt.Sprintf(labelTemplate, s.Value, group[0].Index)
			f.label(labels, name)
			f.arg(group[0].Index)
		}
		for _, cond := range group {
			var onMatch sockFilter
			if i < last && len(s.Args[i+1]) > 0 {
				next := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index)
				onMatch = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA,
					labelIndex(labels, next), jumpJT, jumpJF)
			} else {
				onMatch = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())
			}
			if err := f.op(cond.Op, cond.Value, onMatch); err != nil {
				return err
			}
		}
		f.allow()
	}
	return nil
}
// label appends a label marker for lb. The marker is encoded as a jump
// carrying the label marker values and is resolved when the filter is
// finalized.
func (f *filter) label(labels *bpfLabels, lb string) {
	marker := scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF)
	*f = append(*f, marker)
}
// call appends a comparison of the accumulator against syscall number nr,
// followed by jt. jt is executed on a match and skipped otherwise.
func (f *filter) call(nr uint32, jt sockFilter) {
	match := scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1)
	*f = append(*f, match, jt)
}
// allow appends an unconditional return of retAllow.
func (f *filter) allow() {
	ret := scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow)
	*f = append(*f, ret)
}
// deny appends an unconditional return of retTrap.
func (f *filter) deny() {
	ret := scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap)
	*f = append(*f, ret)
}
// arg appends the instructions that load syscall argument index; it is a
// method wrapper around the package-level arg function.
func (f *filter) arg(index uint32) {
arg(f, index)
}
// op appends the comparison for operation against value v. jf is the
// instruction executed when the comparison matches.
//
// ErrUnsupportedOperation is returned, and nothing is appended, for an
// operator that is not one of the known constants.
func (f *filter) op(operation Operator, v uint, jf sockFilter) error {
	var emit func(*filter, uint, sockFilter)
	switch operation {
	case EqualTo:
		emit = jumpEqualTo
	case NotEqualTo:
		emit = jumpNotEqualTo
	case GreatherThan:
		emit = jumpGreaterThan
	case LessThan:
		emit = jumpLessThan
	case MaskEqualTo:
		emit = jumpMaskEqualTo
	default:
		return ErrUnsupportedOperation
	}
	emit(f, v, jf)
	return nil
}
// arg appends the instructions that load the 64-bit syscall argument idx as
// two 32-bit words: the low word is stored in scratch slot 0 and the high
// word in scratch slot 1. The high word is left in the accumulator.
func arg(f *filter, idx uint32) {
	*f = append(*f,
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx)),
		scmpBpfStmt(syscall.BPF_ST, 0),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx)),
		scmpBpfStmt(syscall.BPF_ST, 1),
	)
}
// jump appends a symbolic jump to label lb. The jump is resolved to a
// relative offset when the filter is finalized.
func jump(f *filter, labels *bpfLabels, lb string) {
	target := labelIndex(labels, lb)
	*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, target, jumpJT, jumpJF))
}

68
seccomp/jump_amd64.go Normal file
View File

@ -0,0 +1,68 @@
// +build linux,amd64
package seccomp
// Using BPF filters
//
// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf
import "syscall"
// jumpGreaterThan appends a 64-bit comparison of the loaded argument against
// v and runs jt when it matches.
//
// It expects the state left by arg: high word in the accumulator, low word
// in scratch slot 0 and high word in scratch slot 1. The high words are
// compared first; when they are equal the low words are compared with
// BPF_JGE, so an argument equal to v matches as well. The accumulator holds
// the high word again when the sequence is left.
func jumpGreaterThan(f *filter, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	*f = append(*f,
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, hi, 4, 0),
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 5),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0),
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, lo, 0, 2),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
		jt,
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
	)
}
// jumpEqualTo appends a 64-bit equality test of the loaded argument against
// v and runs jt when both the high and the low word are equal.
//
// It expects the state left by arg: high word in the accumulator, low word
// in scratch slot 0 and high word in scratch slot 1. The accumulator holds
// the high word again when the sequence is left.
func jumpEqualTo(f *filter, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	*f = append(*f,
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 5),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0),
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
		jt,
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
	)
}
// jumpLessThan appends a 64-bit comparison of the loaded argument against v
// and runs jt when it matches.
//
// It expects the state left by arg: high word in the accumulator, low word
// in scratch slot 0 and high word in scratch slot 1. The sequence is skipped
// when the argument's high word is greater than v's, or when the high words
// are equal and the argument's low word is greater; every other case runs
// jt, so an argument equal to v matches as well. The accumulator holds the
// high word again when the sequence is left.
func jumpLessThan(f *filter, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	*f = append(*f,
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, hi, 6, 0),
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 3),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0),
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, lo, 2, 0),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
		jt,
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
	)
}
// jumpNotEqualTo appends a 64-bit inequality test of the loaded argument
// against v and runs jt when the argument differs from v in either its high
// or its low word.
//
// It expects the state left by arg: high word in the accumulator, low word
// in scratch slot 0 and high word in scratch slot 1. The accumulator holds
// the high word again when the sequence is left.
//
// The previous jump offsets skipped the whole sequence whenever the high
// words were equal, so jt only ran when both halves differed. For ordinary
// 32-bit values, whose high word is zero on both sides, it never ran.
func jumpNotEqualTo(f *filter, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	*f = append(*f,
		// High words differ: the values cannot be equal, go straight to jt
		// (the accumulator already holds the high word).
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 3),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0),
		// Low words are equal as well: the values are equal, skip jt.
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
		jt,
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
	)
}
// jumpMaskEqualTo checks for a value inside a mask and runs jt when it is
// present. For the low word the evaluation is equivalent to
// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER.
//
// It expects the state left by arg: high word in the accumulator, low word
// in scratch slot 0 and high word in scratch slot 1. The high word is
// compared for exact equality with v's high word; only the low word is
// masked. The accumulator holds the high word again when the sequence is
// left.
func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	*f = append(*f,
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0),
		scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v)),
		scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2),
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
		jt,
		scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1),
	)
}

View File

@ -1,77 +1,110 @@
// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go.
package seccomp package seccomp
import ( import (
"errors"
"fmt"
"os"
"os/signal"
"runtime"
"strings"
"syscall" "syscall"
"unsafe" "unsafe"
) )
// Operator that is used for argument comparison.
type Operator int
const ( const (
EQ = 0 EqualTo Operator = iota
NE = 1 NotEqualTo
GE = 2 GreatherThan
LE = 3 LessThan
MaskEqualTo
) )
const ( const (
ALLOW = 0 jumpJT = 0xff
DENY = 1 jumpJF = 0xff
JUMP = 2 labelJT = 0xfe
labelJF = 0xfe
) )
const ( const (
JUMP_JT = 0xff pfLD = 0x0
JUMP_JF = 0xff retKill = 0x00000000
LABEL_JT = 0xfe retTrap = 0x00030000
LABEL_JF = 0xfe retAllow = 0x7fff0000
modeFilter = 0x2
prSetNoNewPrivileges = 0x26
) )
const ( func actionErrno(errno uint32) uint32 {
pseudoCall = 30 return 0x00050000 | (errno & 0x0000ffff)
) }
const ( var (
ScmpActAllow = 0x0 secData = struct {
PF_LD = 0x0
BPF_RET = syscall.BPF_RET
BPF_K = syscall.BPF_K
BPF_ABS = syscall.BPF_ABS
BPF_JMP = syscall.BPF_JMP
BPF_JEQ = syscall.BPF_JEQ
BPF_W = syscall.BPF_W
BPF_LD = syscall.BPF_LD
BPF_JA = syscall.BPF_JA
BPF_MEM = syscall.BPF_MEM
BPF_ST = syscall.BPF_ST
BPF_JGT = syscall.BPF_JGT
BPF_JGE = syscall.BPF_JGE
BPF_JSET = syscall.BPF_JSET
SECCOMP_RET_KILL = 0x00000000
SECCOMP_RET_TRAP = 0x00030000
SECCOMP_RET_ALLOW = 0x7fff0000
SECCOMP_MODE_FILTER = 0x2
PR_SET_NO_NEW_PRIVS = 0x26
)
type seccompData struct {
nr int32 nr int32
arch uint32 arch uint32
insPointer uint64 insPointer uint64
args [6]uint64 args [6]uint64
}{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}}
)
var isLittle = func() bool {
var (
x = 0x1234
p = unsafe.Pointer(&x)
p2 = (*[unsafe.Sizeof(0)]byte)(p)
)
if p2[0] == 0 {
return false
}
return true
}()
var endian endianSupport
type endianSupport struct {
} }
type sockFilter struct { func (e endianSupport) hi(i uint32) uint32 {
code uint16 if isLittle {
jt uint8 return e.little(i)
jf uint8 }
k uint32 return e.big(i)
}
func (e endianSupport) low(i uint32) uint32 {
if isLittle {
return e.big(i)
}
return e.little(i)
}
func (endianSupport) big(idx uint32) uint32 {
if idx >= 6 {
return 0
}
return uint32(unsafe.Offsetof(secData.args)) + 8*idx
}
func (endianSupport) little(idx uint32) uint32 {
if idx < 0 || idx >= 6 {
return 0
}
return uint32(unsafe.Offsetof(secData.args)) +
uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch))
}
func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error {
_, _, err := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
if err != 0 {
return err
}
return nil
}
func newSockFprog(filter []sockFilter) *sockFprog {
return &sockFprog{
len: uint16(len(filter)),
filt: filter,
}
} }
type sockFprog struct { type sockFprog struct {
@ -79,440 +112,11 @@ type sockFprog struct {
filt []sockFilter filt []sockFilter
} }
type FilterArgs struct { func (s *sockFprog) set() error {
Args []Filter _, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP),
} uintptr(modeFilter), uintptr(unsafe.Pointer(s)))
if err != 0 {
type Action struct { return err
action int
args []FilterArgs
}
// Filter is a single condition on one syscall argument.
type Filter struct {
Arg uint32 // index of the syscall argument, starting from zero
Op int // comparison operation, such as EQ/NE/GE/LE
V uint // value the argument is compared against
}
// bpfLabel is a named position in the filter program.
type bpfLabel struct {
label string
// location is the instruction index of the label; 0xffffffff marks a
// label whose position has not been resolved yet.
location uint32
}
// bpfLabels is the table of labels used while building a filter program.
type bpfLabels struct {
// count is the number of entries in labels.
count uint32
labels []bpfLabel
}
// ScmpCtx is a seccomp context: the registered syscalls plus the filter
// program and label table built from them.
type ScmpCtx struct {
// CallMap maps a syscall number to the action registered for it.
CallMap map[int]*Action
filter []sockFilter
label bpfLabels
}
// argOFunc returns the offset of one 32-bit half of a syscall argument.
type argOFunc func(uint32) uint32
// argFunc appends the instructions that load a syscall argument.
type argFunc func(*ScmpCtx, uint32)
// jFunc appends a comparison against a value; the sockFilter is the
// instruction executed when the comparison matches.
type jFunc func(*ScmpCtx, uint, sockFilter)
// addFunc has the same signature as ScmpAdd.
type addFunc func(ctx *ScmpCtx, call int, action int, args ...FilterArgs) error
// secData is a zero-valued instance that is only used to compute field
// offsets and sizes through the unsafe package.
var secData seccompData = seccompData{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}}
// hiArg and loArg return the offsets of the high and low 32-bit halves of a
// syscall argument; init selects them according to the host byte order.
var hiArg argOFunc
var loArg argOFunc
// arg and the jXx functions are the argument loader and the comparison
// emitters; init selects the 32-bit or 64-bit variants according to the size
// of int.
var arg argFunc
var jEq jFunc
var jNe jFunc
var jGe jFunc
var jLe jFunc
// secAdd, when non-nil, handles ScmpAdd calls whose call number is above
// sysCallMax.
var secAdd addFunc = nil
// op is the comparison dispatch table, indexed by EQ, NE, GE and LE.
var op [4]jFunc
var (
// sysCallMin is not referenced by the code visible here.
sysCallMin = 0
// sysCallMax is the highest call number ScmpAdd registers directly; it
// is set per architecture in init.
sysCallMax = 0
)
// sigSec is the flag set by sigSeccomp and read and cleared by ScmpError.
var sigSec bool = false
// arg32 appends the instruction that loads the low 32-bit word of syscall
// argument idx into the accumulator.
func arg32(ctx *ScmpCtx, idx uint32) {
	load := scmpBpfStmt(BPF_LD+BPF_W+BPF_ABS, loArg(idx))
	ctx.filter = append(ctx.filter, load)
}
// jEq32 appends a 32-bit comparison that executes jt when the accumulator
// equals v and skips it otherwise.
func jEq32(ctx *ScmpCtx, v uint, jt sockFilter) {
	cmp := scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, uint32(v), 0, 1)
	ctx.filter = append(ctx.filter, cmp, jt)
}
// jNe32 appends a 32-bit comparison that skips jt when the accumulator
// equals v and executes it otherwise.
func jNe32(ctx *ScmpCtx, v uint, jt sockFilter) {
	cmp := scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, uint32(v), 1, 0)
	ctx.filter = append(ctx.filter, cmp, jt)
}
// jGe32 appends a 32-bit comparison that executes jt when the accumulator is
// greater than or equal to v and skips it otherwise.
func jGe32(ctx *ScmpCtx, v uint, jt sockFilter) {
	cmp := scmpBpfJump(BPF_JMP+BPF_JGE+BPF_K, uint32(v), 0, 1)
	ctx.filter = append(ctx.filter, cmp, jt)
}
// jLe32 appends a 32-bit comparison that skips jt when the accumulator is
// greater than v and executes it otherwise.
func jLe32(ctx *ScmpCtx, v uint, jt sockFilter) {
	cmp := scmpBpfJump(BPF_JMP+BPF_JGT+BPF_K, uint32(v), 1, 0)
	ctx.filter = append(ctx.filter, cmp, jt)
}
// arg64 appends the instructions that load the 64-bit syscall argument idx
// as two 32-bit words: the low word goes to scratch slot 0 and the high word
// to scratch slot 1. The high word is left in the accumulator.
func arg64(ctx *ScmpCtx, idx uint32) {
	ctx.filter = append(ctx.filter,
		scmpBpfStmt(BPF_LD+BPF_W+BPF_ABS, loArg(idx)),
		scmpBpfStmt(BPF_ST, 0),
		scmpBpfStmt(BPF_LD+BPF_W+BPF_ABS, hiArg(idx)),
		scmpBpfStmt(BPF_ST, 1),
	)
}
// jNe64 appends a 64-bit inequality test of the loaded argument against v
// and runs jt when the argument differs from v in either its high or its low
// word.
//
// It expects the state left by arg64: high word in the accumulator, low word
// in scratch slot 0 and high word in scratch slot 1. The accumulator holds
// the high word again when the sequence is left.
//
// The previous jump offsets skipped the whole sequence whenever the high
// words were equal, so jt only ran when both halves differed. For ordinary
// 32-bit values, whose high word is zero on both sides, it never ran.
func jNe64(ctx *ScmpCtx, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	ctx.filter = append(ctx.filter,
		// High words differ: the values cannot be equal, go straight to jt.
		scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, hi, 0, 3),
		scmpBpfStmt(BPF_LD+BPF_MEM, 0),
		// Low words are equal as well: the values are equal, skip jt.
		scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, lo, 2, 0),
		scmpBpfStmt(BPF_LD+BPF_MEM, 1),
		jt,
		scmpBpfStmt(BPF_LD+BPF_MEM, 1),
	)
}
// jGe64 appends a 64-bit "greater than or equal" test of the loaded argument
// against v and runs jt when it matches.
//
// It expects the state left by arg64: high word in the accumulator, low word
// in scratch slot 0 and high word in scratch slot 1. The accumulator holds
// the high word again when the sequence is left.
func jGe64(ctx *ScmpCtx, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	ctx.filter = append(ctx.filter,
		scmpBpfJump(BPF_JMP+BPF_JGT+BPF_K, hi, 4, 0),
		scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, hi, 0, 5),
		scmpBpfStmt(BPF_LD+BPF_MEM, 0),
		scmpBpfJump(BPF_JMP+BPF_JGE+BPF_K, lo, 0, 2),
		scmpBpfStmt(BPF_LD+BPF_MEM, 1),
		jt,
		scmpBpfStmt(BPF_LD+BPF_MEM, 1),
	)
}
// jEq64 appends a 64-bit equality test of the loaded argument against v and
// runs jt when both the high and the low word are equal.
//
// It expects the state left by arg64: high word in the accumulator, low word
// in scratch slot 0 and high word in scratch slot 1. The accumulator holds
// the high word again when the sequence is left.
func jEq64(ctx *ScmpCtx, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	ctx.filter = append(ctx.filter,
		scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, hi, 0, 5),
		scmpBpfStmt(BPF_LD+BPF_MEM, 0),
		scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, lo, 0, 2),
		scmpBpfStmt(BPF_LD+BPF_MEM, 1),
		jt,
		scmpBpfStmt(BPF_LD+BPF_MEM, 1),
	)
}
// jLe64 appends a 64-bit "less than or equal" test of the loaded argument
// against v and runs jt when it matches.
//
// It expects the state left by arg64: high word in the accumulator, low word
// in scratch slot 0 and high word in scratch slot 1. The sequence is skipped
// when the argument's high word is greater than v's, or when the high words
// are equal and the argument's low word is greater. The accumulator holds
// the high word again when the sequence is left.
func jLe64(ctx *ScmpCtx, v uint, jt sockFilter) {
	lo := uint32(uint64(v) % 0x100000000)
	hi := uint32(uint64(v) / 0x100000000)
	ctx.filter = append(ctx.filter,
		scmpBpfJump(BPF_JMP+BPF_JGT+BPF_K, hi, 6, 0),
		scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, hi, 0, 3),
		scmpBpfStmt(BPF_LD+BPF_MEM, 0),
		scmpBpfJump(BPF_JMP+BPF_JGT+BPF_K, lo, 2, 0),
		scmpBpfStmt(BPF_LD+BPF_MEM, 1),
		jt,
		scmpBpfStmt(BPF_LD+BPF_MEM, 1),
	)
}
// allow appends an unconditional return of SECCOMP_RET_ALLOW.
func allow(ctx *ScmpCtx) {
	ret := scmpBpfStmt(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
	ctx.filter = append(ctx.filter, ret)
}
// deny appends an unconditional return of SECCOMP_RET_TRAP.
func deny(ctx *ScmpCtx) {
	ret := scmpBpfStmt(BPF_RET+BPF_K, SECCOMP_RET_TRAP)
	ctx.filter = append(ctx.filter, ret)
}
// jump appends a symbolic jump to label lb, registering the label if it is
// not known yet. The jump is resolved when the filter is loaded.
func jump(ctx *ScmpCtx, lb string) {
	target := findLabel(&ctx.label, lb)
	ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JA, target, JUMP_JT, JUMP_JF))
}
// label appends a label marker for lb, registering the label if it is not
// known yet. The marker is resolved when the filter is loaded.
func label(ctx *ScmpCtx, lb string) {
	id := findLabel(&ctx.label, lb)
	ctx.filter = append(ctx.filter, scmpBpfJump(BPF_JMP+BPF_JA, id, LABEL_JT, LABEL_JF))
}
// secCall appends a comparison of the accumulator against syscall number nr,
// followed by jt. jt is executed on a match and skipped otherwise.
func secCall(ctx *ScmpCtx, nr int, jt sockFilter) {
	match := scmpBpfJump(BPF_JMP+BPF_JEQ+BPF_K, uint32(nr), 0, 1)
	ctx.filter = append(ctx.filter, match, jt)
}
// findLabel returns the index of label lb in labels. Names are compared
// case-insensitively. An unknown label is appended with the unresolved
// location 0xffffffff, and the index of the new entry is returned.
func findLabel(labels *bpfLabels, lb string) uint32 {
	for id := uint32(0); id < labels.count; id++ {
		if strings.EqualFold(lb, labels.labels[id].label) {
			return id
		}
	}
	next := labels.count
	labels.labels = append(labels.labels, bpfLabel{label: lb, location: 0xffffffff})
	labels.count++
	return next
}
// hiArgLittle returns the offset, within the seccomp data, of the second
// 32-bit word of syscall argument idx. It returns 0 when idx is not a valid
// argument index (6 or above).
func hiArgLittle(idx uint32) uint32 {
	if idx >= 6 {
		return 0
	}
	base := uint32(unsafe.Offsetof(secData.args))
	stride := uint32(unsafe.Alignof(secData.args[0]))
	return base + stride*idx + uint32(unsafe.Sizeof(secData.arch))
}
// hiArgBig returns the offset, within the seccomp data, of the first 32-bit
// word of syscall argument idx. It returns 0 when idx is not a valid
// argument index (6 or above).
func hiArgBig(idx uint32) uint32 {
	if idx >= 6 {
		return 0
	}
	base := uint32(unsafe.Offsetof(secData.args))
	return base + 8*idx
}
// isLittle reports whether the host stores integers in little-endian order,
// by checking whether the first byte in memory of the int 0x1234 is
// non-zero.
func isLittle() bool {
	probe := 0x1234
	first := (*[unsafe.Sizeof(0)]byte)(unsafe.Pointer(&probe))[0]
	return first != 0
}
// scmpBpfStmt builds a non-branching BPF instruction with opcode code and
// operand k.
func scmpBpfStmt(code uint16, k uint32) sockFilter {
	return sockFilter{code: code, jt: 0, jf: 0, k: k}
}
// scmpBpfJump builds a BPF jump instruction with opcode code, operand k and
// the offsets jt and jf taken when the condition is true or false.
func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter {
	return sockFilter{code: code, jt: jt, jf: jf, k: k}
}
func prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) {
_, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
if e1 != 0 {
err = e1
} }
return nil return nil
} }
// scmpfilter installs prog as the seccomp filter of the calling process by
// issuing prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prog).
//
// It returns the errno reported by the kernel, or nil on success. The
// previous version stored the errno in the named result and then executed
// "return nil", so a failed installation was reported as success.
func scmpfilter(prog *sockFprog) (err error) {
	_, _, e1 := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP),
		uintptr(SECCOMP_MODE_FILTER), uintptr(unsafe.Pointer(prog)))
	if e1 != 0 {
		return e1
	}
	return nil
}
// CombineArgs merges args2 into args1 and returns the result.
//
// For every position present in both slices, the filters of args2 that are
// not among the filters args1 held at that position before the merge are
// appended to it. Groups of args2 beyond the length of args1 are appended as
// they are. The elements of args1 are modified in place.
func CombineArgs(args1 []FilterArgs, args2 []FilterArgs) []FilterArgs {
	shared := len(args1)
	if len(args2) < shared {
		shared = len(args2)
	}
	for i := 0; i < shared; i++ {
		// Only the filters present before merging take part in the
		// duplicate check.
		existing := args1[i].Args
		for _, candidate := range args2[i].Args {
			duplicate := false
			for _, have := range existing {
				if have == candidate {
					duplicate = true
					break
				}
			}
			if !duplicate {
				args1[i].Args = append(args1[i].Args, candidate)
			}
		}
	}
	if shared < len(args2) {
		args1 = append(args1, args2[shared:]...)
	}
	return args1
}
// Sys returns the number registered for the syscall name call in syscallMap,
// or -1 when the name is unknown.
func Sys(call string) int {
	if nr, known := syscallMap[call]; known {
		return nr
	}
	return -1
}
// ScmpInit creates a seccomp context whose filter program starts with the
// instruction that loads the syscall number. The action argument is not
// used, and the returned error is always nil.
func ScmpInit(action int) (*ScmpCtx, error) {
	ctx := &ScmpCtx{
		CallMap: make(map[int]*Action),
		filter:  make([]sockFilter, 0, 128),
	}
	ctx.label.count = 0
	ctx.label.labels = make([]bpfLabel, 0, 128)

	loadNr := sockFilter{PF_LD + BPF_W + BPF_ABS, 0, 0, uint32(unsafe.Offsetof(secData.nr))}
	ctx.filter = append(ctx.filter, loadNr)
	return ctx, nil
}
// ScmpDel removes the entry registered for call from the context. It returns
// an error when call was not registered.
func ScmpDel(ctx *ScmpCtx, call int) error {
	if _, registered := ctx.CallMap[call]; !registered {
		return errors.New("syscall not exist")
	}
	delete(ctx.CallMap, call)
	return nil
}
// ScmpAdd registers call in the context with the given action and argument
// filters.
//
// Call numbers up to sysCallMax are stored directly and may be registered
// only once. Larger numbers are handed to the secAdd hook when one is
// installed. An error is returned for a negative call, a call that is
// already registered, or a call above sysCallMax without a hook.
func ScmpAdd(ctx *ScmpCtx, call int, action int, args ...FilterArgs) error {
	switch {
	case call < 0:
		return errors.New("syscall error, call < 0")
	case call > sysCallMax:
		if secAdd == nil {
			return errors.New("syscall not surport")
		}
		return secAdd(ctx, call, action, args...)
	}
	if _, registered := ctx.CallMap[call]; registered {
		return errors.New("syscall exist")
	}
	ctx.CallMap[call] = &Action{action, args}
	return nil
}
// ScmpLoad compiles the syscalls registered in ctx into a BPF program and
// installs it as the seccomp filter of the calling process.
//
// The program consists of a dispatch section (one entry per registered
// syscall, followed by a default "deny") and one section of argument checks
// per syscall. The action stored with each syscall is not consulted here:
// a matching rule always returns SECCOMP_RET_ALLOW.
//
// An error is returned when a label cannot be resolved or is defined twice,
// or when one of the prctl helpers reports a failure.
func ScmpLoad(ctx *ScmpCtx) error {
// Dispatch section: a syscall without argument filters is allowed
// directly; otherwise control jumps to the label of its first argument
// group.
for call, act := range ctx.CallMap {
if len(act.args) == 0 {
secCall(ctx, call, scmpBpfStmt(BPF_RET+BPF_K, SECCOMP_RET_ALLOW))
} else {
if len(act.args[0].Args) > 0 {
lb := fmt.Sprintf("lb-%d-%d", call, act.args[0].Args[0].Arg)
secCall(ctx, call,
scmpBpfJump(BPF_JMP+BPF_JA, findLabel(&ctx.label, lb),
JUMP_JT, JUMP_JF))
}
}
}
// Default for syscalls that matched no dispatch entry.
deny(ctx)
// Argument sections: each non-empty group gets a label and a load of the
// argument named by its first filter.
for call, act := range ctx.CallMap {
for i := 0; i < len(act.args); i++ {
if len(act.args[i].Args) > 0 {
lb := fmt.Sprintf("lb-%d-%d", call, act.args[i].Args[0].Arg)
label(ctx, lb)
arg(ctx, act.args[i].Args[0].Arg)
}
for j := 0; j < len(act.args[i].Args); j++ {
// jf is what runs when the comparison matches: a jump to the next
// non-empty group, or "allow" when this is the last one.
var jf sockFilter
if len(act.args)-1 > i && len(act.args[i+1].Args) > 0 {
lbj := fmt.Sprintf("lb-%d-%d", call, act.args[i+1].Args[0].Arg)
jf = scmpBpfJump(BPF_JMP+BPF_JA,
findLabel(&ctx.label, lbj), JUMP_JT, JUMP_JF)
} else {
jf = scmpBpfStmt(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
}
// op has four entries (EQ, NE, GE, LE); any other Op value indexes
// out of range and panics.
op[act.args[i].Args[j].Op](ctx, act.args[i].Args[j].V, jf)
}
// No filter of the group matched.
deny(ctx)
}
}
// Resolve labels. Walking backwards visits each label marker before the
// jumps that target it, because markers are emitted after those jumps.
idx := int32(len(ctx.filter) - 1)
for ; idx >= 0; idx-- {
filter := &ctx.filter[idx]
if filter.code != (BPF_JMP + BPF_JA) {
continue
}
rel := int32(filter.jt)<<8 | int32(filter.jf)
if ((JUMP_JT << 8) | JUMP_JF) == rel {
// Symbolic jump: replace the label index with a relative offset.
if ctx.label.labels[filter.k].location == 0xffffffff {
return errors.New("Unresolved label")
}
filter.k = ctx.label.labels[filter.k].location - uint32(idx+1)
filter.jt = 0
filter.jf = 0
} else if ((LABEL_JT << 8) | LABEL_JF) == rel {
// Label marker: record its position and turn it into a no-op jump.
if ctx.label.labels[filter.k].location != 0xffffffff {
return errors.New("Duplicate label use")
}
ctx.label.labels[filter.k].location = uint32(idx)
filter.k = 0
filter.jt = 0
filter.jf = 0
}
}
prog := sockFprog{
len: uint16(len(ctx.filter)),
filt: ctx.filter,
}
// The no-new-privileges flag is set before the filter is installed.
if nil != prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) {
fmt.Println("prctl PR_SET_NO_NEW_PRIVS error")
return errors.New("prctl PR_SET_NO_NEW_PRIVS error")
}
if nil != scmpfilter(&prog) {
fmt.Println("scmpfilter error")
return errors.New("scmpfilter error")
}
return nil
}
// sigSeccomp sets the sigSec flag that ScmpError reports.
func sigSeccomp() {
sigSec = true
}
// ScmpError returns the current value of the sigSec flag and resets the flag
// to false.
func ScmpError() bool {
	defer func() { sigSec = false }()
	return sigSec
}
// init selects the architecture-dependent pieces of the package: the highest
// directly supported call number, the argument offset helpers for the host
// byte order, the 32-bit or 64-bit argument loader and comparison emitters,
// and the comparison dispatch table. It also starts watching for SIGSYS.
func init() {
	switch runtime.GOARCH {
	case "386":
		sysCallMax = 340
	case "amd64":
		sysCallMax = 302
	case "arm":
		sysCallMax = 377
	case "arm64":
		sysCallMax = 281
	case "ppc64", "ppc64le":
		sysCallMax = 354
	}

	if isLittle() {
		hiArg = hiArgLittle
		loArg = hiArgBig
	} else {
		hiArg = hiArgBig
		loArg = hiArgLittle
	}

	if unsafe.Sizeof(int(0)) == 8 {
		arg = arg64
		jEq = jEq64
		jNe = jNe64
		jGe = jGe64
		jLe = jLe64
	} else {
		arg = arg32
		jEq = jEq32
		jNe = jNe32
		jGe = jGe32
		jLe = jLe32
	}

	op[EQ] = jEq
	op[NE] = jNe
	op[GE] = jGe
	op[LE] = jLe

	// Record SIGSYS deliveries in the flag reported by ScmpError. The
	// previous code ran sigSeccomp in a goroutine straight away, which set
	// the flag at start-up without any signal having arrived, and it never
	// read from the channel. The channel is buffered because signal.Notify
	// does not block when sending.
	// NOTE(review): sigSec is written here and read in ScmpError without
	// synchronization.
	chSignal := make(chan os.Signal, 1)
	signal.Notify(chSignal, syscall.SIGSYS)
	go func() {
		for range chSignal {
			sigSeccomp()
		}
	}()
}

View File

@ -1,107 +0,0 @@
package main
import (
"fmt"
"flag"
"os"
"syscall"
sec "seccomp"
)
// File descriptor numbers and the maximum write length used by the filters
// in this test program.
const (
STDIN_FILENO = 0
STDOUT_FILENO = 1
// BUFLEN is the upper bound placed on the third argument of write.
BUFLEN = 8
)
// writeOk loads a filter that allows exit, exit_group and writes to stdout
// of at most BUFLEN bytes, then prints an 8-byte line, which the filter
// permits. Errors from the seccomp calls are ignored.
func writeOk(args []string) {
	ctx, _ := sec.ScmpInit(sec.ScmpActAllow)
	for _, name := range []string{"exit", "exit_group"} {
		sec.ScmpAdd(ctx, sec.Sys(name), sec.ScmpActAllow)
	}
	//the first arg is STDOUT_FILENO, the third arg must be <= BUFLEN
	toStdout := sec.FilterArgs{[]sec.Filter{{0, sec.EQ, STDOUT_FILENO}}}
	shortEnough := sec.FilterArgs{[]sec.Filter{{2, sec.LE, BUFLEN}}}
	sec.ScmpAdd(ctx, sec.Sys("write"), sec.ScmpActAllow, toStdout, shortEnough)
	sec.ScmpLoad(ctx)
	fmt.Printf("8888888\n") //ok
}
// writeErr loads the same filter as writeOk and then prints a 9-byte line,
// which exceeds the BUFLEN limit placed on write. Errors from the seccomp
// calls are ignored.
func writeErr(args []string) {
	ctx, _ := sec.ScmpInit(sec.ScmpActAllow)
	for _, name := range []string{"exit", "exit_group"} {
		sec.ScmpAdd(ctx, sec.Sys(name), sec.ScmpActAllow)
	}
	toStdout := sec.FilterArgs{[]sec.Filter{{0, sec.EQ, STDOUT_FILENO}}}
	shortEnough := sec.FilterArgs{[]sec.Filter{{2, sec.LE, BUFLEN}}}
	sec.ScmpAdd(ctx, sec.Sys("write"), sec.ScmpActAllow, toStdout, shortEnough)
	sec.ScmpLoad(ctx)
	// bad system call
	fmt.Printf("99999999\n")
}
// socketOk loads a filter that allows socket, exit, exit_group and short
// writes to stdout, then creates a socket and prints a confirmation. Errors
// from the seccomp calls and from Socket are ignored.
func socketOk(args []string) {
	ctx, _ := sec.ScmpInit(sec.ScmpActAllow)
	//for 386, the next line is same as
	//sec.ScmpAdd(scmpCtx, sec.Sys("socketcall"), sec.ScmpActAllow,
	//	sec.FilterArgs{[]sec.Filter{{0, sec.EQ, 1}}},
	//)
	//SYS_SOCKET = 1
	for _, name := range []string{"socket", "exit", "exit_group"} {
		sec.ScmpAdd(ctx, sec.Sys(name), sec.ScmpActAllow)
	}
	toStdout := sec.FilterArgs{[]sec.Filter{{0, sec.EQ, STDOUT_FILENO}}}
	shortEnough := sec.FilterArgs{[]sec.Filter{{2, sec.LE, BUFLEN}}}
	sec.ScmpAdd(ctx, sec.Sys("write"), sec.ScmpActAllow, toStdout, shortEnough)
	sec.ScmpLoad(ctx)
	syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP)
	fmt.Printf("Sock ok\n")
}
// socketErr loads a filter that allows only exit and exit_group and then
// tries to create a socket, which the filter does not permit. Errors from
// the seccomp calls and from Socket are ignored.
func socketErr(args []string) {
	ctx, _ := sec.ScmpInit(sec.ScmpActAllow)
	for _, name := range []string{"exit", "exit_group"} {
		sec.ScmpAdd(ctx, sec.Sys(name), sec.ScmpActAllow)
	}
	sec.ScmpLoad(ctx)
	// bad system call
	syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_IP)
}
// main runs the scenario named by the single positional argument; it does
// nothing for any other number of arguments or for an unknown name.
func main() {
	flag.Parse()
	if flag.NArg() != 1 {
		return
	}
	args := os.Args[1:]
	switch flag.Arg(0) {
	case "writeOk":
		writeOk(args)
	case "writeErr":
		writeErr(args)
	case "socketOk":
		socketOk(args)
	case "socketErr":
		socketErr(args)
	}
}

View File

@ -1,117 +0,0 @@
// +build linux
// +build 386
package seccomp
import (
"errors"
)
// Pseudo call numbering for the calls that 386 multiplexes through ipc and
// socketcall. scmpAdd386 subtracts sysCallMax from a call number and then
// checks which of the two ranges below the remainder falls into.
var (
// syscallInterval is the size of each pseudo call range.
syscallInterval = 100
// ipcNr is the start of the range routed to the ipc syscall.
ipcNr = syscallInterval + 0
// socketcallNr is the start of the range routed to the socketcall syscall.
socketcallNr = syscallInterval + ipcNr
// callipc and callsocket are the bases added to the sub-call numbers in
// syscallMap; init sets them to the range start plus sysCallMax.
callipc = 0
callsocket = 0
)
// scmpAdd386 registers a pseudo call, that is a call number above
// sysCallMax that stands for a sub-call of the ipc or socketcall syscall.
//
// The pseudo number is mapped back to the real syscall and its sub-call
// number. When the real syscall is not registered yet, it is added with a
// first argument group requiring argument 0 to equal the sub-call number,
// followed by copies of args. When it is already registered, copies of args
// are merged into the existing groups with CombineArgs.
//
// An error is returned when call falls in neither pseudo range.
//
// The previous copy loops incremented i instead of j, which indexed args out
// of range as soon as any group contained a filter.
func scmpAdd386(ctx *ScmpCtx, call int, action int, args ...FilterArgs) error {
	var syscallNo int
	pseCall := call - sysCallMax
	switch {
	case pseCall >= ipcNr && pseCall < ipcNr+syscallInterval:
		syscallNo = syscallMap["ipc"]
		pseCall = (pseCall - ipcNr) % ipcNr
	case pseCall >= socketcallNr && pseCall < socketcallNr+syscallInterval:
		syscallNo = syscallMap["socketcall"]
		pseCall = (pseCall - socketcallNr) % socketcallNr
	default:
		return errors.New("scmpAdd386, syscall error")
	}

	if act, exists := ctx.CallMap[syscallNo]; exists {
		act.args = CombineArgs(act.args, cloneFilterArgs(args))
		return nil
	}

	newArg := make([]FilterArgs, 1, len(args)+1)
	newArg[0].Args = []Filter{{Arg: 0, Op: EQ, V: uint(pseCall)}}
	newArg = append(newArg, cloneFilterArgs(args)...)
	ctx.CallMap[syscallNo] = &Action{action, newArg}
	return nil
}

// cloneFilterArgs returns a copy of args whose filter slices do not share
// storage with the originals. Empty groups are kept as empty groups.
func cloneFilterArgs(args []FilterArgs) []FilterArgs {
	out := make([]FilterArgs, len(args))
	for i := range args {
		if n := len(args[i].Args); n > 0 {
			out[i].Args = make([]Filter, n)
			copy(out[i].Args, args[i].Args)
		}
	}
	return out
}
// resetCallipc replaces the syscallMap entry for call with num offset by
// callipc.
func resetCallipc(call string, num int) {
syscallMap[call] = num + callipc
}
// resetCallsocket replaces the syscallMap entry for call with num offset by
// callsocket.
func resetCallsocket(call string, num int) {
syscallMap[call] = num + callsocket
}
// init configures the package for 386: it sets the highest direct call
// number, computes the pseudo call bases, installs scmpAdd386 as the handler
// for pseudo calls, and rewrites the syscallMap entries of the ipc and
// socketcall sub-calls to their pseudo call numbers.
func init() {
	sysCallMax = 340
	callipc = ipcNr + sysCallMax
	callsocket = socketcallNr + sysCallMax
	secAdd = scmpAdd386

	type subCall struct {
		name string
		num  int
	}

	ipcCalls := []subCall{
		{"semop", 1},
		{"semget", 2},
		{"semctl", 3},
		{"semtimedop", 4},
		{"msgsnd", 11},
		{"msgrcv", 12},
		{"msgget", 13},
		{"msgctl", 14},
		{"shmat", 21},
		{"shmdt", 22},
		{"shmget", 23},
		{"shmctl", 24},
	}
	for _, c := range ipcCalls {
		resetCallipc(c.name, c.num)
	}

	socketCalls := []subCall{
		{"socket", 1},
		{"bind", 2},
		{"connect", 3},
		{"listen", 4},
		{"accept", 5},
		{"getsockname", 6},
		{"getpeername", 7},
		{"socketpair", 8},
		{"send", 9},
		{"recv", 10},
		{"sendto", 11},
		{"recvfrom", 12},
		{"shutdown", 13},
		{"setsockopt", 14},
		{"getsockopt", 15},
		{"sendmsg", 16},
		{"recvmsg", 17},
		{"accept4", 18},
		{"recvmmsg", 19},
		{"sendmmsg", 20},
	}
	for _, c := range socketCalls {
		resetCallsocket(c.name, c.num)
	}
}

View File

@ -1,58 +0,0 @@
package seccomp
import (
"fmt"
"os/exec"
"testing"
)
// osec is the path the test program is copied to and run from with "go run".
var osec = "/go/src/seccomp_main.go"
// secMain runs the test program at osec with args through "go run" and
// checks the outcome against the scenario named by args[0]: the "Ok"
// scenarios must succeed and the "Err" scenarios must fail. It does nothing
// when args is empty.
func secMain(t *testing.T, args []string) {
	if len(args) < 1 {
		return
	}
	cmd := args[0]
	argv := append([]string{"run", osec}, args...)
	_, err := exec.Command("go", argv...).Output()
	fmt.Printf("do %s, err is [%v]\n", cmd, err)
	switch {
	case err != nil && ("writeOk" == cmd || "socketOk" == cmd):
		t.Fatal(err)
	case err == nil && ("writeErr" == cmd || "socketErr" == cmd):
		t.Fatal(err)
	}
}
// commandGC removes file recursively with "rm -rf" and prints the command's
// output. A failure of the command is ignored.
func commandGC(file string) {
	out, _ := exec.Command("rm", "-rf", file).Output()
	fmt.Println(string(out))
}
// cp copies src to dst with "cp -ra" and prints the command's output. A
// failure of the command is ignored.
func cp(src, dst string) {
	out, _ := exec.Command("cp", "-ra", src, dst).Output()
	fmt.Println(string(out))
}
// TestSeccomp copies the package and the test program to hard-coded
// locations under /go/src, runs the four scenarios in order and removes the
// copies when it returns.
func TestSeccomp(t *testing.T) {
	//hard code
	cp("../seccomp", "/go/src/")
	cp("./seccomp.test", osec)
	defer commandGC("/go/src/seccomp")
	defer commandGC(osec)

	scenarios := []string{"writeOk", "writeErr", "socketOk", "socketErr"}
	for _, name := range scenarios {
		secMain(t, []string{name})
	}
}

View File

@ -1,390 +0,0 @@
//x86_64
package seccomp
var syscallMap = map[string] int {
"access" : 21,
"chdir" : 80,
"chmod" : 90,
"chown" : 92,
"chown32" : -1,
"close" : 3,
"creat" : 85,
"dup" : 32,
"dup2" : 33,
"dup3" : 292,
"epoll_create" : 213,
"epoll_create1" : 291,
"epoll_ctl" : 233,
"epoll_ctl_old" : 214,
"epoll_pwait" : 281,
"epoll_wait" : 232,
"epoll_wait_old" : 215,
"eventfd" : 284,
"eventfd2" : 290,
"faccessat" : 269,
"fadvise64" : 221,
"fadvise64_64" : -1,
"fallocate" : 285,
"fanotify_init" : 300,
"fanotify_mark" : 301,
"ioctl" : 16,
"fchdir" : 81,
"fchmod" : 91,
"fchmodat" : 268,
"fchown" : 93,
"fchown32" : -1,
"fchownat" : 260,
"fcntl" : 72,
"fcntl64" : -1,
"fdatasync" : 75,
"fgetxattr" : 193,
"flistxattr" : 196,
"flock" : 73,
"fremovexattr" : 199,
"fsetxattr" : 190,
"fstat" : 5,
"fstat64" : -1,
"fstatat64" : -1,
"fstatfs" : 138,
"fstatfs64" : -1,
"fsync" : 74,
"ftruncate" : 77,
"ftruncate64" : -1,
"getcwd" : 79,
"getdents" : 78,
"getdents64" : 217,
"getxattr" : 191,
"inotify_add_watch" : 254,
"inotify_init" : 253,
"inotify_init1" : 294,
"inotify_rm_watch" : 255,
"io_cancel" : 210,
"io_destroy" : 207,
"io_getevents" : 208,
"io_setup" : 206,
"io_submit" : 209,
"lchown" : 94,
"lchown32" : -1,
"lgetxattr" : 192,
"link" : 86,
"linkat" : 265,
"listxattr" : 194,
"llistxattr" : 195,
"llseek" : -1,
"_llseek" : -1,
"lremovexattr" : 198,
"lseek" : 8,
"lsetxattr" : 189,
"lstat" : 6,
"lstat64" : -1,
"mkdir" : 83,
"mkdirat" : 258,
"mknod" : 133,
"mknodat" : 259,
"newfstatat" : 262,
"_newselect" : -1,
"oldfstat" : -1,
"oldlstat" : -1,
"oldolduname" : -1,
"oldstat" : -1,
"olduname" : -1,
"oldwait4" : -1,
"open" : 2,
"openat" : 257,
"pipe" : 22,
"pipe2" : 293,
"poll" : 7,
"ppoll" : 271,
"pread64" : 17,
"preadv" : 295,
"futimesat" : 261,
"pselect6" : 270,
"pwrite64" : 18,
"pwritev" : 296,
"read" : 0,
"readahead" : 187,
"readdir" : -1,
"readlink" : 89,
"readlinkat" : 267,
"readv" : 19,
"removexattr" : 197,
"rename" : 82,
"renameat" : 264,
"rmdir" : 84,
"select" : 23,
"sendfile" : 40,
"sendfile64" : -1,
"setxattr" : 188,
"splice" : 275,
"stat" : 4,
"stat64" : -1,
"statfs" : 137,
"statfs64" : -1,
"symlink" : 88,
"symlinkat" : 266,
"sync" : 162,
"sync_file_range" : 277,
"sync_file_range2" : -1,
"syncfs" : 306,
"tee" : 276,
"truncate" : 76,
"truncate64" : -1,
"umask" : 95,
"unlink" : 87,
"unlinkat" : 263,
"ustat" : 136,
"utime" : 132,
"utimensat" : 280,
"utimes" : 235,
"write" : 1,
"writev" : 20,
"accept" : 43,
"accept4" : 288,
"bind" : 49,
"connect" : 42,
"getpeername" : 52,
"getsockname" : 51,
"getsockopt" : 55,
"listen" : 50,
"recv" : -1,
"recvfrom" : 45,
"recvmmsg" : 299,
"recvmsg" : 47,
"send" : -1,
"sendmmsg" : 307,
"sendmsg" : 46,
"sendto" : 44,
"setsockopt" : 54,
"shutdown" : 48,
"socket" : 41,
"socketcall" : -1,
"socketpair" : 53,
"sethostname" : 170,
"pause" : 34,
"rt_sigaction" : 13,
"rt_sigpending" : 127,
"rt_sigprocmask" : 14,
"rt_sigqueueinfo" : 129,
"rt_sigreturn" : 15,
"rt_sigsuspend" : 130,
"rt_sigtimedwait" : 128,
"rt_tgsigqueueinfo" : 297,
"sigaction" : -1,
"sigaltstack" : 131,
"signal" : -1,
"signalfd" : 282,
"signalfd4" : 289,
"sigpending" : -1,
"sigprocmask" : -1,
"sigreturn" : -1,
"sigsuspend" : -1,
"alarm" : 37,
"brk" : 12,
"clock_adjtime" : 305,
"clock_getres" : 229,
"clock_gettime" : 228,
"clock_nanosleep" : 230,
"clock_settime" : 227,
"gettimeofday" : 96,
"nanosleep" : 35,
"nice" : -1,
"sysinfo" : 99,
"syslog" : 103,
"time" : 201,
"timer_create" : 222,
"timer_delete" : 226,
"timerfd_create" : 283,
"timerfd_gettime" : 287,
"timerfd_settime" : 286,
"timer_getoverrun" : 225,
"timer_gettime" : 224,
"timer_settime" : 223,
"times" : 100,
"uname" : 63,
"madvise" : 28,
"mbind" : 237,
"mincore" : 27,
"mlock" : 149,
"mlockall" : 151,
"mmap" : 9,
"mmap2" : -1,
"mprotect" : 10,
"mremap" : 25,
"msync" : 26,
"munlock" : 150,
"munlockall" : 152,
"munmap" : 11,
"remap_file_pages" : 216,
"set_mempolicy" : 238,
"vmsplice" : 278,
"capget" : 125,
"capset" : 126,
"clone" : 56,
"execve" : 59,
"exit" : 60,
"exit_group" : 231,
"fork" : 57,
"getcpu" : 309,
"getpgid" : 121,
"getpgrp" : 111,
"getpid" : 39,
"getppid" : 110,
"getpriority" : 140,
"getresgid" : 120,
"getresgid32" : -1,
"getresuid" : 118,
"getresuid32" : -1,
"getrlimit" : 97,
"getrusage" : 98,
"getsid" : 124,
"getuid" : 102,
"getuid32" : -1,
"getegid" : 108,
"getegid32" : -1,
"geteuid" : 107,
"geteuid32" : -1,
"getgid" : 104,
"getgid32" : -1,
"getgroups" : 115,
"getgroups32" : -1,
"getitimer" : 36,
"get_mempolicy" : 239,
"kill" : 62,
"prctl" : 157,
"prlimit64" : 302,
"sched_getaffinity" : 204,
"sched_getparam" : 143,
"sched_get_priority_max" : 146,
"sched_get_priority_min" : 147,
"sched_getscheduler" : 145,
"sched_rr_get_interval" : 148,
"sched_setaffinity" : 203,
"sched_setparam" : 142,
"sched_setscheduler" : 144,
"sched_yield" : 24,
"setfsgid" : 123,
"setfsgid32" : -1,
"setfsuid" : 122,
"setfsuid32" : -1,
"setgid" : 106,
"setgid32" : -1,
"setgroups" : 116,
"setgroups32" : -1,
"setitimer" : 38,
"setpgid" : 109,
"setpriority" : 141,
"setregid" : 114,
"setregid32" : -1,
"setresgid" : 119,
"setresgid32" : -1,
"setresuid" : 117,
"setresuid32" : -1,
"setreuid" : 113,
"setreuid32" : -1,
"setrlimit" : 160,
"setsid" : 112,
"setuid" : 105,
"setuid32" : -1,
"ugetrlimit" : -1,
"vfork" : 58,
"wait4" : 61,
"waitid" : 247,
"waitpid" : -1,
"ipc" : -1,
"mq_getsetattr" : 245,
"mq_notify" : 244,
"mq_open" : 240,
"mq_timedreceive" : 243,
"mq_timedsend" : 242,
"mq_unlink" : 241,
"msgctl" : 71,
"msgget" : 68,
"msgrcv" : 70,
"msgsnd" : 69,
"semctl" : 66,
"semget" : 64,
"semop" : 65,
"semtimedop" : 220,
"shmat" : 30,
"shmctl" : 31,
"shmdt" : 67,
"shmget" : 29,
"arch_prctl" : 158,
"get_robust_list" : 274,
"get_thread_area" : 211,
"gettid" : 186,
"futex" : 202,
"restart_syscall" : 219,
"set_robust_list" : 273,
"set_thread_area" : 205,
"set_tid_address" : 218,
"tgkill" : 234,
"tkill" : 200,
"acct" : 163,
"adjtimex" : 159,
"bdflush" : -1,
"chroot" : 161,
"create_module" : 174,
"delete_module" : 176,
"get_kernel_syms" : 177,
"idle" : -1,
"init_module" : 175,
"ioperm" : 173,
"iopl" : 172,
"ioprio_get" : 252,
"ioprio_set" : 251,
"kexec_load" : 246,
"lookup_dcookie" : 212,
"migrate_pages" : 256,
"modify_ldt" : 154,
"mount" : 165,
"move_pages" : 279,
"name_to_handle_at" : 303,
"nfsservctl" : 180,
"open_by_handle_at" : 304,
"perf_event_open" : 298,
"pivot_root" : 155,
"process_vm_readv" : 310,
"process_vm_writev" : 311,
"ptrace" : 101,
"query_module" : 178,
"quotactl" : 179,
"reboot" : 169,
"setdomainname" : 171,
"setns" : 308,
"settimeofday" : 164,
"sgetmask" : -1,
"ssetmask" : -1,
"stime" : -1,
"swapoff" : 168,
"swapon" : 167,
"_sysctl" : 156,
"sysfs" : 139,
"sys_setaltroot" : -1,
"umount" : -1,
"umount2" : 166,
"unshare" : 272,
"uselib" : 134,
"vhangup" : 153,
"vm86" : -1,
"vm86old" : -1,
"add_key" : 248,
"keyctl" : 250,
"request_key" : 249,
"afs_syscall" : 183,
"break" : -1,
"ftime" : -1,
"getpmsg" : 181,
"gtty" : -1,
"lock" : -1,
"madvise1" : -1,
"mpx" : -1,
"prof" : -1,
"profil" : -1,
"putpmsg" : 182,
"security" : 185,
"stty" : -1,
"tuxcall" : 184,
"ulimit" : -1,
"vserver" : 236,
}

View File

@ -21,16 +21,20 @@ var setNsMap = map[string]uintptr{
"linux/s390x": 339, "linux/s390x": 339,
} }
var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
func SysSetns() uint32 {
return uint32(sysSetns)
}
func Setns(fd uintptr, flags uintptr) error { func Setns(fd uintptr, flags uintptr) error {
ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)] ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
if !exists { if !exists {
return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
} }
_, _, err := syscall.RawSyscall(ns, fd, flags, 0) _, _, err := syscall.RawSyscall(ns, fd, flags, 0)
if err != 0 { if err != 0 {
return err return err
} }
return nil return nil
} }