Merge pull request #70 from mheon/seccomp

Convert Seccomp support to use Libseccomp
This commit is contained in:
Mrunal Patel 2015-08-21 12:25:33 -07:00
commit e7663a673e
20 changed files with 2220 additions and 590 deletions

4
Godeps/Godeps.json generated
View File

@ -55,6 +55,10 @@
"ImportPath": "github.com/opencontainers/specs", "ImportPath": "github.com/opencontainers/specs",
"Rev": "da9240a7125f601aef46f66ea615177607b00d39" "Rev": "da9240a7125f601aef46f66ea615177607b00d39"
}, },
{
"ImportPath": "github.com/seccomp/libseccomp-golang",
"Rev": "1b506fc7c24eec5a3693cdcbed40d9c226cfc6a1"
},
{ {
"ImportPath": "github.com/syndtr/gocapability/capability", "ImportPath": "github.com/syndtr/gocapability/capability",
"Rev": "e55e5833692b49e49a0073ad5baf7803f21bebf4" "Rev": "e55e5833692b49e49a0073ad5baf7803f21bebf4"

View File

@ -0,0 +1,22 @@
Copyright (c) 2015 Matthew Heon <mheon@redhat.com>
Copyright (c) 2015 Paul Moore <pmoore@redhat.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,26 @@
libseccomp-golang: Go Language Bindings for the libseccomp Project
===============================================================================
https://github.com/seccomp/libseccomp-golang
https://github.com/seccomp/libseccomp
The libseccomp library provides an easy to use, platform independent, interface
to the Linux Kernel's syscall filtering mechanism. The libseccomp API is
designed to abstract away the underlying BPF based syscall filter language and
present a more conventional function-call based filtering interface that should
be familiar to, and easily adopted by, application developers.
The libseccomp-golang library provides a Go based interface to the libseccomp
library.
* Online Resources
The library source repository currently lives on GitHub at the following URLs:
-> https://github.com/seccomp/libseccomp-golang
-> https://github.com/seccomp/libseccomp
The project mailing list is currently hosted on Google Groups at the URL below,
please note that a Google account is not required to subscribe to the mailing
list.
-> https://groups.google.com/d/forum/libseccomp

View File

@ -0,0 +1,827 @@
// +build linux
// Public API specification for libseccomp Go bindings
// Contains public API for the bindings
// Package seccomp rovides bindings for libseccomp, a library wrapping the Linux
// seccomp syscall. Seccomp enables an application to restrict system call use
// for itself and its children.
package seccomp
import (
"fmt"
"os"
"runtime"
"strings"
"sync"
"syscall"
"unsafe"
)
// C wrapping code
// #cgo LDFLAGS: -lseccomp
// #include <stdlib.h>
// #include <seccomp.h>
import "C"
// Exported types
// ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a
// per-architecture basis.
type ScmpArch uint
// ScmpAction represents an action to be taken on a filter rule match in
// libseccomp
type ScmpAction uint
// ScmpCompareOp represents a comparison operator which can be used in a filter
// rule
type ScmpCompareOp uint
// ScmpCondition represents a rule in a libseccomp filter context
type ScmpCondition struct {
Argument uint `json:"argument,omitempty"`
Op ScmpCompareOp `json:"operator,omitempty"`
Operand1 uint64 `json:"operand_one,omitempty"`
Operand2 uint64 `json:"operand_two,omitempty"`
}
// ScmpSyscall represents a Linux System Call
type ScmpSyscall int32
// Exported Constants
const (
// Valid architectures recognized by libseccomp
// ARM64 and all MIPS architectures are unsupported by versions of the
// library before v2.2 and will return errors if used
// ArchInvalid is a placeholder to ensure uninitialized ScmpArch
// variables are invalid
ArchInvalid ScmpArch = iota
// ArchNative is the native architecture of the kernel
ArchNative ScmpArch = iota
// ArchX86 represents 32-bit x86 syscalls
ArchX86 ScmpArch = iota
// ArchAMD64 represents 64-bit x86-64 syscalls
ArchAMD64 ScmpArch = iota
// ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers)
ArchX32 ScmpArch = iota
// ArchARM represents 32-bit ARM syscalls
ArchARM ScmpArch = iota
// ArchARM64 represents 64-bit ARM syscalls
ArchARM64 ScmpArch = iota
// ArchMIPS represents 32-bit MIPS syscalls
ArchMIPS ScmpArch = iota
// ArchMIPS64 represents 64-bit MIPS syscalls
ArchMIPS64 ScmpArch = iota
// ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers)
ArchMIPS64N32 ScmpArch = iota
// ArchMIPSEL represents 32-bit MIPS syscalls (little endian)
ArchMIPSEL ScmpArch = iota
// ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian)
ArchMIPSEL64 ScmpArch = iota
// ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian,
// 32-bit pointers)
ArchMIPSEL64N32 ScmpArch = iota
)
const (
// Supported actions on filter match
// ActInvalid is a placeholder to ensure uninitialized ScmpAction
// variables are invalid
ActInvalid ScmpAction = iota
// ActKill kills the process
ActKill ScmpAction = iota
// ActTrap throws SIGSYS
ActTrap ScmpAction = iota
// ActErrno causes the syscall to return a negative error code. This
// code can be set with the SetReturnCode method
ActErrno ScmpAction = iota
// ActTrace causes the syscall to notify tracing processes with the
// given error code. This code can be set with the SetReturnCode method
ActTrace ScmpAction = iota
// ActAllow permits the syscall to continue execution
ActAllow ScmpAction = iota
)
const (
// These are comparison operators used in conditional seccomp rules
// They are used to compare the value of a single argument of a syscall
// against a user-defined constant
// CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp
// variables are invalid
CompareInvalid ScmpCompareOp = iota
// CompareNotEqual returns true if the argument is not equal to the
// given value
CompareNotEqual ScmpCompareOp = iota
// CompareLess returns true if the argument is less than the given value
CompareLess ScmpCompareOp = iota
// CompareLessOrEqual returns true if the argument is less than or equal
// to the given value
CompareLessOrEqual ScmpCompareOp = iota
// CompareEqual returns true if the argument is equal to the given value
CompareEqual ScmpCompareOp = iota
// CompareGreaterEqual returns true if the argument is greater than or
// equal to the given value
CompareGreaterEqual ScmpCompareOp = iota
// CompareGreater returns true if the argument is greater than the given
// value
CompareGreater ScmpCompareOp = iota
// CompareMaskedEqual returns true if the argument is equal to the given
// value, when masked (bitwise &) against the second given value
CompareMaskedEqual ScmpCompareOp = iota
)
// Helpers for types
// GetArchFromString returns an ScmpArch constant from a string representing an
// architecture
func GetArchFromString(arch string) (ScmpArch, error) {
switch strings.ToLower(arch) {
case "x86":
return ArchX86, nil
case "amd64", "x86-64", "x86_64", "x64":
return ArchAMD64, nil
case "x32":
return ArchX32, nil
case "arm":
return ArchARM, nil
case "arm64", "aarch64":
return ArchARM64, nil
case "mips":
return ArchMIPS, nil
case "mips64":
return ArchMIPS64, nil
case "mips64n32":
return ArchMIPS64N32, nil
case "mipsel":
return ArchMIPSEL, nil
case "mipsel64":
return ArchMIPSEL64, nil
case "mipsel64n32":
return ArchMIPSEL64N32, nil
default:
return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %s", arch)
}
}
// String returns a string representation of an architecture constant
func (a ScmpArch) String() string {
switch a {
case ArchX86:
return "x86"
case ArchAMD64:
return "amd64"
case ArchX32:
return "x32"
case ArchARM:
return "arm"
case ArchARM64:
return "arm64"
case ArchMIPS:
return "mips"
case ArchMIPS64:
return "mips64"
case ArchMIPS64N32:
return "mips64n32"
case ArchMIPSEL:
return "mipsel"
case ArchMIPSEL64:
return "mipsel64"
case ArchMIPSEL64N32:
return "mipsel64n32"
case ArchNative:
return "native"
case ArchInvalid:
return "Invalid architecture"
default:
return "Unknown architecture"
}
}
// String returns a string representation of a comparison operator constant
func (a ScmpCompareOp) String() string {
switch a {
case CompareNotEqual:
return "Not equal"
case CompareLess:
return "Less than"
case CompareLessOrEqual:
return "Less than or equal to"
case CompareEqual:
return "Equal"
case CompareGreaterEqual:
return "Greater than or equal to"
case CompareGreater:
return "Greater than"
case CompareMaskedEqual:
return "Masked equality"
case CompareInvalid:
return "Invalid comparison operator"
default:
return "Unrecognized comparison operator"
}
}
// String returns a string representation of a seccomp match action
func (a ScmpAction) String() string {
switch a & 0xFFFF {
case ActKill:
return "Action: Kill Process"
case ActTrap:
return "Action: Send SIGSYS"
case ActErrno:
return fmt.Sprintf("Action: Return error code %d", (a >> 16))
case ActTrace:
return fmt.Sprintf("Action: Notify tracing processes with code %d",
(a >> 16))
case ActAllow:
return "Action: Allow system call"
default:
return "Unrecognized Action"
}
}
// SetReturnCode adds a return code to a supporting ScmpAction, clearing any
// existing code Only valid on ActErrno and ActTrace. Takes no action otherwise.
// Accepts 16-bit return code as argument.
// Returns a valid ScmpAction of the original type with the new error code set.
func (a ScmpAction) SetReturnCode(code int16) ScmpAction {
aTmp := a & 0x0000FFFF
if aTmp == ActErrno || aTmp == ActTrace {
return (aTmp | (ScmpAction(code)&0xFFFF)<<16)
}
return a
}
// GetReturnCode returns the return code of an ScmpAction
func (a ScmpAction) GetReturnCode() int16 {
return int16(a >> 16)
}
// General utility functions
// GetLibraryVersion returns the version of the library the bindings are built
// against.
// The version is formatted as follows: Major.Minor.Micro
func GetLibraryVersion() (major, minor, micro int) {
return verMajor, verMinor, verMicro
}
// Syscall functions
// GetName retrieves the name of a syscall from its number.
// Acts on any syscall number.
// Returns either a string containing the name of the syscall, or an error.
func (s ScmpSyscall) GetName() (string, error) {
return s.GetNameByArch(ArchNative)
}
// GetNameByArch retrieves the name of a syscall from its number for a given
// architecture.
// Acts on any syscall number.
// Accepts a valid architecture constant.
// Returns either a string containing the name of the syscall, or an error.
// if the syscall is unrecognized or an issue occurred.
func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) {
if err := sanitizeArch(arch); err != nil {
return "", err
}
cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s))
if cString == nil {
return "", fmt.Errorf("could not resolve syscall name")
}
defer C.free(unsafe.Pointer(cString))
finalStr := C.GoString(cString)
return finalStr, nil
}
// GetSyscallFromName returns the number of a syscall by name on the kernel's
// native architecture.
// Accepts a string containing the name of a syscall.
// Returns the number of the syscall, or an error if no syscall with that name
// was found.
func GetSyscallFromName(name string) (ScmpSyscall, error) {
cString := C.CString(name)
defer C.free(unsafe.Pointer(cString))
result := C.seccomp_syscall_resolve_name(cString)
if result == scmpError {
return 0, fmt.Errorf("could not resolve name to syscall")
}
return ScmpSyscall(result), nil
}
// GetSyscallFromNameByArch returns the number of a syscall by name for a given
// architecture's ABI.
// Accepts the name of a syscall and an architecture constant.
// Returns the number of the syscall, or an error if an invalid architecture is
// passed or a syscall with that name was not found.
func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) {
if err := sanitizeArch(arch); err != nil {
return 0, err
}
cString := C.CString(name)
defer C.free(unsafe.Pointer(cString))
result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString)
if result == scmpError {
return 0, fmt.Errorf("could not resolve name to syscall")
}
return ScmpSyscall(result), nil
}
// MakeCondition creates and returns a new condition to attach to a filter rule.
// Associated rules will only match if this condition is true.
// Accepts the number the argument we are checking, and a comparison operator
// and value to compare to.
// The rule will match if argument $arg (zero-indexed) of the syscall is
// $COMPARE_OP the provided comparison value.
// Some comparison operators accept two values. Masked equals, for example,
// will mask $arg of the syscall with the second value provided (via bitwise
// AND) and then compare against the first value provided.
// For example, in the less than or equal case, if the syscall argument was
// 0 and the value provided was 1, the condition would match, as 0 is less
// than or equal to 1.
// Return either an error on bad argument or a valid ScmpCondition struct.
func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) {
var condStruct ScmpCondition
if comparison == CompareInvalid {
return condStruct, fmt.Errorf("invalid comparison operator")
} else if arg > 5 {
return condStruct, fmt.Errorf("syscalls only have up to 6 arguments")
} else if len(values) > 2 {
return condStruct, fmt.Errorf("conditions can have at most 2 arguments")
} else if len(values) == 0 {
return condStruct, fmt.Errorf("must provide at least one value to compare against")
}
condStruct.Argument = arg
condStruct.Op = comparison
condStruct.Operand1 = values[0]
if len(values) == 2 {
condStruct.Operand2 = values[1]
} else {
condStruct.Operand2 = 0 // Unused
}
return condStruct, nil
}
// Utility Functions
// GetNativeArch returns architecture token representing the native kernel
// architecture
func GetNativeArch() (ScmpArch, error) {
arch := C.seccomp_arch_native()
return archFromNative(arch)
}
// Public Filter API
// ScmpFilter represents a filter context in libseccomp.
// A filter context is initially empty. Rules can be added to it, and it can
// then be loaded into the kernel.
type ScmpFilter struct {
filterCtx C.scmp_filter_ctx
valid bool
lock sync.Mutex
}
// NewFilter creates and returns a new filter context.
// Accepts a default action to be taken for syscalls which match no rules in
// the filter.
// Returns a reference to a valid filter context, or nil and an error if the
// filter context could not be created or an invalid default action was given.
func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) {
if err := sanitizeAction(defaultAction); err != nil {
return nil, err
}
fPtr := C.seccomp_init(defaultAction.toNative())
if fPtr == nil {
return nil, fmt.Errorf("could not create filter")
}
filter := new(ScmpFilter)
filter.filterCtx = fPtr
filter.valid = true
runtime.SetFinalizer(filter, filterFinalizer)
return filter, nil
}
// IsValid determines whether a filter context is valid to use.
// Some operations (Release and Merge) render filter contexts invalid and
// consequently prevent further use.
func (f *ScmpFilter) IsValid() bool {
f.lock.Lock()
defer f.lock.Unlock()
return f.valid
}
// Reset resets a filter context, removing all its existing state.
// Accepts a new default action to be taken for syscalls which do not match.
// Returns an error if the filter or action provided are invalid.
func (f *ScmpFilter) Reset(defaultAction ScmpAction) error {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeAction(defaultAction); err != nil {
return err
} else if !f.valid {
return errBadFilter
}
retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative())
if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// Release releases a filter context, freeing its memory. Should be called after
// loading into the kernel, when the filter is no longer needed.
// After calling this function, the given filter is no longer valid and cannot
// be used.
// Release() will be invoked automatically when a filter context is garbage
// collected, but can also be called manually to free memory.
func (f *ScmpFilter) Release() {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return
}
f.valid = false
C.seccomp_release(f.filterCtx)
}
// Merge merges two filter contexts.
// The source filter src will be released as part of the process, and will no
// longer be usable or valid after this call.
// To be merged, filters must NOT share any architectures, and all their
// attributes (Default Action, Bad Arch Action, No New Privs and TSync bools)
// must match.
// The filter src will be merged into the filter this is called on.
// The architectures of the src filter not present in the destination, and all
// associated rules, will be added to the destination.
// Returns an error if merging the filters failed.
func (f *ScmpFilter) Merge(src *ScmpFilter) error {
f.lock.Lock()
defer f.lock.Unlock()
src.lock.Lock()
defer src.lock.Unlock()
if !src.valid || !f.valid {
return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized")
}
// Merge the filters
retCode := C.seccomp_merge(f.filterCtx, src.filterCtx)
if syscall.Errno(-1*retCode) == syscall.EINVAL {
return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter")
} else if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
src.valid = false
return nil
}
// IsArchPresent checks if an architecture is present in a filter.
// If a filter contains an architecture, it uses its default action for
// syscalls which do not match rules in it, and its rules can match syscalls
// for that ABI.
// If a filter does not contain an architecture, all syscalls made to that
// kernel ABI will fail with the filter's default Bad Architecture Action
// (by default, killing the process).
// Accepts an architecture constant.
// Returns true if the architecture is present in the filter, false otherwise,
// and an error on an invalid filter context, architecture constant, or an
// issue with the call to libseccomp.
func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeArch(arch); err != nil {
return false, err
} else if !f.valid {
return false, errBadFilter
}
retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative())
if syscall.Errno(-1*retCode) == syscall.EEXIST {
// -EEXIST is "arch not present"
return false, nil
} else if retCode != 0 {
return false, syscall.Errno(-1 * retCode)
}
return true, nil
}
// AddArch adds an architecture to the filter.
// Accepts an architecture constant.
// Returns an error on invalid filter context or architecture token, or an
// issue with the call to libseccomp.
func (f *ScmpFilter) AddArch(arch ScmpArch) error {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeArch(arch); err != nil {
return err
} else if !f.valid {
return errBadFilter
}
// Libseccomp returns -EEXIST if the specified architecture is already
// present. Succeed silently in this case, as it's not fatal, and the
// architecture is present already.
retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative())
if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST {
return syscall.Errno(-1 * retCode)
}
return nil
}
// RemoveArch removes an architecture from the filter.
// Accepts an architecture constant.
// Returns an error on invalid filter context or architecture token, or an
// issue with the call to libseccomp.
func (f *ScmpFilter) RemoveArch(arch ScmpArch) error {
f.lock.Lock()
defer f.lock.Unlock()
if err := sanitizeArch(arch); err != nil {
return err
} else if !f.valid {
return errBadFilter
}
// Similar to AddArch, -EEXIST is returned if the arch is not present
// Succeed silently in that case, this is not fatal and the architecture
// is not present in the filter after RemoveArch
retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative())
if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST {
return syscall.Errno(-1 * retCode)
}
return nil
}
// Load loads a filter context into the kernel.
// Returns an error if the filter context is invalid or the syscall failed.
func (f *ScmpFilter) Load() error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_load(f.filterCtx); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// GetDefaultAction returns the default action taken on a syscall which does not
// match a rule in the filter, or an error if an issue was encountered
// retrieving the value.
func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) {
action, err := f.getFilterAttr(filterAttrActDefault)
if err != nil {
return 0x0, err
}
return actionFromNative(action)
}
// GetBadArchAction returns the default action taken on a syscall for an
// architecture not in the filter, or an error if an issue was encountered
// retrieving the value.
func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) {
action, err := f.getFilterAttr(filterAttrActBadArch)
if err != nil {
return 0x0, err
}
return actionFromNative(action)
}
// GetNoNewPrivsBit returns the current state the No New Privileges bit will be set
// to on the filter being loaded, or an error if an issue was encountered
// retrieving the value.
// The No New Privileges bit tells the kernel that new processes run with exec()
// cannot gain more privileges than the process that ran exec().
// For example, a process with No New Privileges set would be unable to exec
// setuid/setgid executables.
func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) {
noNewPrivs, err := f.getFilterAttr(filterAttrNNP)
if err != nil {
return false, err
}
if noNewPrivs == 0 {
return false, nil
}
return true, nil
}
// GetTsyncBit returns whether Thread Synchronization will be enabled on the
// filter being loaded, or an error if an issue was encountered retrieving the
// value.
// Thread Sync ensures that all members of the thread group of the calling
// process will share the same Seccomp filter set.
// Tsync is a fairly recent addition to the Linux kernel and older kernels
// lack support. If the running kernel does not support Tsync and it is
// requested in a filter, Libseccomp will not enable TSync support and will
// proceed as normal.
// This function is unavailable before v2.2 of libseccomp and will return an
// error.
func (f *ScmpFilter) GetTsyncBit() (bool, error) {
tSync, err := f.getFilterAttr(filterAttrTsync)
if err != nil {
return false, err
}
if tSync == 0 {
return false, nil
}
return true, nil
}
// SetBadArchAction sets the default action taken on a syscall for an
// architecture not in the filter, or an error if an issue was encountered
// setting the value.
func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error {
if err := sanitizeAction(action); err != nil {
return err
}
return f.setFilterAttr(filterAttrActBadArch, action.toNative())
}
// SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be
// applied on filter load, or an error if an issue was encountered setting the
// value.
// Filters with No New Privileges set to 0 can only be loaded if the process
// has the CAP_SYS_ADMIN capability.
func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error {
var toSet C.uint32_t = 0x0
if state {
toSet = 0x1
}
return f.setFilterAttr(filterAttrNNP, toSet)
}
// SetTsync sets whether Thread Synchronization will be enabled on the filter
// being loaded. Returns an error if setting Tsync failed, or the filter is
// invalid.
// Thread Sync ensures that all members of the thread group of the calling
// process will share the same Seccomp filter set.
// Tsync is a fairly recent addition to the Linux kernel and older kernels
// lack support. If the running kernel does not support Tsync and it is
// requested in a filter, Libseccomp will not enable TSync support and will
// proceed as normal.
// This function is unavailable before v2.2 of libseccomp and will return an
// error.
func (f *ScmpFilter) SetTsync(enable bool) error {
var toSet C.uint32_t = 0x0
if enable {
toSet = 0x1
}
return f.setFilterAttr(filterAttrTsync, toSet)
}
// SetSyscallPriority sets a syscall's priority.
// This provides a hint to the filter generator in libseccomp about the
// importance of this syscall. High-priority syscalls are placed
// first in the filter code, and incur less overhead (at the expense of
// lower-priority syscalls).
func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call),
C.uint8_t(priority)); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// AddRule adds a single rule for an unconditional action on a syscall.
// Accepts the number of the syscall and the action to be taken on the call
// being made.
// Returns an error if an issue was encountered adding the rule.
func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error {
return f.addRuleGeneric(call, action, false, nil)
}
// AddRuleExact adds a single rule for an unconditional action on a syscall.
// Accepts the number of the syscall and the action to be taken on the call
// being made.
// No modifications will be made to the rule, and it will fail to add if it
// cannot be applied to the current architecture without modification.
// The rule will function exactly as described, but it may not function identically
// (or be able to be applied to) all architectures.
// Returns an error if an issue was encountered adding the rule.
func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error {
return f.addRuleGeneric(call, action, true, nil)
}
// AddRuleConditional adds a single rule for a conditional action on a syscall.
// Returns an error if an issue was encountered adding the rule.
// All conditions must match for the rule to match.
// There is a bug in library versions below v2.2.1 which can, in some cases,
// cause conditions to be lost when more than one are used. Consequently,
// AddRuleConditional is disabled on library versions lower than v2.2.1
func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
return f.addRuleGeneric(call, action, false, conds)
}
// AddRuleConditionalExact adds a single rule for a conditional action on a
// syscall.
// No modifications will be made to the rule, and it will fail to add if it
// cannot be applied to the current architecture without modification.
// The rule will function exactly as described, but it may not function identically
// (or be able to be applied to) all architectures.
// Returns an error if an issue was encountered adding the rule.
// There is a bug in library versions below v2.2.1 which can, in some cases,
// cause conditions to be lost when more than one are used. Consequently,
// AddRuleConditionalExact is disabled on library versions lower than v2.2.1
func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error {
return f.addRuleGeneric(call, action, true, conds)
}
// ExportPFC output PFC-formatted, human-readable dump of a filter context's
// rules to a file.
// Accepts file to write to (must be open for writing).
// Returns an error if writing to the file fails.
func (f *ScmpFilter) ExportPFC(file *os.File) error {
f.lock.Lock()
defer f.lock.Unlock()
fd := file.Fd()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// ExportBPF outputs Berkeley Packet Filter-formatted, kernel-readable dump of a
// filter context's rules to a file.
// Accepts file to write to (must be open for writing).
// Returns an error if writing to the file fails.
func (f *ScmpFilter) ExportBPF(file *os.File) error {
f.lock.Lock()
defer f.lock.Unlock()
fd := file.Fd()
if !f.valid {
return errBadFilter
}
if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}

View File

@ -0,0 +1,461 @@
// +build linux
// Internal functions for libseccomp Go bindings
// No exported functions
package seccomp
import (
"fmt"
"os"
"syscall"
)
// Unexported C wrapping code - provides the C-Golang interface
// Get the seccomp header in scope
// Need stdlib.h for free() on cstrings
// #cgo LDFLAGS: -lseccomp
/*
#include <stdlib.h>
#include <seccomp.h>
#if SCMP_VER_MAJOR < 2
#error Minimum supported version of Libseccomp is v2.1.0
#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 1
#error Minimum supported version of Libseccomp is v2.1.0
#endif
#define ARCH_BAD ~0
const uint32_t C_ARCH_BAD = ARCH_BAD;
#ifndef SCMP_ARCH_AARCH64
#define SCMP_ARCH_AARCH64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPS
#define SCMP_ARCH_MIPS ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPS64
#define SCMP_ARCH_MIPS64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPS64N32
#define SCMP_ARCH_MIPS64N32 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPSEL
#define SCMP_ARCH_MIPSEL ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPSEL64
#define SCMP_ARCH_MIPSEL64 ARCH_BAD
#endif
#ifndef SCMP_ARCH_MIPSEL64N32
#define SCMP_ARCH_MIPSEL64N32 ARCH_BAD
#endif
const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE;
const uint32_t C_ARCH_X86 = SCMP_ARCH_X86;
const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64;
const uint32_t C_ARCH_X32 = SCMP_ARCH_X32;
const uint32_t C_ARCH_ARM = SCMP_ARCH_ARM;
const uint32_t C_ARCH_AARCH64 = SCMP_ARCH_AARCH64;
const uint32_t C_ARCH_MIPS = SCMP_ARCH_MIPS;
const uint32_t C_ARCH_MIPS64 = SCMP_ARCH_MIPS64;
const uint32_t C_ARCH_MIPS64N32 = SCMP_ARCH_MIPS64N32;
const uint32_t C_ARCH_MIPSEL = SCMP_ARCH_MIPSEL;
const uint32_t C_ARCH_MIPSEL64 = SCMP_ARCH_MIPSEL64;
const uint32_t C_ARCH_MIPSEL64N32 = SCMP_ARCH_MIPSEL64N32;
const uint32_t C_ACT_KILL = SCMP_ACT_KILL;
const uint32_t C_ACT_TRAP = SCMP_ACT_TRAP;
const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0);
const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0);
const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW;
// If TSync is not supported, make sure it doesn't map to a supported filter attribute
// Don't worry about major version < 2, the minimum version checks should catch that case
#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2
#define SCMP_FLTATR_CTL_TSYNC _SCMP_CMP_MIN
#endif
const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT;
const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH;
const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP;
const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC;
const int C_CMP_NE = (int)SCMP_CMP_NE;
const int C_CMP_LT = (int)SCMP_CMP_LT;
const int C_CMP_LE = (int)SCMP_CMP_LE;
const int C_CMP_EQ = (int)SCMP_CMP_EQ;
const int C_CMP_GE = (int)SCMP_CMP_GE;
const int C_CMP_GT = (int)SCMP_CMP_GT;
const int C_CMP_MASKED_EQ = (int)SCMP_CMP_MASKED_EQ;
const int C_VERSION_MAJOR = SCMP_VER_MAJOR;
const int C_VERSION_MINOR = SCMP_VER_MINOR;
const int C_VERSION_MICRO = SCMP_VER_MICRO;
typedef struct scmp_arg_cmp* scmp_cast_t;
// Wrapper to create an scmp_arg_cmp struct
void*
make_struct_arg_cmp(
unsigned int arg,
int compare,
uint64_t a,
uint64_t b
)
{
struct scmp_arg_cmp *s = malloc(sizeof(struct scmp_arg_cmp));
s->arg = arg;
s->op = compare;
s->datum_a = a;
s->datum_b = b;
return s;
}
*/
import "C"
// Nonexported types
type scmpFilterAttr uint32
// Nonexported constants
const (
filterAttrActDefault scmpFilterAttr = iota
filterAttrActBadArch scmpFilterAttr = iota
filterAttrNNP scmpFilterAttr = iota
filterAttrTsync scmpFilterAttr = iota
)
const (
// An error return from certain libseccomp functions
scmpError C.int = -1
// Comparison boundaries to check for architecture validity
archStart ScmpArch = ArchNative
archEnd ScmpArch = ArchMIPSEL64N32
// Comparison boundaries to check for action validity
actionStart ScmpAction = ActKill
actionEnd ScmpAction = ActAllow
// Comparison boundaries to check for comparison operator validity
compareOpStart ScmpCompareOp = CompareNotEqual
compareOpEnd ScmpCompareOp = CompareMaskedEqual
)
var (
// Error thrown on bad filter context
errBadFilter = fmt.Errorf("filter is invalid or uninitialized")
// Constants representing library major, minor, and micro versions
verMajor = int(C.C_VERSION_MAJOR)
verMinor = int(C.C_VERSION_MINOR)
verMicro = int(C.C_VERSION_MICRO)
)
// Nonexported functions
// Check if library version is greater than or equal to the given one
func checkVersionAbove(major, minor, micro int) bool {
return (verMajor > major) ||
(verMajor == major && verMinor > minor) ||
(verMajor == major && verMinor == minor && verMicro >= micro)
}
// Init function: Verify library version is appropriate
func init() {
if !checkVersionAbove(2, 1, 0) {
fmt.Fprintf(os.Stderr, "Libseccomp version too low: minimum supported is 2.1.0, detected %d.%d.%d", C.C_VERSION_MAJOR, C.C_VERSION_MINOR, C.C_VERSION_MICRO)
os.Exit(-1)
}
}
// Filter helpers
// Filter finalizer - ensure that kernel context for filters is freed
func filterFinalizer(f *ScmpFilter) {
f.Release()
}
// Get a raw filter attribute
func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr) (C.uint32_t, error) {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return 0x0, errBadFilter
}
if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync {
return 0x0, fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
}
var attribute C.uint32_t
retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &attribute)
if retCode != 0 {
return 0x0, syscall.Errno(-1 * retCode)
}
return attribute, nil
}
// Set a raw filter attribute
func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if !checkVersionAbove(2, 2, 0) && attr == filterAttrTsync {
return fmt.Errorf("the thread synchronization attribute is not supported in this version of the library")
}
retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value)
if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// DOES NOT LOCK OR CHECK VALIDITY
// Assumes caller has already done this
// Wrapper for seccomp_rule_add_... functions
func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, cond C.scmp_cast_t) error {
var length C.uint
if cond != nil {
length = 1
} else {
length = 0
}
var retCode C.int
if exact {
retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, cond)
} else {
retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, cond)
}
if syscall.Errno(-1*retCode) == syscall.EFAULT {
return fmt.Errorf("unrecognized syscall")
} else if syscall.Errno(-1*retCode) == syscall.EPERM {
return fmt.Errorf("requested action matches default action of filter")
} else if retCode != 0 {
return syscall.Errno(-1 * retCode)
}
return nil
}
// Generic add function for filter rules
func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error {
f.lock.Lock()
defer f.lock.Unlock()
if !f.valid {
return errBadFilter
}
if len(conds) == 0 {
if err := f.addRuleWrapper(call, action, exact, nil); err != nil {
return err
}
} else {
// We don't support conditional filtering in library version v2.1
if !checkVersionAbove(2, 2, 1) {
return fmt.Errorf("conditional filtering requires libseccomp version >= 2.2.1")
}
for _, cond := range conds {
cmpStruct := C.make_struct_arg_cmp(C.uint(cond.Argument), cond.Op.toNative(), C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2))
defer C.free(cmpStruct)
if err := f.addRuleWrapper(call, action, exact, C.scmp_cast_t(cmpStruct)); err != nil {
return err
}
}
}
return nil
}
// Generic Helpers
// Helper - Sanitize Arch token input
func sanitizeArch(in ScmpArch) error {
if in < archStart || in > archEnd {
return fmt.Errorf("unrecognized architecture")
}
if in.toNative() == C.C_ARCH_BAD {
return fmt.Errorf("architecture is not supported on this version of the library")
}
return nil
}
func sanitizeAction(in ScmpAction) error {
inTmp := in & 0x0000FFFF
if inTmp < actionStart || inTmp > actionEnd {
return fmt.Errorf("unrecognized action")
}
if inTmp != ActTrace && inTmp != ActErrno && (in&0xFFFF0000) != 0 {
return fmt.Errorf("highest 16 bits must be zeroed except for Trace and Errno")
}
return nil
}
func sanitizeCompareOp(in ScmpCompareOp) error {
if in < compareOpStart || in > compareOpEnd {
return fmt.Errorf("unrecognized comparison operator")
}
return nil
}
func archFromNative(a C.uint32_t) (ScmpArch, error) {
switch a {
case C.C_ARCH_X86:
return ArchX86, nil
case C.C_ARCH_X86_64:
return ArchAMD64, nil
case C.C_ARCH_X32:
return ArchX32, nil
case C.C_ARCH_ARM:
return ArchARM, nil
case C.C_ARCH_NATIVE:
return ArchNative, nil
case C.C_ARCH_AARCH64:
return ArchARM64, nil
case C.C_ARCH_MIPS:
return ArchMIPS, nil
case C.C_ARCH_MIPS64:
return ArchMIPS64, nil
case C.C_ARCH_MIPS64N32:
return ArchMIPS64N32, nil
case C.C_ARCH_MIPSEL:
return ArchMIPSEL, nil
case C.C_ARCH_MIPSEL64:
return ArchMIPSEL64, nil
case C.C_ARCH_MIPSEL64N32:
return ArchMIPSEL64N32, nil
default:
return 0x0, fmt.Errorf("unrecognized architecture")
}
}
// Only use with sanitized arches, no error handling
func (a ScmpArch) toNative() C.uint32_t {
switch a {
case ArchX86:
return C.C_ARCH_X86
case ArchAMD64:
return C.C_ARCH_X86_64
case ArchX32:
return C.C_ARCH_X32
case ArchARM:
return C.C_ARCH_ARM
case ArchARM64:
return C.C_ARCH_AARCH64
case ArchMIPS:
return C.C_ARCH_MIPS
case ArchMIPS64:
return C.C_ARCH_MIPS64
case ArchMIPS64N32:
return C.C_ARCH_MIPS64N32
case ArchMIPSEL:
return C.C_ARCH_MIPSEL
case ArchMIPSEL64:
return C.C_ARCH_MIPSEL64
case ArchMIPSEL64N32:
return C.C_ARCH_MIPSEL64N32
case ArchNative:
return C.C_ARCH_NATIVE
default:
return 0x0
}
}
// Only use with sanitized ops, no error handling
func (a ScmpCompareOp) toNative() C.int {
switch a {
case CompareNotEqual:
return C.C_CMP_NE
case CompareLess:
return C.C_CMP_LT
case CompareLessOrEqual:
return C.C_CMP_LE
case CompareEqual:
return C.C_CMP_EQ
case CompareGreaterEqual:
return C.C_CMP_GE
case CompareGreater:
return C.C_CMP_GT
case CompareMaskedEqual:
return C.C_CMP_MASKED_EQ
default:
return 0x0
}
}
func actionFromNative(a C.uint32_t) (ScmpAction, error) {
aTmp := a & 0xFFFF
switch a & 0xFFFF0000 {
case C.C_ACT_KILL:
return ActKill, nil
case C.C_ACT_TRAP:
return ActTrap, nil
case C.C_ACT_ERRNO:
return ActErrno.SetReturnCode(int16(aTmp)), nil
case C.C_ACT_TRACE:
return ActTrace.SetReturnCode(int16(aTmp)), nil
case C.C_ACT_ALLOW:
return ActAllow, nil
default:
return 0x0, fmt.Errorf("unrecognized action")
}
}
// Only use with sanitized actions, no error handling
func (a ScmpAction) toNative() C.uint32_t {
switch a & 0xFFFF {
case ActKill:
return C.C_ACT_KILL
case ActTrap:
return C.C_ACT_TRAP
case ActErrno:
return C.C_ACT_ERRNO | (C.uint32_t(a) >> 16)
case ActTrace:
return C.C_ACT_TRACE | (C.uint32_t(a) >> 16)
case ActAllow:
return C.C_ACT_ALLOW
default:
return 0x0
}
}
// Internal only, assumes safe attribute
func (a scmpFilterAttr) toNative() uint32 {
switch a {
case filterAttrActDefault:
return uint32(C.C_ATTRIBUTE_DEFAULT)
case filterAttrActBadArch:
return uint32(C.C_ATTRIBUTE_BADARCH)
case filterAttrNNP:
return uint32(C.C_ATTRIBUTE_NNP)
case filterAttrTsync:
return uint32(C.C_ATTRIBUTE_TSYNC)
default:
return 0x0
}
}

View File

@ -0,0 +1,457 @@
// +build linux
// Tests for public API of libseccomp Go bindings
package seccomp
import (
"fmt"
"syscall"
"testing"
)
// Type Function Tests
func TestActionSetReturnCode(t *testing.T) {
if ActInvalid.SetReturnCode(0x0010) != ActInvalid {
t.Errorf("Able to set a return code on invalid action!")
}
codeSet := ActErrno.SetReturnCode(0x0001)
if codeSet == ActErrno || codeSet.GetReturnCode() != 0x0001 {
t.Errorf("Could not set return code on ActErrno")
}
}
func TestSyscallGetName(t *testing.T) {
call1 := ScmpSyscall(0x1)
callFail := ScmpSyscall(0x999)
name, err := call1.GetName()
if err != nil {
t.Errorf("Error getting syscall name for number 0x1")
} else if len(name) == 0 {
t.Errorf("Empty name returned for syscall 0x1")
}
fmt.Printf("Got name of syscall 0x1 on native arch as %s\n", name)
_, err = callFail.GetName()
if err == nil {
t.Errorf("Getting nonexistant syscall should error!")
}
}
func TestSyscallGetNameByArch(t *testing.T) {
call1 := ScmpSyscall(0x1)
callInvalid := ScmpSyscall(0x999)
archGood := ArchAMD64
archBad := ArchInvalid
name, err := call1.GetNameByArch(archGood)
if err != nil {
t.Errorf("Error getting syscall name for number 0x1 and arch AMD64")
} else if name != "write" {
t.Errorf("Got incorrect name for syscall 0x1 - expected write, got %s", name)
}
_, err = call1.GetNameByArch(archBad)
if err == nil {
t.Errorf("Bad architecture GetNameByArch() should error!")
}
_, err = callInvalid.GetNameByArch(archGood)
if err == nil {
t.Errorf("Bad syscall GetNameByArch() should error!")
}
_, err = callInvalid.GetNameByArch(archBad)
if err == nil {
t.Errorf("Bad syscall and bad arch GetNameByArch() should error!")
}
}
func TestGetSyscallFromName(t *testing.T) {
name1 := "write"
nameInval := "NOTASYSCALL"
syscall, err := GetSyscallFromName(name1)
if err != nil {
t.Errorf("Error getting syscall number of write: %s", err)
}
fmt.Printf("Got syscall number of write on native arch as %d\n", syscall)
_, err = GetSyscallFromName(nameInval)
if err == nil {
t.Errorf("Getting an invalid syscall should error!")
}
}
func TestGetSyscallFromNameByArch(t *testing.T) {
name1 := "write"
nameInval := "NOTASYSCALL"
arch1 := ArchAMD64
archInval := ArchInvalid
syscall, err := GetSyscallFromNameByArch(name1, arch1)
if err != nil {
t.Errorf("Error getting syscall number of write on AMD64: %s", err)
}
fmt.Printf("Got syscall number of write on AMD64 as %d\n", syscall)
_, err = GetSyscallFromNameByArch(nameInval, arch1)
if err == nil {
t.Errorf("Getting invalid syscall with valid arch should error")
}
_, err = GetSyscallFromNameByArch(name1, archInval)
if err == nil {
t.Errorf("Getting valid syscall for invalid arch should error")
}
_, err = GetSyscallFromNameByArch(nameInval, archInval)
if err == nil {
t.Errorf("Getting invalid syscall for invalid arch should error")
}
}
func TestMakeCondition(t *testing.T) {
condition, err := MakeCondition(3, CompareNotEqual, 0x10)
if err != nil {
t.Errorf("Error making condition struct: %s", err)
} else if condition.Argument != 3 || condition.Operand1 != 0x10 ||
condition.Operand2 != 0 || condition.Op != CompareNotEqual {
t.Errorf("Condition struct was filled incorrectly")
}
condition, err = MakeCondition(3, CompareMaskedEqual, 0x10, 0x20)
if err != nil {
t.Errorf("Error making condition struct: %s", err)
} else if condition.Argument != 3 || condition.Operand1 != 0x10 ||
condition.Operand2 != 0x20 || condition.Op != CompareMaskedEqual {
t.Errorf("Condition struct was filled incorrectly")
}
_, err = MakeCondition(7, CompareNotEqual, 0x10)
if err == nil {
t.Errorf("Condition struct with bad syscall argument number should error")
}
_, err = MakeCondition(3, CompareInvalid, 0x10)
if err == nil {
t.Errorf("Condition struct with bad comparison operator should error")
}
_, err = MakeCondition(3, CompareMaskedEqual, 0x10, 0x20, 0x30)
if err == nil {
t.Errorf("MakeCondition with more than 2 arguments should fail")
}
_, err = MakeCondition(3, CompareMaskedEqual)
if err == nil {
t.Errorf("MakeCondition with no arguments should fail")
}
}
// Utility Function Tests
func TestGetNativeArch(t *testing.T) {
arch, err := GetNativeArch()
if err != nil {
t.Errorf("GetNativeArch should not error!")
}
fmt.Printf("Got native arch of system as %s\n", arch.String())
}
// Filter Tests
func TestFilterCreateRelease(t *testing.T) {
_, err := NewFilter(ActInvalid)
if err == nil {
t.Errorf("Can create filter with invalid action")
}
filter, err := NewFilter(ActKill)
if err != nil {
t.Errorf("Error creating filter: %s", err)
}
if !filter.IsValid() {
t.Errorf("Filter created by NewFilter was not valid")
}
filter.Release()
if filter.IsValid() {
t.Errorf("Filter is valid after being released")
}
}
func TestFilterReset(t *testing.T) {
filter, err := NewFilter(ActKill)
if err != nil {
t.Errorf("Error creating filter: %s", err)
}
defer filter.Release()
// Ensure the default action is ActKill
action, err := filter.GetDefaultAction()
if err != nil {
t.Errorf("Error getting default action of filter")
} else if action != ActKill {
t.Errorf("Default action of filter was set incorrectly!")
}
// Reset with a different default action
err = filter.Reset(ActAllow)
if err != nil {
t.Errorf("Error resetting filter!")
}
valid := filter.IsValid()
if !valid {
t.Errorf("Filter is no longer valid after reset!")
}
// The default action should no longer be ActKill
action, err = filter.GetDefaultAction()
if err != nil {
t.Errorf("Error getting default action of filter")
} else if action != ActAllow {
t.Errorf("Default action of filter was set incorrectly!")
}
}
func TestFilterArchFunctions(t *testing.T) {
filter, err := NewFilter(ActKill)
if err != nil {
t.Errorf("Error creating filter: %s", err)
}
defer filter.Release()
arch, err := GetNativeArch()
if err != nil {
t.Errorf("Error getting native architecture: %s", err)
}
present, err := filter.IsArchPresent(arch)
if err != nil {
t.Errorf("Error retrieving arch from filter: %s", err)
} else if !present {
t.Errorf("Filter does not contain native architecture by default")
}
// Adding the native arch again should succeed, as it's already present
err = filter.AddArch(arch)
if err != nil {
t.Errorf("Adding arch to filter already containing it should succeed")
}
// Make sure we don't add the native arch again
prospectiveArch := ArchX86
if arch == ArchX86 {
prospectiveArch = ArchAMD64
}
// Check to make sure this other arch isn't in the filter
present, err = filter.IsArchPresent(prospectiveArch)
if err != nil {
t.Errorf("Error retrieving arch from filter: %s", err)
} else if present {
t.Errorf("Arch not added to filter is present")
}
// Try removing the nonexistant arch - should succeed
err = filter.RemoveArch(prospectiveArch)
if err != nil {
t.Errorf("Error removing nonexistant arch: %s", err)
}
// Add an arch, see if it's in the filter
err = filter.AddArch(prospectiveArch)
if err != nil {
t.Errorf("Could not add arch %s to filter: %s",
prospectiveArch.String(), err)
}
present, err = filter.IsArchPresent(prospectiveArch)
if err != nil {
t.Errorf("Error retrieving arch from filter: %s", err)
} else if !present {
t.Errorf("Filter does not contain architecture %s after it was added",
prospectiveArch.String())
}
// Remove the arch again, make sure it's not in the filter
err = filter.RemoveArch(prospectiveArch)
if err != nil {
t.Errorf("Could not remove arch %s from filter: %s",
prospectiveArch.String(), err)
}
present, err = filter.IsArchPresent(prospectiveArch)
if err != nil {
t.Errorf("Error retrieving arch from filter: %s", err)
} else if present {
t.Errorf("Filter contains architecture %s after it was removed",
prospectiveArch.String())
}
}
func TestFilterAttributeGettersAndSetters(t *testing.T) {
filter, err := NewFilter(ActKill)
if err != nil {
t.Errorf("Error creating filter: %s", err)
}
defer filter.Release()
act, err := filter.GetDefaultAction()
if err != nil {
t.Errorf("Error getting default action: %s", err)
} else if act != ActKill {
t.Errorf("Default action was set incorrectly")
}
err = filter.SetBadArchAction(ActAllow)
if err != nil {
t.Errorf("Error setting bad arch action: %s", err)
}
act, err = filter.GetBadArchAction()
if err != nil {
t.Errorf("Error getting bad arch action")
} else if act != ActAllow {
t.Errorf("Bad arch action was not set correcly!")
}
err = filter.SetNoNewPrivsBit(false)
if err != nil {
t.Errorf("Error setting no new privileges bit")
}
privs, err := filter.GetNoNewPrivsBit()
if err != nil {
t.Errorf("Error getting no new privileges bit!")
} else if privs != false {
t.Errorf("No new privileges bit was not set correctly")
}
err = filter.SetBadArchAction(ActInvalid)
if err == nil {
t.Errorf("Setting bad arch action to an invalid action should error")
}
}
func TestMergeFilters(t *testing.T) {
filter1, err := NewFilter(ActAllow)
if err != nil {
t.Errorf("Error creating filter: %s", err)
}
filter2, err := NewFilter(ActAllow)
if err != nil {
t.Errorf("Error creating filter: %s", err)
}
// Need to remove the native arch and add another to the second filter
// Filters must NOT share architectures to be successfully merged
nativeArch, err := GetNativeArch()
if err != nil {
t.Errorf("Error getting native arch: %s", err)
}
prospectiveArch := ArchAMD64
if nativeArch == ArchAMD64 {
prospectiveArch = ArchX86
}
err = filter2.AddArch(prospectiveArch)
if err != nil {
t.Errorf("Error adding architecture to filter: %s", err)
}
err = filter2.RemoveArch(nativeArch)
if err != nil {
t.Errorf("Error removing architecture from filter: %s", err)
}
err = filter1.Merge(filter2)
if err != nil {
t.Errorf("Error merging filters: %s", err)
}
if filter2.IsValid() {
t.Errorf("Source filter should not be valid after merging")
}
filter3, err := NewFilter(ActKill)
if err != nil {
t.Errorf("Error creating filter: %s", err)
}
defer filter3.Release()
err = filter1.Merge(filter3)
if err == nil {
t.Errorf("Attributes should have to match to merge filters")
}
}
func TestRuleAddAndLoad(t *testing.T) {
// Test #1: Add a trivial filter
filter1, err := NewFilter(ActAllow)
if err != nil {
t.Errorf("Error creating filter: %s", err)
}
defer filter1.Release()
call, err := GetSyscallFromName("getpid")
if err != nil {
t.Errorf("Error getting syscall number of getpid: %s", err)
}
call2, err := GetSyscallFromName("setreuid")
if err != nil {
t.Errorf("Error getting syscall number of setreuid: %s", err)
}
uid := syscall.Getuid()
euid := syscall.Geteuid()
err = filter1.AddRule(call, ActErrno.SetReturnCode(0x1))
if err != nil {
t.Errorf("Error adding rule to restrict syscall: %s", err)
}
cond, err := MakeCondition(1, CompareEqual, uint64(euid))
if err != nil {
t.Errorf("Error making rule to restrict syscall: %s", err)
}
cond2, err := MakeCondition(0, CompareEqual, uint64(uid))
if err != nil {
t.Errorf("Error making rule to restrict syscall: %s", err)
}
conditions := []ScmpCondition{cond, cond2}
err = filter1.AddRuleConditional(call2, ActErrno.SetReturnCode(0x2), conditions)
if err != nil {
t.Errorf("Error adding conditional rule: %s", err)
}
err = filter1.Load()
if err != nil {
t.Errorf("Error loading filter: %s", err)
}
// Try making a simple syscall, it should error
pid := syscall.Getpid()
if pid != -1 {
t.Errorf("Syscall should have returned error code!")
}
// Try making a Geteuid syscall that should normally succeed
err = syscall.Setreuid(uid, euid)
if err != syscall.Errno(2) {
t.Errorf("Syscall should have returned error code!")
}
}

View File

@ -13,36 +13,46 @@ type IDMap struct {
Size int `json:"size"` Size int `json:"size"`
} }
// Seccomp represents syscall restrictions
type Seccomp struct { type Seccomp struct {
Syscalls []*Syscall `json:"syscalls"` DefaultAction Action `json:"default_action"`
Syscalls []*Syscall `json:"syscalls"`
} }
// An action to be taken upon rule match in Seccomp
type Action int type Action int
const ( const (
Kill Action = iota - 3 Kill Action = iota - 4
Errno
Trap Trap
Allow Allow
) )
// A comparison operator to be used when matching syscall arguments in Seccomp
type Operator int type Operator int
const ( const (
EqualTo Operator = iota EqualTo Operator = iota
NotEqualTo NotEqualTo
GreatherThan GreaterThan
GreaterThanOrEqualTo
LessThan LessThan
LessThanOrEqualTo
MaskEqualTo MaskEqualTo
) )
// A rule to match a specific syscall argument in Seccomp
type Arg struct { type Arg struct {
Index int `json:"index"` Index uint `json:"index"`
Value uint32 `json:"value"` Value uint64 `json:"value"`
Op Operator `json:"op"` ValueTwo uint64 `json:"value_two"`
Op Operator `json:"op"`
} }
// An rule to match a syscall in Seccomp
type Syscall struct { type Syscall struct {
Value int `json:"value"` Name string `json:"name"`
Action Action `json:"action"` Action Action `json:"action"`
Args []*Arg `json:"args"` Args []*Arg `json:"args"`
} }
@ -140,7 +150,7 @@ type Config struct {
Sysctl map[string]string `json:"sysctl"` Sysctl map[string]string `json:"sysctl"`
// Seccomp allows actions to be taken whenever a syscall is made within the container. // Seccomp allows actions to be taken whenever a syscall is made within the container.
// By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno // A number of rules are given, each having an action to be taken if a syscall matches it.
// can be specified on a per syscall basis. // A default action to be taken if no rules match is also given.
Seccomp *Seccomp `json:"seccomp"` Seccomp *Seccomp `json:"seccomp"`
} }

View File

@ -13,7 +13,6 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/netlink" "github.com/opencontainers/runc/libcontainer/netlink"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/user" "github.com/opencontainers/runc/libcontainer/user"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
@ -270,61 +269,3 @@ func killCgroupProcesses(m cgroups.Manager) error {
} }
return nil return nil
} }
func finalizeSeccomp(config *initConfig) error {
if config.Config.Seccomp == nil {
return nil
}
context := seccomp.New()
for _, s := range config.Config.Seccomp.Syscalls {
ss := &seccomp.Syscall{
Value: uint32(s.Value),
Action: seccompAction(s.Action),
}
if len(s.Args) > 0 {
ss.Args = seccompArgs(s.Args)
}
context.Add(ss)
}
return context.Load()
}
func seccompAction(a configs.Action) seccomp.Action {
switch a {
case configs.Kill:
return seccomp.Kill
case configs.Trap:
return seccomp.Trap
case configs.Allow:
return seccomp.Allow
}
return seccomp.Error(syscall.Errno(int(a)))
}
func seccompArgs(args []*configs.Arg) seccomp.Args {
var sa []seccomp.Arg
for _, a := range args {
sa = append(sa, seccomp.Arg{
Index: uint32(a.Index),
Op: seccompOperator(a.Op),
Value: uint(a.Value),
})
}
return seccomp.Args{sa}
}
func seccompOperator(o configs.Operator) seccomp.Operator {
switch o {
case configs.EqualTo:
return seccomp.EqualTo
case configs.NotEqualTo:
return seccomp.NotEqualTo
case configs.GreatherThan:
return seccomp.GreatherThan
case configs.LessThan:
return seccomp.LessThan
case configs.MaskEqualTo:
return seccomp.MaskEqualTo
}
return 0
}

View File

@ -793,33 +793,6 @@ func TestSysctl(t *testing.T) {
} }
} }
func TestSeccompNoChown(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{}
config.Seccomp.Syscalls = append(config.Seccomp.Syscalls, &configs.Syscall{
Value: syscall.SYS_CHOWN,
Action: configs.Action(syscall.EPERM),
})
buffers, _, err := runContainer(config, "", "/bin/sh", "-c", "chown 1:1 /tmp")
if err == nil {
t.Fatal("running chown in a container should fail")
}
if buffers == nil {
t.Fatalf("Container wasn't even created: %v", err)
}
if s := buffers.String(); !strings.Contains(s, "not permitted") {
t.Fatalf("running chown should result in an EPERM but got %q", s)
}
}
func TestMountCgroupRO(t *testing.T) { func TestMountCgroupRO(t *testing.T) {
if testing.Short() { if testing.Short() {
return return

View File

@ -0,0 +1,216 @@
// +build linux,cgo
package integration
import (
"strings"
"syscall"
"testing"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/configs"
libseccomp "github.com/seccomp/libseccomp-golang"
)
func TestSeccompDenyGetcwd(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "getcwd",
Action: configs.Errno,
},
},
}
container, err := newContainer(config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
buffers := newStdBuffers()
pwd := &libcontainer.Process{
Args: []string{"pwd"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Start(pwd)
if err != nil {
t.Fatal(err)
}
ps, err := pwd.Wait()
if err == nil {
t.Fatal("Expecting error (negative return code); instead exited cleanly!")
}
var exitCode int
status := ps.Sys().(syscall.WaitStatus)
if status.Exited() {
exitCode = status.ExitStatus()
} else if status.Signaled() {
exitCode = -int(status.Signal())
} else {
t.Fatalf("Unrecognized exit reason!")
}
if exitCode == 0 {
t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode)
}
expected := "pwd: getcwd: Operation not permitted"
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
}
}
func TestSeccompPermitWriteConditional(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "write",
Action: configs.Errno,
Args: []*configs.Arg{
{
Index: 0,
Value: 1,
Op: configs.GreaterThan,
},
},
},
},
}
container, err := newContainer(config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
buffers := newStdBuffers()
dmesg := &libcontainer.Process{
Args: []string{"busybox", "ls", "/"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Start(dmesg)
if err != nil {
t.Fatal(err)
}
if _, err := dmesg.Wait(); err != nil {
t.Fatalf("%s: %s", err, buffers.Stderr)
}
}
func TestSeccompDenyWriteConditional(t *testing.T) {
if testing.Short() {
return
}
// Only test if library version is v2.2.1 or higher
// Conditional filtering will always error in v2.2.0 and lower
major, minor, micro := libseccomp.GetLibraryVersion()
if (major == 2 && minor < 2) || (major == 2 && minor == 2 && micro < 1) {
return
}
rootfs, err := newRootfs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
config.Seccomp = &configs.Seccomp{
DefaultAction: configs.Allow,
Syscalls: []*configs.Syscall{
{
Name: "write",
Action: configs.Errno,
Args: []*configs.Arg{
{
Index: 0,
Value: 1,
Op: configs.GreaterThan,
},
},
},
},
}
container, err := newContainer(config)
if err != nil {
t.Fatal(err)
}
defer container.Destroy()
buffers := newStdBuffers()
dmesg := &libcontainer.Process{
Args: []string{"busybox", "ls", "does_not_exist"},
Env: standardEnvironment,
Stdin: buffers.Stdin,
Stdout: buffers.Stdout,
Stderr: buffers.Stderr,
}
err = container.Start(dmesg)
if err != nil {
t.Fatal(err)
}
ps, err := dmesg.Wait()
if err == nil {
t.Fatal("Expecting negative return, instead got 0!")
}
var exitCode int
status := ps.Sys().(syscall.WaitStatus)
if status.Exited() {
exitCode = status.ExitStatus()
} else if status.Signaled() {
exitCode = -int(status.Signal())
} else {
t.Fatalf("Unrecognized exit reason!")
}
if exitCode == 0 {
t.Fatalf("Busybox should fail with negative exit code, instead got %d!", exitCode)
}
// We're denying write to stderr, so we expect an empty buffer
expected := ""
actual := strings.Trim(buffers.Stderr.String(), "\n")
if actual != expected {
t.Fatalf("Expected output %s but got %s\n", expected, actual)
}
}

View File

@ -1,34 +0,0 @@
// +build linux
package seccomp
import "strings"
type bpfLabel struct {
label string
location uint32
}
type bpfLabels []bpfLabel
// labelIndex returns the index for the label if it exists in the slice.
// if it does not exist in the slice it appends the label lb to the end
// of the slice and returns the index.
func labelIndex(labels *bpfLabels, lb string) uint32 {
var id uint32
for id = 0; id < uint32(len(*labels)); id++ {
if strings.EqualFold(lb, (*labels)[id].label) {
return id
}
}
*labels = append(*labels, bpfLabel{lb, 0xffffffff})
return id
}
func scmpBpfStmt(code uint16, k uint32) sockFilter {
return sockFilter{code, 0, 0, k}
}
func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter {
return sockFilter{code, jt, jf, k}
}

View File

@ -1,146 +0,0 @@
// +build linux
package seccomp
import (
"errors"
"syscall"
)
const labelTemplate = "lb-%d-%d"
// Action is the type of action that will be taken when a
// syscall is performed.
type Action int
const (
Kill Action = iota - 3 // Kill the calling process of the syscall.
Trap // Trap and coredump the calling process of the syscall.
Allow // Allow the syscall to be completed.
)
// Syscall is the specified syscall, action, and any type of arguments
// to filter on.
type Syscall struct {
// Value is the syscall number.
Value uint32
// Action is the action to perform when the specified syscall is made.
Action Action
// Args are filters that can be specified on the arguments to the syscall.
Args Args
}
func (s *Syscall) scmpAction() uint32 {
switch s.Action {
case Allow:
return retAllow
case Trap:
return retTrap
case Kill:
return retKill
}
return actionErrno(uint32(s.Action))
}
// Arg represents an argument to the syscall with the argument's index,
// the operator to apply when matching, and the argument's value at that time.
type Arg struct {
Index uint32 // index of args which start from zero
Op Operator // operation, such as EQ/NE/GE/LE
Value uint // the value of arg
}
type Args [][]Arg
var (
ErrUnresolvedLabel = errors.New("seccomp: unresolved label")
ErrDuplicateLabel = errors.New("seccomp: duplicate label use")
ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument")
)
// Error returns an Action that will be used to send the calling
// process the specified errno when the syscall is made.
func Error(code syscall.Errno) Action {
return Action(code)
}
// New returns a new syscall context for use.
func New() *Context {
return &Context{
syscalls: make(map[uint32]*Syscall),
}
}
// Context holds syscalls for the current process to limit the type of
// actions the calling process can make.
type Context struct {
syscalls map[uint32]*Syscall
}
// Add will add the specified syscall, action, and arguments to the seccomp
// Context.
func (c *Context) Add(s *Syscall) {
c.syscalls[s.Value] = s
}
// Remove removes the specified syscall configuration from the Context.
func (c *Context) Remove(call uint32) {
delete(c.syscalls, call)
}
// Load will apply the Context to the calling process makeing any secccomp process changes
// apply after the context is loaded.
func (c *Context) Load() error {
filter, err := c.newFilter()
if err != nil {
return err
}
if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil {
return err
}
prog := newSockFprog(filter)
return prog.set()
}
func (c *Context) newFilter() ([]sockFilter, error) {
var (
labels bpfLabels
f = newFilter()
)
for _, s := range c.syscalls {
f.addSyscall(s, &labels)
}
f.allow()
// process args for the syscalls
for _, s := range c.syscalls {
if err := f.addArguments(s, &labels); err != nil {
return nil, err
}
}
// apply labels for arguments
idx := int32(len(*f) - 1)
for ; idx >= 0; idx-- {
lf := &(*f)[idx]
if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) {
continue
}
rel := int32(lf.jt)<<8 | int32(lf.jf)
if ((jumpJT << 8) | jumpJF) == rel {
if labels[lf.k].location == 0xffffffff {
return nil, ErrUnresolvedLabel
}
lf.k = labels[lf.k].location - uint32(idx+1)
lf.jt = 0
lf.jf = 0
} else if ((labelJT << 8) | labelJF) == rel {
if labels[lf.k].location != 0xffffffff {
return nil, ErrDuplicateLabel
}
labels[lf.k].location = uint32(idx)
lf.k = 0
lf.jt = 0
lf.jf = 0
}
}
return *f, nil
}

View File

@ -1,118 +0,0 @@
// +build linux
package seccomp
import (
"fmt"
"syscall"
"unsafe"
)
type sockFilter struct {
code uint16
jt uint8
jf uint8
k uint32
}
func newFilter() *filter {
var f filter
f = append(f, sockFilter{
pfLD + syscall.BPF_W + syscall.BPF_ABS,
0,
0,
uint32(unsafe.Offsetof(secData.nr)),
})
return &f
}
type filter []sockFilter
func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) {
if len(s.Args) == 0 {
f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()))
} else {
if len(s.Args[0]) > 0 {
lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index)
f.call(s.Value,
scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
jumpJT, jumpJF))
}
}
}
func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error {
for i := 0; len(s.Args) > i; i++ {
if len(s.Args[i]) > 0 {
lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index)
f.label(labels, lb)
f.arg(s.Args[i][0].Index)
}
for j := 0; j < len(s.Args[i]); j++ {
var jf sockFilter
if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 {
lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index)
jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA,
labelIndex(labels, lbj), jumpJT, jumpJF)
} else {
jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())
}
if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil {
return err
}
}
f.allow()
}
return nil
}
func (f *filter) label(labels *bpfLabels, lb string) {
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF))
}
func (f *filter) call(nr uint32, jt sockFilter) {
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1))
*f = append(*f, jt)
}
func (f *filter) allow() {
*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow))
}
func (f *filter) deny() {
*f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap))
}
func (f *filter) arg(index uint32) {
arg(f, index)
}
func (f *filter) op(operation Operator, v uint, jf sockFilter) error {
switch operation {
case EqualTo:
jumpEqualTo(f, v, jf)
case NotEqualTo:
jumpNotEqualTo(f, v, jf)
case GreatherThan:
jumpGreaterThan(f, v, jf)
case LessThan:
jumpLessThan(f, v, jf)
case MaskEqualTo:
jumpMaskEqualTo(f, v, jf)
default:
return ErrUnsupportedOperation
}
return nil
}
func arg(f *filter, idx uint32) {
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.low(idx)))
*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx)))
*f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1))
}
func jump(f *filter, labels *bpfLabels, lb string) {
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb),
jumpJT, jumpJF))
}

View File

@ -1,68 +0,0 @@
// +build linux,amd64
package seccomp
// Using BPF filters
//
// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf
import "syscall"
func jumpGreaterThan(f *filter, v uint, jt sockFilter) {
lo := uint32(uint64(v) % 0x100000000)
hi := uint32(uint64(v) / 0x100000000)
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0))
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
*f = append(*f, jt)
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
}
func jumpEqualTo(f *filter, v uint, jt sockFilter) {
lo := uint32(uint64(v) % 0x100000000)
hi := uint32(uint64(v) / 0x100000000)
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
*f = append(*f, jt)
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
}
func jumpLessThan(f *filter, v uint, jt sockFilter) {
lo := uint32(uint64(v) % 0x100000000)
hi := uint32(uint64(v) / 0x100000000)
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0))
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
*f = append(*f, jt)
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
}
func jumpNotEqualTo(f *filter, v uint, jt sockFilter) {
lo := uint32(uint64(v) % 0x100000000)
hi := uint32(uint64(v) / 0x100000000)
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
*f = append(*f, jt)
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
}
// this checks for a value inside a mask. The evalusation is equal to doing
// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER
func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) {
lo := uint32(uint64(v) % 0x100000000)
hi := uint32(uint64(v) / 0x100000000)
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0))
*f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v)))
*f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2))
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
*f = append(*f, jt)
*f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1))
}

View File

@ -0,0 +1,165 @@
// +build linux,cgo
package seccomp
import (
"fmt"
"log"
"syscall"
"github.com/opencontainers/runc/libcontainer/configs"
libseccomp "github.com/seccomp/libseccomp-golang"
)
var (
actAllow = libseccomp.ActAllow
actTrap = libseccomp.ActTrap
actKill = libseccomp.ActKill
actErrno = libseccomp.ActErrno.SetReturnCode(int16(syscall.EPERM))
)
// Filters given syscalls in a container, preventing them from being used
// Started in the container init process, and carried over to all child processes
// Setns calls, however, require a separate invocation, as they are not children
// of the init until they join the namespace
func InitSeccomp(config *configs.Seccomp) error {
if config == nil {
return fmt.Errorf("cannot initialize Seccomp - nil config passed")
}
defaultAction, err := getAction(config.DefaultAction)
if err != nil {
return fmt.Errorf("error initializing seccomp - invalid default action")
}
filter, err := libseccomp.NewFilter(defaultAction)
if err != nil {
return fmt.Errorf("error creating filter: %s", err)
}
// Unset no new privs bit
if err := filter.SetNoNewPrivsBit(false); err != nil {
return fmt.Errorf("error setting no new privileges: %s", err)
}
// Add a rule for each syscall
for _, call := range config.Syscalls {
if call == nil {
return fmt.Errorf("encountered nil syscall while initializing Seccomp")
}
if err = matchCall(filter, call); err != nil {
return err
}
}
if err = filter.Load(); err != nil {
return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
}
return nil
}
// Convert Libcontainer Action to Libseccomp ScmpAction
func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
switch act {
case configs.Kill:
return actKill, nil
case configs.Errno:
return actErrno, nil
case configs.Trap:
return actTrap, nil
case configs.Allow:
return actAllow, nil
default:
return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
}
}
// Convert Libcontainer Operator to Libseccomp ScmpCompareOp
func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
switch op {
case configs.EqualTo:
return libseccomp.CompareEqual, nil
case configs.NotEqualTo:
return libseccomp.CompareNotEqual, nil
case configs.GreaterThan:
return libseccomp.CompareGreater, nil
case configs.GreaterThanOrEqualTo:
return libseccomp.CompareGreaterEqual, nil
case configs.LessThan:
return libseccomp.CompareLess, nil
case configs.LessThanOrEqualTo:
return libseccomp.CompareLessOrEqual, nil
case configs.MaskEqualTo:
return libseccomp.CompareMaskedEqual, nil
default:
return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
}
}
// Convert Libcontainer Arg to Libseccomp ScmpCondition
func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
cond := libseccomp.ScmpCondition{}
if arg == nil {
return cond, fmt.Errorf("cannot convert nil to syscall condition")
}
op, err := getOperator(arg.Op)
if err != nil {
return cond, err
}
return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
}
// Add a rule to match a single syscall
func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
if call == nil || filter == nil {
return fmt.Errorf("cannot use nil as syscall to block")
}
if len(call.Name) == 0 {
return fmt.Errorf("empty string is not a valid syscall")
}
// If we can't resolve the syscall, assume it's not supported on this kernel
// Ignore it, don't error out
callNum, err := libseccomp.GetSyscallFromName(call.Name)
if err != nil {
log.Printf("Error resolving syscall name %s: %s - ignoring syscall.", call.Name, err)
return nil
}
// Convert the call's action to the libseccomp equivalent
callAct, err := getAction(call.Action)
if err != nil {
return err
}
// Unconditional match - just add the rule
if len(call.Args) == 0 {
if err = filter.AddRule(callNum, callAct); err != nil {
return err
}
} else {
// Conditional match - convert the per-arg rules into library format
conditions := []libseccomp.ScmpCondition{}
for _, cond := range call.Args {
newCond, err := getCondition(cond)
if err != nil {
return err
}
conditions = append(conditions, newCond)
}
if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
return err
}
}
return nil
}

View File

@ -1,124 +0,0 @@
// +build linux
// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go.
package seccomp
import (
"syscall"
"unsafe"
)
// Operator that is used for argument comparison.
type Operator int
const (
EqualTo Operator = iota
NotEqualTo
GreatherThan
LessThan
MaskEqualTo
)
const (
jumpJT = 0xff
jumpJF = 0xff
labelJT = 0xfe
labelJF = 0xfe
)
const (
pfLD = 0x0
retKill = 0x00000000
retTrap = 0x00030000
retAllow = 0x7fff0000
modeFilter = 0x2
prSetNoNewPrivileges = 0x26
)
func actionErrno(errno uint32) uint32 {
return 0x00050000 | (errno & 0x0000ffff)
}
var (
secData = struct {
nr int32
arch uint32
insPointer uint64
args [6]uint64
}{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}}
)
var isLittle = func() bool {
var (
x = 0x1234
p = unsafe.Pointer(&x)
p2 = (*[unsafe.Sizeof(0)]byte)(p)
)
if p2[0] == 0 {
return false
}
return true
}()
var endian endianSupport
type endianSupport struct {
}
func (e endianSupport) hi(i uint32) uint32 {
if isLittle {
return e.little(i)
}
return e.big(i)
}
func (e endianSupport) low(i uint32) uint32 {
if isLittle {
return e.big(i)
}
return e.little(i)
}
func (endianSupport) big(idx uint32) uint32 {
if idx >= 6 {
return 0
}
return uint32(unsafe.Offsetof(secData.args)) + 8*idx
}
func (endianSupport) little(idx uint32) uint32 {
if idx < 0 || idx >= 6 {
return 0
}
return uint32(unsafe.Offsetof(secData.args)) +
uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch))
}
func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error {
_, _, err := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
if err != 0 {
return err
}
return nil
}
func newSockFprog(filter []sockFilter) *sockFprog {
return &sockFprog{
len: uint16(len(filter)),
filt: filter,
}
}
type sockFprog struct {
len uint16
filt []sockFilter
}
func (s *sockFprog) set() error {
_, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP),
uintptr(modeFilter), uintptr(unsafe.Pointer(s)))
if err != 0 {
return err
}
return nil
}

View File

@ -1,3 +1,12 @@
// +build !linux // +build !linux !cgo
package seccomp package seccomp
import (
"github.com/opencontainers/runc/libcontainer/configs"
)
// Seccomp not supported, do nothing
func InitSeccomp(config *configs.Seccomp) error {
return nil
}

View File

@ -7,6 +7,7 @@ import (
"github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/label" "github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
) )
@ -20,6 +21,11 @@ func (l *linuxSetnsInit) Init() error {
if err := setupRlimits(l.config.Config); err != nil { if err := setupRlimits(l.config.Config); err != nil {
return err return err
} }
if l.config.Config.Seccomp != nil {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
}
if err := finalizeNamespace(l.config); err != nil { if err := finalizeNamespace(l.config); err != nil {
return err return err
} }

View File

@ -9,6 +9,7 @@ import (
"github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/label" "github.com/opencontainers/runc/libcontainer/label"
"github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
) )
@ -85,6 +86,11 @@ func (l *linuxStandardInit) Init() error {
if err != nil { if err != nil {
return err return err
} }
if l.config.Config.Seccomp != nil {
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
return err
}
}
if err := finalizeNamespace(l.config); err != nil { if err := finalizeNamespace(l.config); err != nil {
return err return err
} }
@ -99,8 +105,5 @@ func (l *linuxStandardInit) Init() error {
if syscall.Getppid() != l.parentPid { if syscall.Getppid() != l.parentPid {
return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) return syscall.Kill(syscall.Getpid(), syscall.SIGKILL)
} }
if err := finalizeSeccomp(l.config); err != nil {
return err
}
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
} }

View File

@ -1,7 +1,7 @@
FROM golang:1.4 FROM golang:1.4
RUN echo "deb http://ftp.us.debian.org/debian testing main contrib" >> /etc/apt/sources.list RUN echo "deb http://ftp.us.debian.org/debian testing main contrib" >> /etc/apt/sources.list
RUN apt-get update && apt-get install -y iptables criu=1.6-2 && rm -rf /var/lib/apt/lists/* RUN apt-get update && apt-get install -y iptables criu=1.6-2 libseccomp2 libseccomp-dev && rm -rf /var/lib/apt/lists/*
# setup a playground for us to spawn containers in # setup a playground for us to spawn containers in
RUN mkdir /busybox && \ RUN mkdir /busybox && \