diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index e355c3e7..df44b927 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -58,6 +58,10 @@ { "ImportPath": "github.com/syndtr/gocapability/capability", "Rev": "e55e5833692b49e49a0073ad5baf7803f21bebf4" + }, + { + "ImportPath": "github.com/seccomp/libseccomp-golang", + "Rev": "4ad869ffe4033151a18ef08fc260213051d8388d" } ] } diff --git a/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/LICENSE b/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/LICENSE new file mode 100644 index 00000000..81cf60de --- /dev/null +++ b/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/LICENSE @@ -0,0 +1,22 @@ +Copyright (c) 2015 Matthew Heon +Copyright (c) 2015 Paul Moore +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/README b/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/README new file mode 100644 index 00000000..64cab691 --- /dev/null +++ b/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/README @@ -0,0 +1,26 @@ +libseccomp-golang: Go Language Bindings for the libseccomp Project +=============================================================================== +https://github.com/seccomp/libseccomp-golang +https://github.com/seccomp/libseccomp + +The libseccomp library provides an easy to use, platform independent, interface +to the Linux Kernel's syscall filtering mechanism. The libseccomp API is +designed to abstract away the underlying BPF based syscall filter language and +present a more conventional function-call based filtering interface that should +be familiar to, and easily adopted by, application developers. + +The libseccomp-golang library provides a Go based interface to the libseccomp +library. + +* Online Resources + +The library source repository currently lives on GitHub at the following URLs: + + -> https://github.com/seccomp/libseccomp-golang + -> https://github.com/seccomp/libseccomp + +The project mailing list is currently hosted on Google Groups at the URL below, +please note that a Google account is not required to subscribe to the mailing +list. 
+ + -> https://groups.google.com/d/forum/libseccomp diff --git a/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/seccomp.go b/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/seccomp.go new file mode 100644 index 00000000..cd4d4e6d --- /dev/null +++ b/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/seccomp.go @@ -0,0 +1,774 @@ +// +build linux + +// Public API specification for libseccomp Go bindings +// Contains public API for the bindings + +// Provides bindings for libseccomp, a library wrapping the Linux seccomp +// syscall. Seccomp enables an application to restrict system call use for +// itself and its children. +package seccomp + +import ( + "fmt" + "os" + "runtime" + "strings" + "sync" + "syscall" + "unsafe" +) + +// C wrapping code + +// #cgo LDFLAGS: -lseccomp +// #include +// #include +import "C" + +// Exported types + +// Represents a CPU architecture. +// Seccomp can restrict syscalls on a per-architecture basis. +type ScmpArch uint + +// Represents an action to be taken on a filter rule match in libseccomp +type ScmpAction uint + +// Represents a comparison operator which can be used in a filter rule +type ScmpCompareOp uint + +// Represents a rule in a libseccomp filter context +type ScmpCondition struct { + Argument uint `json:"argument,omitempty"` + Op ScmpCompareOp `json:"operator,omitempty"` + Operand1 uint64 `json:"operand_one,omitempty"` + Operand2 uint64 `json:"operand_two,omitempty"` +} + +// Represents a Linux System Call +type ScmpSyscall int32 + +// Exported Constants + +const ( + // Valid architectures recognized by libseccomp + + // Ensure uninitialized ScmpArch variables are invalid + ArchInvalid ScmpArch = iota + // The native architecture of the kernel + ArchNative ScmpArch = iota + // 32-bit x86 syscalls + ArchX86 ScmpArch = iota + // 64-bit x86-64 syscalls + ArchAMD64 ScmpArch = iota + // 64-bit x86-64 syscalls (32-bit pointers) + ArchX32 ScmpArch = iota + // 32-bit ARM syscalls + ArchARM ScmpArch = 
iota + // 64-bit ARM syscalls + ArchARM64 ScmpArch = iota + // 32-bit MIPS syscalls + ArchMIPS ScmpArch = iota + // 64-bit MIPS syscalls + ArchMIPS64 ScmpArch = iota + // 64-bit MIPS syscalls (32-bit pointers) + ArchMIPS64N32 ScmpArch = iota + // 32-bit MIPS syscalls (little endian) + ArchMIPSEL ScmpArch = iota + // 64-bit MIPS syscalls (little endian) + ArchMIPSEL64 ScmpArch = iota + // 64-bit MIPS syscalls (little endian, 32-bit pointers) + ArchMIPSEL64N32 ScmpArch = iota +) + +const ( + // Supported actions on filter match + + // Ensure uninitialized ScmpAction variables are invalid + ActInvalid ScmpAction = iota + // Kill process + ActKill ScmpAction = iota + // Throw SIGSYS + ActTrap ScmpAction = iota + // The syscall will return an negative error code + // This code can be set with the SetReturnCode method + ActErrno ScmpAction = iota + // Notify tracing processes with given error code + // This code can be set with the SetReturnCode method + ActTrace ScmpAction = iota + // Permit the syscall to continue execution + ActAllow ScmpAction = iota +) + +const ( + // These are comparison operators used in conditional seccomp rules + + // Ensure uninitialized ScmpCompareOp variables are invalid + CompareInvalid ScmpCompareOp = iota + CompareNotEqual ScmpCompareOp = iota + CompareLess ScmpCompareOp = iota + CompareLessOrEqual ScmpCompareOp = iota + CompareEqual ScmpCompareOp = iota + CompareGreaterEqual ScmpCompareOp = iota + CompareGreater ScmpCompareOp = iota + CompareMaskedEqual ScmpCompareOp = iota +) + +// Helpers for types + +// Return an ScmpArch constant from a string representing an architecture +func GetArchFromString(arch string) (ScmpArch, error) { + switch strings.ToLower(arch) { + case "x86": + return ArchX86, nil + case "amd64", "x86-64", "x86_64", "x64": + return ArchAMD64, nil + case "x32": + return ArchX32, nil + case "arm": + return ArchARM, nil + case "arm64", "aarch64": + return ArchARM64, nil + case "mips": + return ArchMIPS, nil + case 
"mips64": + return ArchMIPS64, nil + case "mips64n32": + return ArchMIPS64N32, nil + case "mipsel": + return ArchMIPSEL, nil + case "mipsel64": + return ArchMIPSEL64, nil + case "mipsel64n32": + return ArchMIPSEL64N32, nil + default: + return ArchInvalid, fmt.Errorf("Cannot convert unrecognized string %s", arch) + } +} + +// Returns a string representation of an architecture constant +func (a ScmpArch) String() string { + switch a { + case ArchX86: + return "x86" + case ArchAMD64: + return "amd64" + case ArchX32: + return "x32" + case ArchARM: + return "arm" + case ArchARM64: + return "arm64" + case ArchMIPS: + return "mips" + case ArchMIPS64: + return "mips64" + case ArchMIPS64N32: + return "mips64n32" + case ArchMIPSEL: + return "mipsel" + case ArchMIPSEL64: + return "mipsel64" + case ArchMIPSEL64N32: + return "mipsel64n32" + case ArchNative: + return "native" + case ArchInvalid: + return "Invalid architecture" + default: + return "Unknown architecture" + } +} + +// Returns a string representation of a comparison operator constant +func (a ScmpCompareOp) String() string { + switch a { + case CompareNotEqual: + return "Not equal" + case CompareLess: + return "Less than" + case CompareLessOrEqual: + return "Less than or equal to" + case CompareEqual: + return "Equal" + case CompareGreaterEqual: + return "Greater than or equal to" + case CompareGreater: + return "Greater than" + case CompareMaskedEqual: + return "Masked equality" + case CompareInvalid: + return "Invalid comparison operator" + default: + return "Unrecognized comparison operator" + } +} + +// Returns a string representation of a seccomp match action +func (a ScmpAction) String() string { + switch a & 0xFFFF { + case ActKill: + return "Action: Kill Process" + case ActTrap: + return "Action: Send SIGSYS" + case ActErrno: + return fmt.Sprintf("Action: Return error code %d", (a >> 16)) + case ActTrace: + return fmt.Sprintf("Action: Notify tracing processes with code %d", + (a >> 16)) + case ActAllow: + 
return "Action: Allow system call" + default: + return "Unrecognized Action" + } +} + +// Add a return code to a supporting ScmpAction, clearing any existing code +// Only valid on ActErrno and ActTrace. Takes no action otherwise. +// Accepts 16-bit return code as argument. +// Returns a valid ScmpAction of the original type with the new error code set. +func (a ScmpAction) SetReturnCode(code int16) ScmpAction { + aTmp := a & 0x0000FFFF + if aTmp == ActErrno || aTmp == ActTrace { + return (aTmp | (ScmpAction(code)&0xFFFF)<<16) + } + return a +} + +// Get the return code of an ScmpAction +func (a ScmpAction) GetReturnCode() int16 { + return int16(a >> 16) +} + +// Syscall functions + +// Get the name of a syscall from its number. +// Acts on any syscall number. +// Returns either a string containing the name of the syscall, or an error. +func (s ScmpSyscall) GetName() (string, error) { + return s.GetNameByArch(ArchNative) +} + +// Get the name of a syscall from its number for a given architecture. +// Acts on any syscall number. +// Accepts a valid architecture constant. +// Returns either a string containing the name of the syscall, or an error. +// if the syscall is unrecognized or an issue occurred. +func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) { + if err := sanitizeArch(arch); err != nil { + return "", err + } + + cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s)) + if cString == nil { + return "", fmt.Errorf("Could not resolve syscall name") + } + defer C.free(unsafe.Pointer(cString)) + + finalStr := C.GoString(cString) + return finalStr, nil +} + +// Get the number of a syscall by name on the kernel's native architecture. +// Accepts a string containing the name of a syscall. +// Returns the number of the syscall, or an error if no syscall with that name +// was found. 
+func GetSyscallFromName(name string) (ScmpSyscall, error) { + cString := C.CString(name) + defer C.free(unsafe.Pointer(cString)) + + result := C.seccomp_syscall_resolve_name(cString) + if result == scmpError { + return 0, fmt.Errorf("Could not resolve name to syscall") + } + + return ScmpSyscall(result), nil +} + +// Get the number of a syscall by name for a given architecture's ABI. +// Accepts the name of a syscall and an architecture constant. +// Returns the number of the syscall, or an error if an invalid architecture is +// passed or a syscall with that name was not found. +func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) { + if err := sanitizeArch(arch); err != nil { + return 0, err + } + + cString := C.CString(name) + defer C.free(unsafe.Pointer(cString)) + + result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString) + if result == scmpError { + return 0, fmt.Errorf("Could not resolve name to syscall") + } + + return ScmpSyscall(result), nil +} + +// Make a new condition to attach to a filter rule. +// Associated rules will only match if this condition is true. +// Accepts the number the argument we are checking, and a comparison operator +// and value to compare to. +// The rule will match if argument $arg (zero-indexed) of the syscall is +// $COMPARE_OP the provided comparison value. +// Some comparison operators accept two values. Masked equals, for example, +// will mask $arg of the syscall with the second value provided (via bitwise +// AND) and then compare against the first value provided. +// For example, in the less than or equal case, if the syscall argument was +// 0 and the value provided was 1, the condition would match, as 0 is less +// than or equal to 1. +// Return either an error on bad argument or a valid ScmpCondition struct. 
+func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) { + var condStruct ScmpCondition + + if comparison == CompareInvalid { + return condStruct, fmt.Errorf("Invalid comparison operator!") + } else if arg > 5 { + return condStruct, fmt.Errorf("Syscalls only have up to 6 arguments!") + } else if len(values) > 2 { + return condStruct, fmt.Errorf("Conditions can have at most 2 arguments!") + } else if len(values) == 0 { + return condStruct, fmt.Errorf("Must provide at least one value to compare against!") + } + + condStruct.Argument = arg + condStruct.Op = comparison + condStruct.Operand1 = values[0] + if len(values) == 2 { + condStruct.Operand2 = values[1] + } else { + condStruct.Operand2 = 0 // Unused + } + + return condStruct, nil +} + +// Utility Functions + +// Returns architecture token representing the native kernel architecture +func GetNativeArch() (ScmpArch, error) { + arch := C.seccomp_arch_native() + + return archFromNative(arch) +} + +// Public Filter API + +// Represents a filter context in libseccomp. +// A filter context is initially empty. Rules can be added to it, and it can +// then be loaded into the kernel. +type ScmpFilter struct { + filterCtx C.scmp_filter_ctx + valid bool + lock sync.Mutex +} + +// Create a new filter context. +// Accepts a default action to be taken for syscalls which match no rules in +// the filter. +// Returns a reference to a valid filter context, or nil and an error if the +// filter context could not be created or an invalid default action was given. 
+func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) { + if err := sanitizeAction(defaultAction); err != nil { + return nil, err + } + + fPtr := C.seccomp_init(defaultAction.toNative()) + if fPtr == nil { + return nil, fmt.Errorf("Could not create filter") + } + + filter := new(ScmpFilter) + filter.filterCtx = fPtr + filter.valid = true + runtime.SetFinalizer(filter, filterFinalizer) + + return filter, nil +} + +// Determine whether a filter context is valid to use. +// Some operations (Release and Merge) render filter contexts invalid and +// consequently prevent further use. +func (f *ScmpFilter) IsValid() bool { + f.lock.Lock() + defer f.lock.Unlock() + + return f.valid +} + +// Reset a filter context, removing all its existing state. +// Accepts a new default action to be taken for syscalls which do not match. +// Returns an error if the filter or action provided are invalid. +func (f *ScmpFilter) Reset(defaultAction ScmpAction) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeAction(defaultAction); err != nil { + return err + } else if !f.valid { + return fmt.Errorf("Filter is invalid or uninitialized") + } + + retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative()) + if retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Releases a filter context, freeing its memory. Should be called after +// loading into the kernel, when the filter is no longer needed. +// After calling this function, the given filter is no longer valid and cannot +// be used. +// Release() will be invoked automatically when a filter context is garbage +// collected, but can also be called manually to free memory. +func (f *ScmpFilter) Release() { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return + } + + f.valid = false + C.seccomp_release(f.filterCtx) +} + +// Merge two filter contexts. 
+// The source filter src will be released as part of the process, and will no +// longer be usable or valid after this call. +// To be merged, filters must NOT share any architectures, and all their +// attributes must match. +// The filter src will be merged into the filter this is called on. +// The architectures of the src filter not present in the destination, and all +// associated rules, will be added to the destination. +// Returns an error if merging the filters failed. +func (f *ScmpFilter) Merge(src *ScmpFilter) error { + f.lock.Lock() + defer f.lock.Unlock() + + src.lock.Lock() + defer src.lock.Unlock() + + if !src.valid || !f.valid { + return fmt.Errorf( + "One or more of the filter contexts is invalid or uninitialized") + } + + // Merge the filters + retCode := C.seccomp_merge(f.filterCtx, src.filterCtx) + if syscall.Errno(-1*retCode) == syscall.EINVAL { + return fmt.Errorf("Filters could not be merged due to a mismatch in attributes or invalid filter!") + } else if retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + src.valid = false + + return nil +} + +// Check if an architecture is present in a filter. +// If a filter contains an architecture, it uses its default action for +// syscalls which do not match rules in it, and its rules can match syscalls +// for that ABI. +// If a filter does not contain an architecture, all syscalls made to that +// kernel ABI will fail with the filter's default Bad Architecture Action +// (by default, killing the process). +// Accepts an architecture constant. +// Returns true if the architecture is present in the filter, false otherwise, +// and an error on an invalid filter context, architecture constant, or an +// issue with the call to libseccomp. 
+func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return false, err + } else if !f.valid { + return false, fmt.Errorf("Filter is invalid or uninitialized") + } + + retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative()) + if syscall.Errno(-1*retCode) == syscall.EEXIST { + // -EEXIST is "arch not present" + return false, nil + } else if retCode != 0 { + return false, syscall.Errno(-1 * retCode) + } + + return true, nil +} + +// Add an architecture to the filter. +// Accepts an architecture constant. +// Returns an error on invalid filter context or architecture token, or an +// issue with the call to libseccomp. +func (f *ScmpFilter) AddArch(arch ScmpArch) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return err + } else if !f.valid { + return fmt.Errorf("Filter is invalid or uninitialized") + } + + // Libseccomp returns -EEXIST if the specified architecture is already + // present. Succeed silently in this case, as it's not fatal, and the + // architecture is present already. + retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative()) + if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Remove an architecture from the filter. +// Accepts an architecture constant. +// Returns an error on invalid filter context or architecture token, or an +// issue with the call to libseccomp. 
+func (f *ScmpFilter) RemoveArch(arch ScmpArch) error { + f.lock.Lock() + defer f.lock.Unlock() + + if err := sanitizeArch(arch); err != nil { + return err + } else if !f.valid { + return fmt.Errorf("Filter is invalid or uninitialized") + } + + // Similar to AddArch, -EEXIST is returned if the arch is not present + // Succeed silently in that case, this is not fatal and the architecture + // is not present in the filter after RemoveArch + retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative()) + if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Load a filter context into the kernel. +// Returns an error if the filter context is invalid or the syscall failed. +func (f *ScmpFilter) Load() error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return fmt.Errorf("Filter is invalid or uninitialized") + } + + if retCode := C.seccomp_load(f.filterCtx); retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Returns the default action taken on a syscall which does not match a rule in +// the filter, or an error if an issue was encountered retrieving the value. +func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) { + action, err := f.getFilterAttr(filterAttrActDefault, true) + if err != nil { + return 0x0, err + } + + return actionFromNative(action) +} + +// Returns the default action taken on a syscall for an architecture not in the +// filter, or an error if an issue was encountered retrieving the value. +func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) { + action, err := f.getFilterAttr(filterAttrActBadArch, true) + if err != nil { + return 0x0, err + } + + return actionFromNative(action) +} + +// Returns the current state the No New Privileges bit will be set to on the +// filter being loaded, or an error if an issue was encountered retrieving the +// value. 
+// The No New Privileges bit tells the kernel that new processes run with exec() +// cannot gain more privileges than the process that ran exec(). +// For example, a process with No New Privileges set would be unable to exec +// setuid/setgid executables. +func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) { + noNewPrivs, err := f.getFilterAttr(filterAttrNNP, true) + if err != nil { + return false, err + } + + if noNewPrivs == 0 { + return false, nil + } + + return true, nil +} + +// Returns whether Thread Synchronization will be enabled on the filter being +// loaded, or an error if an issue was encountered retrieving the value. +// Thread Sync ensures that all members of the thread group of the calling +// process will share the same Seccomp filter set. +// Tsync is a fairly recent addition to the Linux kernel and older kernels +// lack support. If the running kernel does not support Tsync and it is +// requested in a filter, Libseccomp will not enable TSync support and will +// proceed as normal. +func (f *ScmpFilter) GetTsyncBit() (bool, error) { + tSync, err := f.getFilterAttr(filterAttrTsync, true) + if err != nil { + return false, err + } + + if tSync == 0 { + return false, nil + } + + return true, nil +} + +// Set the default action taken on a syscall for an architecture not in the +// filter, or an error if an issue was encountered setting the value. +func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error { + if err := sanitizeAction(action); err != nil { + return err + } + + return f.setFilterAttr(filterAttrActBadArch, action.toNative()) +} + +// Set the state of the No New Privileges bit, which will be applied on filter +// load, or an error if an issue was encountered setting the value. +// Filters with No New Privileges set to 0 can only be loaded with the +// CAP_SYS_ADMIN privilege. 
+func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error { + var toSet C.uint32_t = 0x0 + + if state { + toSet = 0x1 + } + + return f.setFilterAttr(filterAttrNNP, toSet) +} + +// Sets whether Thread Synchronization will be enabled on the filter being +// loaded. Returns an error if setting Tsync failed, or the filter is invalid. +// Thread Sync ensures that all members of the thread group of the calling +// process will share the same Seccomp filter set. +// Tsync is a fairly recent addition to the Linux kernel and older kernels +// lack support. If the running kernel does not support Tsync and it is +// requested in a filter, Libseccomp will not enable TSync support and will +// proceed as normal. +func (f *ScmpFilter) SetTsync(enable bool) error { + var toSet C.uint32_t = 0x0 + + if enable { + toSet = 0x1 + } + + return f.setFilterAttr(filterAttrTsync, toSet) +} + +// Set a syscall's priority. +// This provides a hint to the filter generator in libseccomp about the +// importance of this syscall. High-priority syscalls are placed +// first in the filter code, and incur less overhead (at the expense of +// lower-priority syscalls). +func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return fmt.Errorf("Filter is invalid or uninitialized") + } + + if retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call), + C.uint8_t(priority)); retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Add a single rule for an unconditional action on a syscall. +// Accepts the number of the syscall and the action to be taken on the call +// being made. +// Returns an error if an issue was encountered adding the rule. +func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error { + return f.addRuleGeneric(call, action, false, nil) +} + +// Add a single rule for an unconditional action on a syscall. 
+// Accepts the number of the syscall and the action to be taken on the call +// being made. +// No modifications will be made to the rule, and it will fail to add if it +// cannot be applied to the current architecture without modification. +// The rule will function exactly as described, but it may not function identically +// (or be able to be applied to) all architectures. +// Returns an error if an issue was encountered adding the rule. +func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error { + return f.addRuleGeneric(call, action, true, nil) +} + +// Add a single rule for a conditional action on a syscall. +// Returns an error if an issue was encountered adding the rule. +// All conditions must match for the rule to match. +func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { + return f.addRuleGeneric(call, action, false, conds) +} + +// Add a single rule for a conditional action on a syscall. +// No modifications will be made to the rule, and it will fail to add if it +// cannot be applied to the current architecture without modification. +// The rule will function exactly as described, but it may not function identically +// (or be able to be applied to) all architectures. +// Returns an error if an issue was encountered adding the rule. +func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { + return f.addRuleGeneric(call, action, true, conds) +} + +// Output PFC-formatted, human-readable dump of a filter context's rules to a +// file. +// Accepts file to write to (must be open for writing). +// Returns an error if writing to the file fails. 
+func (f *ScmpFilter) ExportPFC(file *os.File) error { + f.lock.Lock() + defer f.lock.Unlock() + + fd := file.Fd() + + if !f.valid { + return fmt.Errorf("Filter is invalid or uninitialized") + } + + if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} + +// Output Berkeley Packet Filter-formatted, kernel-readable dump of a filter +// context's rules to a file. +// Accepts file to write to (must be open for writing). +// Returns an error if writing to the file fails. +func (f *ScmpFilter) ExportBPF(file *os.File) error { + f.lock.Lock() + defer f.lock.Unlock() + + fd := file.Fd() + + if !f.valid { + return fmt.Errorf("Filter is invalid or uninitialized") + } + + if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 { + return syscall.Errno(-1 * retCode) + } + + return nil +} diff --git a/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/seccomp_internal.go b/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/seccomp_internal.go new file mode 100644 index 00000000..b34cc74e --- /dev/null +++ b/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/seccomp_internal.go @@ -0,0 +1,412 @@ +// +build linux + +// Internal functions for libseccomp Go bindings +// No exported functions + +package seccomp + +import ( + "fmt" + "os" + "syscall" +) + +// Unexported C wrapping code - provides the C-Golang interface +// Get the seccomp header in scope +// Need stdlib.h for free() on cstrings + +// #cgo LDFLAGS: -lseccomp +/* +#include +#include + +#if SCMP_VER_MAJOR < 1 +#error Minimum supported version of Libseccomp is v2.2.1 +#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2 +#error Minimum supported version of Libseccomp is v2.2.1 +#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR == 2 && SCMP_VER_MICRO < 1 +#error Minimum supported version of Libseccomp is v2.2.1 +#endif + +const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE; +const uint32_t C_ARCH_X86 = SCMP_ARCH_X86; 
+const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64; +const uint32_t C_ARCH_X32 = SCMP_ARCH_X32; +const uint32_t C_ARCH_ARM = SCMP_ARCH_ARM; +const uint32_t C_ARCH_AARCH64 = SCMP_ARCH_AARCH64; +const uint32_t C_ARCH_MIPS = SCMP_ARCH_MIPS; +const uint32_t C_ARCH_MIPS64 = SCMP_ARCH_MIPS64; +const uint32_t C_ARCH_MIPS64N32 = SCMP_ARCH_MIPS64N32; +const uint32_t C_ARCH_MIPSEL = SCMP_ARCH_MIPSEL; +const uint32_t C_ARCH_MIPSEL64 = SCMP_ARCH_MIPSEL64; +const uint32_t C_ARCH_MIPSEL64N32 = SCMP_ARCH_MIPSEL64N32; + +const uint32_t C_ACT_KILL = SCMP_ACT_KILL; +const uint32_t C_ACT_TRAP = SCMP_ACT_TRAP; +const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0); +const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0); +const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW; + +const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT; +const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH; +const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP; +const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC; + +const int C_CMP_NE = (int)SCMP_CMP_NE; +const int C_CMP_LT = (int)SCMP_CMP_LT; +const int C_CMP_LE = (int)SCMP_CMP_LE; +const int C_CMP_EQ = (int)SCMP_CMP_EQ; +const int C_CMP_GE = (int)SCMP_CMP_GE; +const int C_CMP_GT = (int)SCMP_CMP_GT; +const int C_CMP_MASKED_EQ = (int)SCMP_CMP_MASKED_EQ; + +const int C_VERSION_MAJOR = SCMP_VER_MAJOR; +const int C_VERSION_MINOR = SCMP_VER_MINOR; +const int C_VERSION_MICRO = SCMP_VER_MICRO; + +typedef struct scmp_arg_cmp* scmp_cast_t; + +// Wrapper to create an scmp_arg_cmp struct +void* +make_struct_arg_cmp( + unsigned int arg, + int compare, + uint64_t a, + uint64_t b + ) +{ + struct scmp_arg_cmp *s = malloc(sizeof(struct scmp_arg_cmp)); + + s->arg = arg; + s->op = compare; + s->datum_a = a; + s->datum_b = b; + + return s; +} +*/ +import "C" + +// Nonexported types +type scmpFilterAttr uint32 + +// Nonexported constants + +const ( + filterAttrActDefault scmpFilterAttr = iota + filterAttrActBadArch scmpFilterAttr = iota + 
filterAttrNNP scmpFilterAttr = iota + filterAttrTsync scmpFilterAttr = iota +) + +const ( + // An error return from certain libseccomp functions + scmpError C.int = -1 + // Comparison boundaries to check for architecture validity + archStart ScmpArch = ArchNative + archEnd ScmpArch = ArchMIPSEL64N32 + // Comparison boundaries to check for action validity + actionStart ScmpAction = ActKill + actionEnd ScmpAction = ActAllow + // Comparison boundaries to check for comparison operator validity + compareOpStart ScmpCompareOp = CompareNotEqual + compareOpEnd ScmpCompareOp = CompareMaskedEqual +) + +// Nonexported functions + +// Exit with error, as provided version of Libseccomp is too low +func errorOnVersionTooLow() { + fmt.Fprintf(os.Stderr, "Libseccomp version too low: minimum supported is 2.2.1, detected %d.%d.%d", C.C_VERSION_MAJOR, C.C_VERSION_MINOR, C.C_VERSION_MICRO) + os.Exit(-1) +} + +// Init function: Verify library version is appropriate +func init() { + // No versions of the 1.x library are supported + if C.C_VERSION_MAJOR < 2 { + errorOnVersionTooLow() + } + + // Versions 2.0 and 2.1 are not supported + if C.C_VERSION_MAJOR == 2 && C.C_VERSION_MINOR < 2 { + errorOnVersionTooLow() + } + + // Version 2.2.0 is not supported - need at least 2.2.1 + if C.C_VERSION_MAJOR == 2 && C.C_VERSION_MINOR == 2 && C.C_VERSION_MICRO < 1 { + errorOnVersionTooLow() + } +} + +// Filter helpers + +// Filter finalizer - ensure that kernel context for filters is freed +func filterFinalizer(f *ScmpFilter) { + f.Release() +} + +// Get a raw filter attribute +func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr, lock bool) (C.uint32_t, error) { + if lock { + f.lock.Lock() + defer f.lock.Unlock() + + if !f.valid { + return 0x0, fmt.Errorf("Filter is invalid or uninitialized") + } + } + + var attribute C.uint32_t + + retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &attribute) + if retCode != 0 { + return 0x0, syscall.Errno(-1 * retCode) + } + + return attribute, nil +} 
+
+// setFilterAttr writes value to the raw filter attribute attr of f using
+// seccomp_attr_set.
+//
+// The filter mutex is held for the whole call. An invalid filter is rejected
+// with an error; a non-zero return code from libseccomp is negated and
+// returned as a syscall.Errno.
+func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return fmt.Errorf("Filter is invalid or uninitialized")
+	}
+
+	if retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value); retCode != 0 {
+		return syscall.Errno(-1 * retCode)
+	}
+
+	return nil
+}
+
+// addRuleWrapper adds one rule for call with the given action to the
+// libseccomp context of f, using seccomp_rule_add_exact_array when exact is
+// true and seccomp_rule_add_array otherwise.
+//
+// DOES NOT LOCK OR CHECK VALIDITY - the caller must already have done both.
+//
+// A nil cond adds an unconditional rule; a non-nil cond is passed on as an
+// array holding exactly one comparison.
+//
+// EFAULT and EPERM from libseccomp are translated into descriptive errors;
+// any other non-zero return code is negated and returned as a syscall.Errno.
+func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, cond C.scmp_cast_t) error {
+	// The zero value of length already describes the "no comparison" case.
+	var length C.uint
+	if cond != nil {
+		length = 1
+	}
+
+	var retCode C.int
+	if exact {
+		retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, cond)
+	} else {
+		retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, cond)
+	}
+
+	if retCode == 0 {
+		return nil
+	}
+
+	switch errno := syscall.Errno(-1 * retCode); errno {
+	case syscall.EFAULT:
+		return fmt.Errorf("Unrecognized syscall")
+	case syscall.EPERM:
+		return fmt.Errorf("Requested action matches default action of filter")
+	default:
+		return errno
+	}
+}
+
+// addRuleGeneric is the shared implementation behind the rule-adding entry
+// points. It takes the filter lock, rejects an invalid filter, and then adds
+// either one unconditional rule (conds empty) or one rule per element of
+// conds. It stops at, and returns, the first error.
+//
+// NOTE(review): every condition is submitted as its own single-comparison
+// rule rather than as one rule carrying all comparisons. If callers expect
+// the conditions to be AND'ed together, this needs a multi-element array -
+// confirm against seccomp_rule_add(3).
+func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+
+	if !f.valid {
+		return fmt.Errorf("Filter is invalid or uninitialized")
+	}
+
+	if len(conds) == 0 {
+		return f.addRuleWrapper(call, action, exact, nil)
+	}
+
+	for _, cond := range conds {
+		cmpStruct := C.make_struct_arg_cmp(C.uint(cond.Argument), cond.Op.toNative(), C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2))
+		// A nil pointer would be read by addRuleWrapper as "no condition"
+		// and silently install an unconditional rule, so refuse it here.
+		// (make_struct_arg_cmp lives in the cgo preamble outside this view;
+		// whether it can actually return NULL should be confirmed there.)
+		if cmpStruct == nil {
+			return fmt.Errorf("Error allocating memory for condition")
+		}
+
+		// Free the comparison as soon as the rule has been submitted
+		// instead of deferring inside the loop, which kept every struct
+		// allocated until the function returned. The deferred free also
+		// ran before the filter was ever used, so libseccomp was already
+		// relied upon not to retain this pointer.
+		err := f.addRuleWrapper(call, action, exact, C.scmp_cast_t(cmpStruct))
+		C.free(cmpStruct)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Generic Helpers
+
+// sanitizeArch returns an error unless in lies within the inclusive range
+// [archStart, archEnd] of known architecture tokens.
+func sanitizeArch(in ScmpArch) error {
+	if archStart <= in && in <= archEnd {
+		return nil
+	}
+
+	return fmt.Errorf("Unrecognized architecture")
+}
+
+// sanitizeAction validates an action token. The low 16 bits must name an
+// action within [actionStart, actionEnd]; the high 16 bits may be non-zero
+// only for ActTrace and ActErrno.
+func sanitizeAction(in ScmpAction) error {
+	base := in & 0x0000FFFF
+	if base < actionStart || base > actionEnd {
+		return fmt.Errorf("Unrecognized action")
+	}
+
+	mayCarryCode := base == ActTrace || base == ActErrno
+	if !mayCarryCode && in&0xFFFF0000 != 0 {
+		return fmt.Errorf("Highest 16 bits must be zeroed except for Trace and Errno")
+	}
+
+	return nil
+}
+
+// sanitizeCompareOp returns an error unless in lies within the inclusive
+// range [compareOpStart, compareOpEnd] of known comparison operators.
+func sanitizeCompareOp(in ScmpCompareOp) error {
+	if compareOpStart <= in && in <= compareOpEnd {
+		return nil
+	}
+
+	return fmt.Errorf("Unrecognized comparison operator")
+}
+
+// archFromNative maps a native libseccomp architecture value to the matching
+// ScmpArch token. Values that match none of the known architectures yield an
+// error together with a zero token.
+func archFromNative(a C.uint32_t) (ScmpArch, error) {
+	switch a {
+	case C.C_ARCH_X86:
+		return ArchX86, nil
+	case C.C_ARCH_X86_64:
+		return ArchAMD64, nil
+	case C.C_ARCH_X32:
+		return ArchX32, nil
+	case C.C_ARCH_ARM:
+		return ArchARM, nil
+	case C.C_ARCH_NATIVE:
+		return ArchNative, nil
+	case C.C_ARCH_AARCH64:
+		return ArchARM64, nil
+	case C.C_ARCH_MIPS:
+		return ArchMIPS, nil
+	case C.C_ARCH_MIPS64:
+		return ArchMIPS64, nil
+	case C.C_ARCH_MIPS64N32:
+		return ArchMIPS64N32, nil
+	case C.C_ARCH_MIPSEL:
+		return ArchMIPSEL, nil
+	case C.C_ARCH_MIPSEL64:
+		return ArchMIPSEL64, nil
+	case C.C_ARCH_MIPSEL64N32:
+		return ArchMIPSEL64N32, nil
+	}
+
+	return 0x0, fmt.Errorf("Unrecognized architecture")
+}
+
+// toNative maps an architecture token to its native libseccomp value.
+// There is no error handling: only call it with sanitized tokens, since any
+// unknown token is mapped to 0x0.
+func (a ScmpArch) toNative() C.uint32_t {
+	switch a {
+	case ArchX86:
+		return C.C_ARCH_X86
+	case ArchAMD64:
+		return C.C_ARCH_X86_64
+	case ArchX32:
+		return C.C_ARCH_X32
+	case ArchARM:
+		return C.C_ARCH_ARM
+	case ArchARM64:
+		return C.C_ARCH_AARCH64
+	case ArchMIPS:
+		return C.C_ARCH_MIPS
+	case ArchMIPS64:
+		return C.C_ARCH_MIPS64
+	case ArchMIPS64N32:
+		return C.C_ARCH_MIPS64N32
+	case ArchMIPSEL:
+		return C.C_ARCH_MIPSEL
+	case ArchMIPSEL64:
+		return C.C_ARCH_MIPSEL64
+	case ArchMIPSEL64N32:
+		return C.C_ARCH_MIPSEL64N32
+	case ArchNative:
+		return C.C_ARCH_NATIVE
+	}
+
+	return 0x0
+}
+
+// toNative maps a comparison operator to its native libseccomp value.
+// There is no error handling: only call it with sanitized operators, since
+// any unknown operator is mapped to 0x0.
+func (a ScmpCompareOp) toNative() C.int {
+	switch a {
+	case CompareNotEqual:
+		return C.C_CMP_NE
+	case CompareLess:
+		return C.C_CMP_LT
+	case CompareLessOrEqual:
+		return C.C_CMP_LE
+	case CompareEqual:
+		return C.C_CMP_EQ
+	case CompareGreaterEqual:
+		return C.C_CMP_GE
+	case CompareGreater:
+		return C.C_CMP_GT
+	case CompareMaskedEqual:
+		return C.C_CMP_MASKED_EQ
+	}
+
+	return 0x0
+}
+
+// actionFromNative maps a native libseccomp action value to an ScmpAction.
+// The upper 16 bits of a select the action; for errno and trace actions the
+// lower 16 bits are attached to the result through SetReturnCode. Values
+// that match no known action yield an error together with a zero action.
+func actionFromNative(a C.uint32_t) (ScmpAction, error) {
+	code := int16(a & 0xFFFF)
+
+	switch kind := a & 0xFFFF0000; kind {
+	case C.C_ACT_KILL:
+		return ActKill, nil
+	case C.C_ACT_TRAP:
+		return ActTrap, nil
+	case C.C_ACT_ERRNO:
+		return ActErrno.SetReturnCode(code), nil
+	case C.C_ACT_TRACE:
+		return ActTrace.SetReturnCode(code), nil
+	case C.C_ACT_ALLOW:
+		return ActAllow, nil
+	}
+
+	return 0x0, fmt.Errorf("Unrecognized action")
+}
+
+// toNative maps an action to its native libseccomp value. The low 16 bits of
+// a select the action; for ActErrno and ActTrace the high 16 bits are shifted
+// down and OR'ed into the native value.
+// There is no error handling: only call it with sanitized actions, since any
+// unknown action is mapped to 0x0.
+func (a ScmpAction) toNative() C.uint32_t {
+	code := C.uint32_t(a) >> 16
+
+	switch a & 0xFFFF {
+	case ActKill:
+		return C.C_ACT_KILL
+	case ActTrap:
+		return C.C_ACT_TRAP
+	case ActErrno:
+		return C.C_ACT_ERRNO | code
+	case ActTrace:
+		return C.C_ACT_TRACE | code
+	case ActAllow:
+		return C.C_ACT_ALLOW
+	}
+
+	return 0x0
+}
+
+// toNative maps an internal filter attribute to its native libseccomp value.
+// Internal use only: it assumes a known attribute, and any other value is
+// mapped to 0x0.
+func (a scmpFilterAttr) toNative() uint32 {
+	switch a {
+	case filterAttrActDefault:
+		return uint32(C.C_ATTRIBUTE_DEFAULT)
+	case filterAttrActBadArch:
+		return uint32(C.C_ATTRIBUTE_BADARCH)
+	case filterAttrNNP:
+		return uint32(C.C_ATTRIBUTE_NNP)
+	case filterAttrTsync:
+		return uint32(C.C_ATTRIBUTE_TSYNC)
+	}
+
+	return 0x0
+}
diff --git a/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/seccomp_test.go
b/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/seccomp_test.go new file mode 100644 index 00000000..f94777cd --- /dev/null +++ b/Godeps/_workspace/src/github.com/seccomp/libseccomp-golang/seccomp_test.go @@ -0,0 +1,454 @@ +// +build linux + +// Tests for public API of libseccomp Go bindings + +package seccomp + +import ( + "fmt" + "syscall" + "testing" +) + +// Type Function Tests + +func TestActionSetReturnCode(t *testing.T) { + if ActInvalid.SetReturnCode(0x0010) != ActInvalid { + t.Errorf("Able to set a return code on invalid action!") + } + + codeSet := ActErrno.SetReturnCode(0x0001) + if codeSet == ActErrno || codeSet.GetReturnCode() != 0x0001 { + t.Errorf("Could not set return code on ActErrno") + } +} + +func TestSyscallGetName(t *testing.T) { + call1 := ScmpSyscall(0x1) + callFail := ScmpSyscall(0x999) + + name, err := call1.GetName() + if err != nil { + t.Errorf("Error getting syscall name for number 0x1") + } else if len(name) == 0 { + t.Errorf("Empty name returned for syscall 0x1") + } + fmt.Printf("Got name of syscall 0x1 on native arch as %s\n", name) + + _, err = callFail.GetName() + if err == nil { + t.Errorf("Getting nonexistant syscall should error!") + } +} + +func TestSyscallGetNameByArch(t *testing.T) { + call1 := ScmpSyscall(0x1) + callInvalid := ScmpSyscall(0x999) + archGood := ArchAMD64 + archBad := ArchInvalid + + name, err := call1.GetNameByArch(archGood) + if err != nil { + t.Errorf("Error getting syscall name for number 0x1 and arch AMD64") + } else if name != "write" { + t.Errorf("Got incorrect name for syscall 0x1 - expected write, got %s", name) + } + + _, err = call1.GetNameByArch(archBad) + if err == nil { + t.Errorf("Bad architecture GetNameByArch() should error!") + } + + _, err = callInvalid.GetNameByArch(archGood) + if err == nil { + t.Errorf("Bad syscall GetNameByArch() should error!") + } + + _, err = callInvalid.GetNameByArch(archBad) + if err == nil { + t.Errorf("Bad syscall and bad arch GetNameByArch() should 
error!") + } +} + +func TestGetSyscallFromName(t *testing.T) { + name1 := "write" + nameInval := "NOTASYSCALL" + + syscall, err := GetSyscallFromName(name1) + if err != nil { + t.Errorf("Error getting syscall number of write: %s", err) + } + fmt.Printf("Got syscall number of write on native arch as %d\n", syscall) + + _, err = GetSyscallFromName(nameInval) + if err == nil { + t.Errorf("Getting an invalid syscall should error!") + } +} + +func TestGetSyscallFromNameByArch(t *testing.T) { + name1 := "write" + nameInval := "NOTASYSCALL" + arch1 := ArchAMD64 + archInval := ArchInvalid + + syscall, err := GetSyscallFromNameByArch(name1, arch1) + if err != nil { + t.Errorf("Error getting syscall number of write on AMD64: %s", err) + } + fmt.Printf("Got syscall number of write on AMD64 as %d\n", syscall) + + _, err = GetSyscallFromNameByArch(nameInval, arch1) + if err == nil { + t.Errorf("Getting invalid syscall with valid arch should error") + } + + _, err = GetSyscallFromNameByArch(name1, archInval) + if err == nil { + t.Errorf("Getting valid syscall for invalid arch should error") + } + + _, err = GetSyscallFromNameByArch(nameInval, archInval) + if err == nil { + t.Errorf("Getting invalid syscall for invalid arch should error") + } +} + +func TestMakeCondition(t *testing.T) { + condition, err := MakeCondition(3, CompareNotEqual, 0x10) + if err != nil { + t.Errorf("Error making condition struct: %s", err) + } else if condition.Argument != 3 || condition.Operand1 != 0x10 || + condition.Operand2 != 0 || condition.Op != CompareNotEqual { + t.Errorf("Condition struct was filled incorrectly") + } + + condition, err = MakeCondition(3, CompareMaskedEqual, 0x10, 0x20) + if err != nil { + t.Errorf("Error making condition struct: %s", err) + } else if condition.Argument != 3 || condition.Operand1 != 0x10 || + condition.Operand2 != 0x20 || condition.Op != CompareMaskedEqual { + t.Errorf("Condition struct was filled incorrectly") + } + + _, err = MakeCondition(7, CompareNotEqual, 
0x10) + if err == nil { + t.Errorf("Condition struct with bad syscall argument number should error") + } + + _, err = MakeCondition(3, CompareInvalid, 0x10) + if err == nil { + t.Errorf("Condition struct with bad comparison operator should error") + } + + _, err = MakeCondition(3, CompareMaskedEqual, 0x10, 0x20, 0x30) + if err == nil { + t.Errorf("MakeCondition with more than 2 arguments should fail") + } + + _, err = MakeCondition(3, CompareMaskedEqual) + if err == nil { + t.Errorf("MakeCondition with no arguments should fail") + } +} + +// Utility Function Tests + +func TestGetNativeArch(t *testing.T) { + arch, err := GetNativeArch() + if err != nil { + t.Errorf("GetNativeArch should not error!") + } + fmt.Printf("Got native arch of system as %s\n", arch.String()) +} + +// Filter Tests + +func TestFilterCreateRelease(t *testing.T) { + _, err := NewFilter(ActInvalid) + if err == nil { + t.Errorf("Can create filter with invalid action") + } + + filter, err := NewFilter(ActKill) + if err != nil { + t.Errorf("Error creating filter: %s", err) + } + + if !filter.IsValid() { + t.Errorf("Filter created by NewFilter was not valid") + } + + filter.Release() + + if filter.IsValid() { + t.Errorf("Filter is valid after being released") + } +} + +func TestFilterReset(t *testing.T) { + filter, err := NewFilter(ActKill) + if err != nil { + t.Errorf("Error creating filter: %s", err) + } + defer filter.Release() + + // Ensure the default action is ActKill + action, err := filter.GetDefaultAction() + if err != nil { + t.Errorf("Error getting default action of filter") + } else if action != ActKill { + t.Errorf("Default action of filter was set incorrectly!") + } + + // Reset with a different default action + err = filter.Reset(ActAllow) + if err != nil { + t.Errorf("Error resetting filter!") + } + + valid := filter.IsValid() + if !valid { + t.Errorf("Filter is no longer valid after reset!") + } + + // The default action should no longer be ActKill + action, err = 
filter.GetDefaultAction() + if err != nil { + t.Errorf("Error getting default action of filter") + } else if action != ActAllow { + t.Errorf("Default action of filter was set incorrectly!") + } +} + +func TestFilterArchFunctions(t *testing.T) { + filter, err := NewFilter(ActKill) + if err != nil { + t.Errorf("Error creating filter: %s", err) + } + defer filter.Release() + + arch, err := GetNativeArch() + if err != nil { + t.Errorf("Error getting native architecture: %s", err) + } + + present, err := filter.IsArchPresent(arch) + if err != nil { + t.Errorf("Error retrieving arch from filter: %s", err) + } else if !present { + t.Errorf("Filter does not contain native architecture by default") + } + + // Adding the native arch again should succeed, as it's already present + err = filter.AddArch(arch) + if err != nil { + t.Errorf("Adding arch to filter already containing it should succeed") + } + + // Make sure we don't add the native arch again + prospectiveArch := ArchX86 + if arch == ArchX86 { + prospectiveArch = ArchAMD64 + } + + // Check to make sure this other arch isn't in the filter + present, err = filter.IsArchPresent(prospectiveArch) + if err != nil { + t.Errorf("Error retrieving arch from filter: %s", err) + } else if present { + t.Errorf("Arch not added to filter is present") + } + + // Try removing the nonexistant arch - should succeed + err = filter.RemoveArch(prospectiveArch) + if err != nil { + t.Errorf("Error removing nonexistant arch: %s", err) + } + + // Add an arch, see if it's in the filter + err = filter.AddArch(prospectiveArch) + if err != nil { + t.Errorf("Could not add arch %s to filter: %s", + prospectiveArch.String(), err) + } + + present, err = filter.IsArchPresent(prospectiveArch) + if err != nil { + t.Errorf("Error retrieving arch from filter: %s", err) + } else if !present { + t.Errorf("Filter does not contain architecture %s after it was added", + prospectiveArch.String()) + } + + // Remove the arch again, make sure it's not in the 
filter + err = filter.RemoveArch(prospectiveArch) + if err != nil { + t.Errorf("Could not remove arch %s from filter: %s", + prospectiveArch.String(), err) + } + + present, err = filter.IsArchPresent(prospectiveArch) + if err != nil { + t.Errorf("Error retrieving arch from filter: %s", err) + } else if present { + t.Errorf("Filter contains architecture %s after it was removed", + prospectiveArch.String()) + } +} + +func TestFilterAttributeGettersAndSetters(t *testing.T) { + filter, err := NewFilter(ActKill) + if err != nil { + t.Errorf("Error creating filter: %s", err) + } + defer filter.Release() + + act, err := filter.GetDefaultAction() + if err != nil { + t.Errorf("Error getting default action: %s", err) + } else if act != ActKill { + t.Errorf("Default action was set incorrectly") + } + + err = filter.SetBadArchAction(ActAllow) + if err != nil { + t.Errorf("Error setting bad arch action: %s", err) + } + + act, err = filter.GetBadArchAction() + if err != nil { + t.Errorf("Error getting bad arch action") + } else if act != ActAllow { + t.Errorf("Bad arch action was not set correcly!") + } + + err = filter.SetNoNewPrivsBit(false) + if err != nil { + t.Errorf("Error setting no new privileges bit") + } + + privs, err := filter.GetNoNewPrivsBit() + if err != nil { + t.Errorf("Error getting no new privileges bit!") + } else if privs != false { + t.Errorf("No new privileges bit was not set correctly") + } + + err = filter.SetBadArchAction(ActInvalid) + if err == nil { + t.Errorf("Setting bad arch action to an invalid action should error") + } +} + +func TestMergeFilters(t *testing.T) { + filter1, err := NewFilter(ActAllow) + if err != nil { + t.Errorf("Error creating filter: %s", err) + } + + filter2, err := NewFilter(ActAllow) + if err != nil { + t.Errorf("Error creating filter: %s", err) + } + + // Need to remove the native arch and add another to the second filter + // Filters must NOT share architectures to be successfully merged + nativeArch, err := GetNativeArch() 
+ if err != nil { + t.Errorf("Error getting native arch: %s", err) + } + + prospectiveArch := ArchAMD64 + if nativeArch == ArchAMD64 { + prospectiveArch = ArchX86 + } + + err = filter2.AddArch(prospectiveArch) + if err != nil { + t.Errorf("Error adding architecture to filter: %s", err) + } + + err = filter2.RemoveArch(nativeArch) + if err != nil { + t.Errorf("Error removing architecture from filter: %s", err) + } + + err = filter1.Merge(filter2) + if err != nil { + t.Errorf("Error merging filters: %s", err) + } + + if filter2.IsValid() { + t.Errorf("Source filter should not be valid after merging") + } + + filter3, err := NewFilter(ActKill) + if err != nil { + t.Errorf("Error creating filter: %s", err) + } + defer filter3.Release() + + err = filter1.Merge(filter3) + if err == nil { + t.Errorf("Attributes should have to match to merge filters") + } +} + +func TestRuleAddAndLoad(t *testing.T) { + // Test #1: Add a trivial filter + filter1, err := NewFilter(ActAllow) + if err != nil { + t.Errorf("Error creating filter: %s", err) + } + defer filter1.Release() + + call, err := GetSyscallFromName("getpid") + if err != nil { + t.Errorf("Error getting syscall number of getpid: %s", err) + } + + call2, err := GetSyscallFromName("setreuid") + if err != nil { + t.Errorf("Error getting syscall number of setreuid: %s", err) + } + + uid := syscall.Getuid() + euid := syscall.Geteuid() + + err = filter1.AddRule(call, ActErrno.SetReturnCode(0x1)) + if err != nil { + t.Errorf("Error adding rule to restrict syscall: %s", err) + } + + cond, err := MakeCondition(1, CompareEqual, uint64(euid)) + if err != nil { + t.Errorf("Error making rule to restrict syscall: %s", err) + } + + cond2, err := MakeCondition(0, CompareEqual, uint64(uid)) + if err != nil { + t.Errorf("Error making rule to restrict syscall: %s", err) + } + + conditions := []ScmpCondition{cond, cond2} + + err = filter1.AddRuleConditional(call2, ActErrno.SetReturnCode(0x2), conditions) + + err = filter1.Load() + if err != nil 
{ + t.Errorf("Error loading filter: %s", err) + } + + // Try making a simple syscall, it should error + pid := syscall.Getpid() + if pid != -1 { + t.Errorf("Syscall should have returned error code!") + } + + // Try making a Geteuid syscall that should normally succeed + err = syscall.Setreuid(uid, euid) + if err != syscall.Errno(2) { + t.Errorf("Syscall should have returned error code!") + } +} diff --git a/Makefile b/Makefile index 09d28f28..bd5dd88a 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,11 @@ RUNC_TEST_IMAGE=runc_test PROJECT=github.com/opencontainers/runc TEST_DOCKERFILE=script/test_Dockerfile +BUILD_TAGS=seccomp export GOPATH:=$(CURDIR)/Godeps/_workspace:$(GOPATH) all: - go build -o runc . + go build -tags $(BUILD_TAGS) -o runc . vet: go get golang.org/x/tools/cmd/vet @@ -20,7 +21,7 @@ test: runctestimage docker run -e TESTFLAGS --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_TEST_IMAGE) make localtest localtest: - go test ${TESTFLAGS} -v ./... + go test -tags $(BUILD_TAGS) $(TESTFLAGS) -v ./... 
install: cp runc /usr/local/bin/runc diff --git a/libcontainer/configs/config.go b/libcontainer/configs/config.go index 83381c84..42aafbf0 100644 --- a/libcontainer/configs/config.go +++ b/libcontainer/configs/config.go @@ -13,36 +13,46 @@ type IDMap struct { Size int `json:"size"` } +// Seccomp represents syscall restrictions type Seccomp struct { - Syscalls []*Syscall `json:"syscalls"` + DefaultAction Action `json:"default_action"` + Syscalls []*Syscall `json:"syscalls"` } +// An action to be taken upon rule match in Seccomp type Action int const ( - Kill Action = iota - 3 + Kill Action = iota - 4 + Errno Trap Allow ) +// A comparison operator to be used when matching syscall arguments in Seccomp type Operator int const ( EqualTo Operator = iota NotEqualTo - GreatherThan + GreaterThan + GreaterThanOrEqualTo LessThan + LessThanOrEqualTo MaskEqualTo ) +// A rule to match a specific syscall argument in Seccomp type Arg struct { - Index int `json:"index"` - Value uint32 `json:"value"` - Op Operator `json:"op"` + Index uint `json:"index"` + Value uint64 `json:"value"` + ValueTwo uint64 `json:"value_two"` + Op Operator `json:"op"` } +// An rule to match a syscall in Seccomp type Syscall struct { - Value int `json:"value"` + Name string `json:"name"` Action Action `json:"action"` Args []*Arg `json:"args"` } @@ -140,7 +150,7 @@ type Config struct { Sysctl map[string]string `json:"sysctl"` // Seccomp allows actions to be taken whenever a syscall is made within the container. - // By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno - // can be specified on a per syscall basis. + // A number of rules are given, each having an action to be taken if a syscall matches it. + // A default action to be taken if no rules match is also given. 
Seccomp *Seccomp `json:"seccomp"` } diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index fd124f6d..16377ff6 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -13,7 +13,6 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/netlink" - "github.com/opencontainers/runc/libcontainer/seccomp" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/user" "github.com/opencontainers/runc/libcontainer/utils" @@ -270,61 +269,3 @@ func killCgroupProcesses(m cgroups.Manager) error { } return nil } - -func finalizeSeccomp(config *initConfig) error { - if config.Config.Seccomp == nil { - return nil - } - context := seccomp.New() - for _, s := range config.Config.Seccomp.Syscalls { - ss := &seccomp.Syscall{ - Value: uint32(s.Value), - Action: seccompAction(s.Action), - } - if len(s.Args) > 0 { - ss.Args = seccompArgs(s.Args) - } - context.Add(ss) - } - return context.Load() -} - -func seccompAction(a configs.Action) seccomp.Action { - switch a { - case configs.Kill: - return seccomp.Kill - case configs.Trap: - return seccomp.Trap - case configs.Allow: - return seccomp.Allow - } - return seccomp.Error(syscall.Errno(int(a))) -} - -func seccompArgs(args []*configs.Arg) seccomp.Args { - var sa []seccomp.Arg - for _, a := range args { - sa = append(sa, seccomp.Arg{ - Index: uint32(a.Index), - Op: seccompOperator(a.Op), - Value: uint(a.Value), - }) - } - return seccomp.Args{sa} -} - -func seccompOperator(o configs.Operator) seccomp.Operator { - switch o { - case configs.EqualTo: - return seccomp.EqualTo - case configs.NotEqualTo: - return seccomp.NotEqualTo - case configs.GreatherThan: - return seccomp.GreatherThan - case configs.LessThan: - return seccomp.LessThan - case configs.MaskEqualTo: - return seccomp.MaskEqualTo - } - return 0 -} diff --git a/libcontainer/integration/exec_test.go 
b/libcontainer/integration/exec_test.go index eb7019d6..e61c32b2 100644 --- a/libcontainer/integration/exec_test.go +++ b/libcontainer/integration/exec_test.go @@ -793,33 +793,6 @@ func TestSysctl(t *testing.T) { } } -func TestSeccompNoChown(t *testing.T) { - if testing.Short() { - return - } - rootfs, err := newRootfs() - if err != nil { - t.Fatal(err) - } - defer remove(rootfs) - config := newTemplateConfig(rootfs) - config.Seccomp = &configs.Seccomp{} - config.Seccomp.Syscalls = append(config.Seccomp.Syscalls, &configs.Syscall{ - Value: syscall.SYS_CHOWN, - Action: configs.Action(syscall.EPERM), - }) - buffers, _, err := runContainer(config, "", "/bin/sh", "-c", "chown 1:1 /tmp") - if err == nil { - t.Fatal("running chown in a container should fail") - } - if buffers == nil { - t.Fatalf("Container wasn't even created: %v", err) - } - if s := buffers.String(); !strings.Contains(s, "not permitted") { - t.Fatalf("running chown should result in an EPERM but got %q", s) - } -} - func TestMountCgroupRO(t *testing.T) { if testing.Short() { return diff --git a/libcontainer/integration/seccomp_test.go b/libcontainer/integration/seccomp_test.go new file mode 100644 index 00000000..94901861 --- /dev/null +++ b/libcontainer/integration/seccomp_test.go @@ -0,0 +1,208 @@ +// +build seccomp,linux,cgo + +package integration + +import ( + "strings" + "syscall" + "testing" + + "github.com/opencontainers/runc/libcontainer" + "github.com/opencontainers/runc/libcontainer/configs" +) + +func TestSeccompDenyGetcwd(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Seccomp = &configs.Seccomp{ + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { + Name: "getcwd", + Action: configs.Errno, + }, + }, + } + + container, err := newContainer(config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + buffers := 
newStdBuffers() + pwd := &libcontainer.Process{ + Args: []string{"pwd"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + } + + err = container.Start(pwd) + if err != nil { + t.Fatal(err) + } + ps, err := pwd.Wait() + if err == nil { + t.Fatal("Expecting error (negative return code); instead exited cleanly!") + } + + var exitCode int + status := ps.Sys().(syscall.WaitStatus) + if status.Exited() { + exitCode = status.ExitStatus() + } else if status.Signaled() { + exitCode = -int(status.Signal()) + } else { + t.Fatalf("Unrecognized exit reason!") + } + + if exitCode == 0 { + t.Fatalf("Getcwd should fail with negative exit code, instead got %d!", exitCode) + } + + expected := "pwd: getcwd: Operation not permitted" + actual := strings.Trim(buffers.Stderr.String(), "\n") + if actual != expected { + t.Fatalf("Expected output %s but got %s\n", expected, actual) + } +} + +func TestSeccompPermitWriteConditional(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Seccomp = &configs.Seccomp{ + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { + Name: "write", + Action: configs.Errno, + Args: []*configs.Arg{ + { + Index: 0, + Value: 1, + Op: configs.GreaterThan, + }, + }, + }, + }, + } + + container, err := newContainer(config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + buffers := newStdBuffers() + dmesg := &libcontainer.Process{ + Args: []string{"busybox", "ls", "/"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + } + + err = container.Start(dmesg) + if err != nil { + t.Fatal(err) + } + if _, err := dmesg.Wait(); err != nil { + t.Fatalf("%s: %s", err, buffers.Stderr) + } +} + +func TestSeccompDenyWriteConditional(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := 
newRootfs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + config.Seccomp = &configs.Seccomp{ + DefaultAction: configs.Allow, + Syscalls: []*configs.Syscall{ + { + Name: "write", + Action: configs.Errno, + Args: []*configs.Arg{ + { + Index: 0, + Value: 1, + Op: configs.GreaterThan, + }, + }, + }, + }, + } + + container, err := newContainer(config) + if err != nil { + t.Fatal(err) + } + defer container.Destroy() + + buffers := newStdBuffers() + dmesg := &libcontainer.Process{ + Args: []string{"busybox", "ls", "does_not_exist"}, + Env: standardEnvironment, + Stdin: buffers.Stdin, + Stdout: buffers.Stdout, + Stderr: buffers.Stderr, + } + + err = container.Start(dmesg) + if err != nil { + t.Fatal(err) + } + + ps, err := dmesg.Wait() + if err == nil { + t.Fatal("Expecting negative return, instead got 0!") + } + + var exitCode int + status := ps.Sys().(syscall.WaitStatus) + if status.Exited() { + exitCode = status.ExitStatus() + } else if status.Signaled() { + exitCode = -int(status.Signal()) + } else { + t.Fatalf("Unrecognized exit reason!") + } + + if exitCode == 0 { + t.Fatalf("Busybox should fail with negative exit code, instead got %d!", exitCode) + } + + // We're denying write to stderr, so we expect an empty buffer + expected := "" + actual := strings.Trim(buffers.Stderr.String(), "\n") + if actual != expected { + t.Fatalf("Expected output %s but got %s\n", expected, actual) + } +} diff --git a/libcontainer/seccomp/bpf.go b/libcontainer/seccomp/bpf.go deleted file mode 100644 index 65908ecc..00000000 --- a/libcontainer/seccomp/bpf.go +++ /dev/null @@ -1,34 +0,0 @@ -// +build linux - -package seccomp - -import "strings" - -type bpfLabel struct { - label string - location uint32 -} - -type bpfLabels []bpfLabel - -// labelIndex returns the index for the label if it exists in the slice. -// if it does not exist in the slice it appends the label lb to the end -// of the slice and returns the index. 
-func labelIndex(labels *bpfLabels, lb string) uint32 { - var id uint32 - for id = 0; id < uint32(len(*labels)); id++ { - if strings.EqualFold(lb, (*labels)[id].label) { - return id - } - } - *labels = append(*labels, bpfLabel{lb, 0xffffffff}) - return id -} - -func scmpBpfStmt(code uint16, k uint32) sockFilter { - return sockFilter{code, 0, 0, k} -} - -func scmpBpfJump(code uint16, k uint32, jt, jf uint8) sockFilter { - return sockFilter{code, jt, jf, k} -} diff --git a/libcontainer/seccomp/context.go b/libcontainer/seccomp/context.go deleted file mode 100644 index 6d0b7c3c..00000000 --- a/libcontainer/seccomp/context.go +++ /dev/null @@ -1,146 +0,0 @@ -// +build linux - -package seccomp - -import ( - "errors" - "syscall" -) - -const labelTemplate = "lb-%d-%d" - -// Action is the type of action that will be taken when a -// syscall is performed. -type Action int - -const ( - Kill Action = iota - 3 // Kill the calling process of the syscall. - Trap // Trap and coredump the calling process of the syscall. - Allow // Allow the syscall to be completed. -) - -// Syscall is the specified syscall, action, and any type of arguments -// to filter on. -type Syscall struct { - // Value is the syscall number. - Value uint32 - // Action is the action to perform when the specified syscall is made. - Action Action - // Args are filters that can be specified on the arguments to the syscall. - Args Args -} - -func (s *Syscall) scmpAction() uint32 { - switch s.Action { - case Allow: - return retAllow - case Trap: - return retTrap - case Kill: - return retKill - } - return actionErrno(uint32(s.Action)) -} - -// Arg represents an argument to the syscall with the argument's index, -// the operator to apply when matching, and the argument's value at that time. 
-type Arg struct { - Index uint32 // index of args which start from zero - Op Operator // operation, such as EQ/NE/GE/LE - Value uint // the value of arg -} - -type Args [][]Arg - -var ( - ErrUnresolvedLabel = errors.New("seccomp: unresolved label") - ErrDuplicateLabel = errors.New("seccomp: duplicate label use") - ErrUnsupportedOperation = errors.New("seccomp: unsupported operation for argument") -) - -// Error returns an Action that will be used to send the calling -// process the specified errno when the syscall is made. -func Error(code syscall.Errno) Action { - return Action(code) -} - -// New returns a new syscall context for use. -func New() *Context { - return &Context{ - syscalls: make(map[uint32]*Syscall), - } -} - -// Context holds syscalls for the current process to limit the type of -// actions the calling process can make. -type Context struct { - syscalls map[uint32]*Syscall -} - -// Add will add the specified syscall, action, and arguments to the seccomp -// Context. -func (c *Context) Add(s *Syscall) { - c.syscalls[s.Value] = s -} - -// Remove removes the specified syscall configuration from the Context. -func (c *Context) Remove(call uint32) { - delete(c.syscalls, call) -} - -// Load will apply the Context to the calling process makeing any secccomp process changes -// apply after the context is loaded. 
-func (c *Context) Load() error { - filter, err := c.newFilter() - if err != nil { - return err - } - if err := prctl(prSetNoNewPrivileges, 1, 0, 0, 0); err != nil { - return err - } - prog := newSockFprog(filter) - return prog.set() -} - -func (c *Context) newFilter() ([]sockFilter, error) { - var ( - labels bpfLabels - f = newFilter() - ) - for _, s := range c.syscalls { - f.addSyscall(s, &labels) - } - f.allow() - // process args for the syscalls - for _, s := range c.syscalls { - if err := f.addArguments(s, &labels); err != nil { - return nil, err - } - } - // apply labels for arguments - idx := int32(len(*f) - 1) - for ; idx >= 0; idx-- { - lf := &(*f)[idx] - if lf.code != (syscall.BPF_JMP + syscall.BPF_JA) { - continue - } - rel := int32(lf.jt)<<8 | int32(lf.jf) - if ((jumpJT << 8) | jumpJF) == rel { - if labels[lf.k].location == 0xffffffff { - return nil, ErrUnresolvedLabel - } - lf.k = labels[lf.k].location - uint32(idx+1) - lf.jt = 0 - lf.jf = 0 - } else if ((labelJT << 8) | labelJF) == rel { - if labels[lf.k].location != 0xffffffff { - return nil, ErrDuplicateLabel - } - labels[lf.k].location = uint32(idx) - lf.k = 0 - lf.jt = 0 - lf.jf = 0 - } - } - return *f, nil -} diff --git a/libcontainer/seccomp/filter.go b/libcontainer/seccomp/filter.go deleted file mode 100644 index 658fbddd..00000000 --- a/libcontainer/seccomp/filter.go +++ /dev/null @@ -1,118 +0,0 @@ -// +build linux - -package seccomp - -import ( - "fmt" - "syscall" - "unsafe" -) - -type sockFilter struct { - code uint16 - jt uint8 - jf uint8 - k uint32 -} - -func newFilter() *filter { - var f filter - f = append(f, sockFilter{ - pfLD + syscall.BPF_W + syscall.BPF_ABS, - 0, - 0, - uint32(unsafe.Offsetof(secData.nr)), - }) - return &f -} - -type filter []sockFilter - -func (f *filter) addSyscall(s *Syscall, labels *bpfLabels) { - if len(s.Args) == 0 { - f.call(s.Value, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction())) - } else { - if len(s.Args[0]) > 0 { - lb := 
fmt.Sprintf(labelTemplate, s.Value, s.Args[0][0].Index) - f.call(s.Value, - scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), - jumpJT, jumpJF)) - } - } -} - -func (f *filter) addArguments(s *Syscall, labels *bpfLabels) error { - for i := 0; len(s.Args) > i; i++ { - if len(s.Args[i]) > 0 { - lb := fmt.Sprintf(labelTemplate, s.Value, s.Args[i][0].Index) - f.label(labels, lb) - f.arg(s.Args[i][0].Index) - } - for j := 0; j < len(s.Args[i]); j++ { - var jf sockFilter - if len(s.Args)-1 > i && len(s.Args[i+1]) > 0 { - lbj := fmt.Sprintf(labelTemplate, s.Value, s.Args[i+1][0].Index) - jf = scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, - labelIndex(labels, lbj), jumpJT, jumpJF) - } else { - jf = scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, s.scmpAction()) - } - if err := f.op(s.Args[i][j].Op, s.Args[i][j].Value, jf); err != nil { - return err - } - } - f.allow() - } - return nil -} - -func (f *filter) label(labels *bpfLabels, lb string) { - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), labelJT, labelJF)) -} - -func (f *filter) call(nr uint32, jt sockFilter) { - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, nr, 0, 1)) - *f = append(*f, jt) -} - -func (f *filter) allow() { - *f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retAllow)) -} - -func (f *filter) deny() { - *f = append(*f, scmpBpfStmt(syscall.BPF_RET+syscall.BPF_K, retTrap)) -} - -func (f *filter) arg(index uint32) { - arg(f, index) -} - -func (f *filter) op(operation Operator, v uint, jf sockFilter) error { - switch operation { - case EqualTo: - jumpEqualTo(f, v, jf) - case NotEqualTo: - jumpNotEqualTo(f, v, jf) - case GreatherThan: - jumpGreaterThan(f, v, jf) - case LessThan: - jumpLessThan(f, v, jf) - case MaskEqualTo: - jumpMaskEqualTo(f, v, jf) - default: - return ErrUnsupportedOperation - } - return nil -} - -func arg(f *filter, idx uint32) { - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, 
endian.low(idx))) - *f = append(*f, scmpBpfStmt(syscall.BPF_ST, 0)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_W+syscall.BPF_ABS, endian.hi(idx))) - *f = append(*f, scmpBpfStmt(syscall.BPF_ST, 1)) -} - -func jump(f *filter, labels *bpfLabels, lb string) { - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JA, labelIndex(labels, lb), - jumpJT, jumpJF)) -} diff --git a/libcontainer/seccomp/jump_amd64.go b/libcontainer/seccomp/jump_amd64.go deleted file mode 100644 index f0d07716..00000000 --- a/libcontainer/seccomp/jump_amd64.go +++ /dev/null @@ -1,68 +0,0 @@ -// +build linux,amd64 - -package seccomp - -// Using BPF filters -// -// ref: http://www.gsp.com/cgi-bin/man.cgi?topic=bpf -import "syscall" - -func jumpGreaterThan(f *filter, v uint, jt sockFilter) { - lo := uint32(uint64(v) % 0x100000000) - hi := uint32(uint64(v) / 0x100000000) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 4, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGE+syscall.BPF_K, (lo), 0, 2)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) - *f = append(*f, jt) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) -} - -func jumpEqualTo(f *filter, v uint, jt sockFilter) { - lo := uint32(uint64(v) % 0x100000000) - hi := uint32(uint64(v) / 0x100000000) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 5)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (lo), 0, 2)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) - *f = append(*f, jt) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) -} - -func jumpLessThan(f *filter, v uint, jt sockFilter) { - lo := uint32(uint64(v) % 0x100000000) - 
hi := uint32(uint64(v) / 0x100000000) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (hi), 6, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, (hi), 0, 3)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JGT+syscall.BPF_K, (lo), 2, 0)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) - *f = append(*f, jt) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) -} - -func jumpNotEqualTo(f *filter, v uint, jt sockFilter) { - lo := uint32(uint64(v) % 0x100000000) - hi := uint32(uint64(v) / 0x100000000) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 5, 0)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 2, 0)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) - *f = append(*f, jt) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) -} - -// this checks for a value inside a mask. 
The evalusation is equal to doing -// CLONE_NEWUSER & syscallMask == CLONE_NEWUSER -func jumpMaskEqualTo(f *filter, v uint, jt sockFilter) { - lo := uint32(uint64(v) % 0x100000000) - hi := uint32(uint64(v) / 0x100000000) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, hi, 0, 6)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 0)) - *f = append(*f, scmpBpfStmt(syscall.BPF_ALU+syscall.BPF_AND, uint32(v))) - *f = append(*f, scmpBpfJump(syscall.BPF_JMP+syscall.BPF_JEQ+syscall.BPF_K, lo, 0, 2)) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) - *f = append(*f, jt) - *f = append(*f, scmpBpfStmt(syscall.BPF_LD+syscall.BPF_MEM, 1)) -} diff --git a/libcontainer/seccomp/seccomp_linux.go b/libcontainer/seccomp/seccomp_linux.go new file mode 100644 index 00000000..58bdbf6d --- /dev/null +++ b/libcontainer/seccomp/seccomp_linux.go @@ -0,0 +1,165 @@ +// +build linux,cgo,seccomp + +package seccomp + +import ( + "fmt" + "log" + "syscall" + + "github.com/opencontainers/runc/libcontainer/configs" + libseccomp "github.com/seccomp/libseccomp-golang" +) + +var ( + actAllow = libseccomp.ActAllow + actTrap = libseccomp.ActTrap + actKill = libseccomp.ActKill + actErrno = libseccomp.ActErrno.SetReturnCode(int16(syscall.EPERM)) +) + +// Filters given syscalls in a container, preventing them from being used +// Started in the container init process, and carried over to all child processes +// Setns calls, however, require a separate invocation, as they are not children +// of the init until they join the namespace +func InitSeccomp(config *configs.Seccomp) error { + if config == nil { + return fmt.Errorf("cannot initialize Seccomp - nil config passed") + } + + defaultAction, err := getAction(config.DefaultAction) + if err != nil { + return fmt.Errorf("error initializing seccomp - invalid default action") + } + + filter, err := libseccomp.NewFilter(defaultAction) + if err != nil { + return fmt.Errorf("error creating filter: %s", err) 
+ } + + // Unset no new privs bit + if err := filter.SetNoNewPrivsBit(false); err != nil { + return fmt.Errorf("error setting no new privileges: %s", err) + } + + // Add a rule for each syscall + for _, call := range config.Syscalls { + if call == nil { + return fmt.Errorf("encountered nil syscall while initializing Seccomp") + } + + if err = matchCall(filter, call); err != nil { + return err + } + } + + if err = filter.Load(); err != nil { + return fmt.Errorf("error loading seccomp filter into kernel: %s", err) + } + + return nil +} + +// Convert Libcontainer Action to Libseccomp ScmpAction +func getAction(act configs.Action) (libseccomp.ScmpAction, error) { + switch act { + case configs.Kill: + return actKill, nil + case configs.Errno: + return actErrno, nil + case configs.Trap: + return actTrap, nil + case configs.Allow: + return actAllow, nil + default: + return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule") + } +} + +// Convert Libcontainer Operator to Libseccomp ScmpCompareOp +func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) { + switch op { + case configs.EqualTo: + return libseccomp.CompareEqual, nil + case configs.NotEqualTo: + return libseccomp.CompareNotEqual, nil + case configs.GreaterThan: + return libseccomp.CompareGreater, nil + case configs.GreaterThanOrEqualTo: + return libseccomp.CompareGreaterEqual, nil + case configs.LessThan: + return libseccomp.CompareLess, nil + case configs.LessThanOrEqualTo: + return libseccomp.CompareLessOrEqual, nil + case configs.MaskEqualTo: + return libseccomp.CompareMaskedEqual, nil + default: + return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule") + } +} + +// Convert Libcontainer Arg to Libseccomp ScmpCondition +func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) { + cond := libseccomp.ScmpCondition{} + + if arg == nil { + return cond, fmt.Errorf("cannot convert nil to syscall condition") + } + + op, err := 
getOperator(arg.Op) + if err != nil { + return cond, err + } + + return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo) +} + +// Add a rule to match a single syscall +func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { + if call == nil || filter == nil { + return fmt.Errorf("cannot use nil as syscall to block") + } + + if len(call.Name) == 0 { + return fmt.Errorf("empty string is not a valid syscall") + } + + // If we can't resolve the syscall, assume it's not supported on this kernel + // Ignore it, don't error out + callNum, err := libseccomp.GetSyscallFromName(call.Name) + if err != nil { + log.Printf("Error resolving syscall name %s: %s - ignoring syscall.", call.Name, err) + return nil + } + + // Convert the call's action to the libseccomp equivalent + callAct, err := getAction(call.Action) + if err != nil { + return err + } + + // Unconditional match - just add the rule + if len(call.Args) == 0 { + if err = filter.AddRule(callNum, callAct); err != nil { + return err + } + } else { + // Conditional match - convert the per-arg rules into library format + conditions := []libseccomp.ScmpCondition{} + + for _, cond := range call.Args { + newCond, err := getCondition(cond) + if err != nil { + return err + } + + conditions = append(conditions, newCond) + } + + if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil { + return err + } + } + + return nil +} diff --git a/libcontainer/seccomp/seccomp_unix.go b/libcontainer/seccomp/seccomp_unix.go deleted file mode 100644 index a68a4dcc..00000000 --- a/libcontainer/seccomp/seccomp_unix.go +++ /dev/null @@ -1,124 +0,0 @@ -// +build linux - -// Package seccomp provides native seccomp ( https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt ) support for go. -package seccomp - -import ( - "syscall" - "unsafe" -) - -// Operator that is used for argument comparison. 
-type Operator int - -const ( - EqualTo Operator = iota - NotEqualTo - GreatherThan - LessThan - MaskEqualTo -) - -const ( - jumpJT = 0xff - jumpJF = 0xff - labelJT = 0xfe - labelJF = 0xfe -) - -const ( - pfLD = 0x0 - retKill = 0x00000000 - retTrap = 0x00030000 - retAllow = 0x7fff0000 - modeFilter = 0x2 - prSetNoNewPrivileges = 0x26 -) - -func actionErrno(errno uint32) uint32 { - return 0x00050000 | (errno & 0x0000ffff) -} - -var ( - secData = struct { - nr int32 - arch uint32 - insPointer uint64 - args [6]uint64 - }{0, 0, 0, [6]uint64{0, 0, 0, 0, 0, 0}} -) - -var isLittle = func() bool { - var ( - x = 0x1234 - p = unsafe.Pointer(&x) - p2 = (*[unsafe.Sizeof(0)]byte)(p) - ) - if p2[0] == 0 { - return false - } - return true -}() - -var endian endianSupport - -type endianSupport struct { -} - -func (e endianSupport) hi(i uint32) uint32 { - if isLittle { - return e.little(i) - } - return e.big(i) -} - -func (e endianSupport) low(i uint32) uint32 { - if isLittle { - return e.big(i) - } - return e.little(i) -} - -func (endianSupport) big(idx uint32) uint32 { - if idx >= 6 { - return 0 - } - return uint32(unsafe.Offsetof(secData.args)) + 8*idx -} - -func (endianSupport) little(idx uint32) uint32 { - if idx < 0 || idx >= 6 { - return 0 - } - return uint32(unsafe.Offsetof(secData.args)) + - uint32(unsafe.Alignof(secData.args[0]))*idx + uint32(unsafe.Sizeof(secData.arch)) -} - -func prctl(option int, arg2, arg3, arg4, arg5 uintptr) error { - _, _, err := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0) - if err != 0 { - return err - } - return nil -} - -func newSockFprog(filter []sockFilter) *sockFprog { - return &sockFprog{ - len: uint16(len(filter)), - filt: filter, - } -} - -type sockFprog struct { - len uint16 - filt []sockFilter -} - -func (s *sockFprog) set() error { - _, _, err := syscall.Syscall(syscall.SYS_PRCTL, uintptr(syscall.PR_SET_SECCOMP), - uintptr(modeFilter), uintptr(unsafe.Pointer(s))) - if err != 0 { - return err - } - 
return nil -} diff --git a/libcontainer/seccomp/seccomp_unsupported.go b/libcontainer/seccomp/seccomp_unsupported.go index 821dd57c..790d3b5a 100644 --- a/libcontainer/seccomp/seccomp_unsupported.go +++ b/libcontainer/seccomp/seccomp_unsupported.go @@ -1,3 +1,12 @@ -// +build !linux +// +build !linux !cgo !seccomp package seccomp + +import ( + "github.com/opencontainers/runc/libcontainer/configs" +) + +// Seccomp not supported, do nothing +func InitSeccomp(config *configs.Seccomp) error { + return nil +} diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index 334d3e25..f40a5ac2 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -7,6 +7,7 @@ import ( "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/label" + "github.com/opencontainers/runc/libcontainer/seccomp" "github.com/opencontainers/runc/libcontainer/system" ) @@ -20,6 +21,11 @@ func (l *linuxSetnsInit) Init() error { if err := setupRlimits(l.config.Config); err != nil { return err } + if l.config.Config.Seccomp != nil { + if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { + return err + } + } if err := finalizeNamespace(l.config); err != nil { return err } diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index b399aa5d..86f434d0 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -9,6 +9,7 @@ import ( "github.com/opencontainers/runc/libcontainer/apparmor" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/label" + "github.com/opencontainers/runc/libcontainer/seccomp" "github.com/opencontainers/runc/libcontainer/system" ) @@ -85,6 +86,11 @@ func (l *linuxStandardInit) Init() error { if err != nil { return err } + if l.config.Config.Seccomp != nil { + if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { + return err + } + } if err := 
finalizeNamespace(l.config); err != nil { return err } @@ -99,8 +105,5 @@ func (l *linuxStandardInit) Init() error { if syscall.Getppid() != l.parentPid { return syscall.Kill(syscall.Getpid(), syscall.SIGKILL) } - if err := finalizeSeccomp(l.config); err != nil { - return err - } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) } diff --git a/script/test_Dockerfile b/script/test_Dockerfile index 4f4206b2..6a9e9e71 100644 --- a/script/test_Dockerfile +++ b/script/test_Dockerfile @@ -1,7 +1,13 @@ FROM golang:1.4 RUN echo "deb http://ftp.us.debian.org/debian testing main contrib" >> /etc/apt/sources.list -RUN apt-get update && apt-get install -y iptables criu=1.6-2 && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y iptables criu=1.6-2 git build-essential autoconf libtool && rm -rf /var/lib/apt/lists/* + +# Need Libseccomp v2.2.1 minimum, but grab the latest version of the v2.2 releases +RUN git clone -b v2.2.3 --depth 1 https://github.com/seccomp/libseccomp /libseccomp +RUN cd /libseccomp && ./autogen.sh && ./configure && make && make check && make install +# Fix linking error +RUN cp /usr/local/lib/libseccomp.so /usr/lib/libseccomp.so.2 # setup a playground for us to spawn containers in RUN mkdir /busybox && \