runc/libcontainer/netlink/netlink_linux.go

1322 lines
30 KiB
Go

package netlink
import (
"encoding/binary"
"fmt"
"io"
"math/rand"
"net"
"os"
"sync/atomic"
"syscall"
"time"
"unsafe"
)
const (
IFNAMSIZ = 16
DEFAULT_CHANGE = 0xFFFFFFFF
IFLA_INFO_KIND = 1
IFLA_INFO_DATA = 2
VETH_INFO_PEER = 1
IFLA_MACVLAN_MODE = 1
IFLA_VLAN_ID = 1
IFLA_NET_NS_FD = 28
IFLA_ADDRESS = 1
IFLA_BRPORT_MODE = 4
SIOC_BRADDBR = 0x89a0
SIOC_BRDELBR = 0x89a1
SIOC_BRADDIF = 0x89a2
SIOC_BRDELIF = 0x89a3
)
const (
MACVLAN_MODE_PRIVATE = 1 << iota
MACVLAN_MODE_VEPA
MACVLAN_MODE_BRIDGE
MACVLAN_MODE_PASSTHRU
)
var nextSeqNr uint32
type ifreqHwaddr struct {
IfrnName [IFNAMSIZ]byte
IfruHwaddr syscall.RawSockaddr
}
type ifreqIndex struct {
IfrnName [IFNAMSIZ]byte
IfruIndex int32
}
type ifreqFlags struct {
IfrnName [IFNAMSIZ]byte
Ifruflags uint16
}
var native binary.ByteOrder
var rnd = rand.New(rand.NewSource(time.Now().UnixNano()))
func init() {
var x uint32 = 0x01020304
if *(*byte)(unsafe.Pointer(&x)) == 0x01 {
native = binary.BigEndian
} else {
native = binary.LittleEndian
}
}
func getIpFamily(ip net.IP) int {
if len(ip) <= net.IPv4len {
return syscall.AF_INET
}
if ip.To4() != nil {
return syscall.AF_INET
}
return syscall.AF_INET6
}
type NetlinkRequestData interface {
Len() int
ToWireFormat() []byte
}
type IfInfomsg struct {
syscall.IfInfomsg
}
func newIfInfomsg(family int) *IfInfomsg {
return &IfInfomsg{
IfInfomsg: syscall.IfInfomsg{
Family: uint8(family),
},
}
}
func newIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
msg := newIfInfomsg(family)
parent.children = append(parent.children, msg)
return msg
}
func (msg *IfInfomsg) ToWireFormat() []byte {
length := syscall.SizeofIfInfomsg
b := make([]byte, length)
b[0] = msg.Family
b[1] = 0
native.PutUint16(b[2:4], msg.Type)
native.PutUint32(b[4:8], uint32(msg.Index))
native.PutUint32(b[8:12], msg.Flags)
native.PutUint32(b[12:16], msg.Change)
return b
}
func (msg *IfInfomsg) Len() int {
return syscall.SizeofIfInfomsg
}
type IfAddrmsg struct {
syscall.IfAddrmsg
}
func newIfAddrmsg(family int) *IfAddrmsg {
return &IfAddrmsg{
IfAddrmsg: syscall.IfAddrmsg{
Family: uint8(family),
},
}
}
func (msg *IfAddrmsg) ToWireFormat() []byte {
length := syscall.SizeofIfAddrmsg
b := make([]byte, length)
b[0] = msg.Family
b[1] = msg.Prefixlen
b[2] = msg.Flags
b[3] = msg.Scope
native.PutUint32(b[4:8], msg.Index)
return b
}
func (msg *IfAddrmsg) Len() int {
return syscall.SizeofIfAddrmsg
}
type RtMsg struct {
syscall.RtMsg
}
func newRtMsg() *RtMsg {
return &RtMsg{
RtMsg: syscall.RtMsg{
Table: syscall.RT_TABLE_MAIN,
Scope: syscall.RT_SCOPE_UNIVERSE,
Protocol: syscall.RTPROT_BOOT,
Type: syscall.RTN_UNICAST,
},
}
}
func (msg *RtMsg) ToWireFormat() []byte {
length := syscall.SizeofRtMsg
b := make([]byte, length)
b[0] = msg.Family
b[1] = msg.Dst_len
b[2] = msg.Src_len
b[3] = msg.Tos
b[4] = msg.Table
b[5] = msg.Protocol
b[6] = msg.Scope
b[7] = msg.Type
native.PutUint32(b[8:12], msg.Flags)
return b
}
func (msg *RtMsg) Len() int {
return syscall.SizeofRtMsg
}
func rtaAlignOf(attrlen int) int {
return (attrlen + syscall.RTA_ALIGNTO - 1) & ^(syscall.RTA_ALIGNTO - 1)
}
type RtAttr struct {
syscall.RtAttr
Data []byte
children []NetlinkRequestData
}
func newRtAttr(attrType int, data []byte) *RtAttr {
return &RtAttr{
RtAttr: syscall.RtAttr{
Type: uint16(attrType),
},
children: []NetlinkRequestData{},
Data: data,
}
}
func newRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr {
attr := newRtAttr(attrType, data)
parent.children = append(parent.children, attr)
return attr
}
func (a *RtAttr) Len() int {
if len(a.children) == 0 {
return (syscall.SizeofRtAttr + len(a.Data))
}
l := 0
for _, child := range a.children {
l += child.Len()
}
l += syscall.SizeofRtAttr
return rtaAlignOf(l + len(a.Data))
}
func (a *RtAttr) ToWireFormat() []byte {
length := a.Len()
buf := make([]byte, rtaAlignOf(length))
if a.Data != nil {
copy(buf[4:], a.Data)
} else {
next := 4
for _, child := range a.children {
childBuf := child.ToWireFormat()
copy(buf[next:], childBuf)
next += rtaAlignOf(len(childBuf))
}
}
if l := uint16(length); l != 0 {
native.PutUint16(buf[0:2], l)
}
native.PutUint16(buf[2:4], a.Type)
return buf
}
func uint32Attr(t int, n uint32) *RtAttr {
buf := make([]byte, 4)
native.PutUint32(buf, n)
return newRtAttr(t, buf)
}
type NetlinkRequest struct {
syscall.NlMsghdr
Data []NetlinkRequestData
}
func (rr *NetlinkRequest) ToWireFormat() []byte {
length := rr.Len
dataBytes := make([][]byte, len(rr.Data))
for i, data := range rr.Data {
dataBytes[i] = data.ToWireFormat()
length += uint32(len(dataBytes[i]))
}
b := make([]byte, length)
native.PutUint32(b[0:4], length)
native.PutUint16(b[4:6], rr.Type)
native.PutUint16(b[6:8], rr.Flags)
native.PutUint32(b[8:12], rr.Seq)
native.PutUint32(b[12:16], rr.Pid)
next := 16
for _, data := range dataBytes {
copy(b[next:], data)
next += len(data)
}
return b
}
func (rr *NetlinkRequest) AddData(data NetlinkRequestData) {
if data != nil {
rr.Data = append(rr.Data, data)
}
}
func newNetlinkRequest(proto, flags int) *NetlinkRequest {
return &NetlinkRequest{
NlMsghdr: syscall.NlMsghdr{
Len: uint32(syscall.NLMSG_HDRLEN),
Type: uint16(proto),
Flags: syscall.NLM_F_REQUEST | uint16(flags),
Seq: atomic.AddUint32(&nextSeqNr, 1),
},
}
}
type NetlinkSocket struct {
fd int
lsa syscall.SockaddrNetlink
}
func getNetlinkSocket() (*NetlinkSocket, error) {
fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, syscall.NETLINK_ROUTE)
if err != nil {
return nil, err
}
s := &NetlinkSocket{
fd: fd,
}
s.lsa.Family = syscall.AF_NETLINK
if err := syscall.Bind(fd, &s.lsa); err != nil {
syscall.Close(fd)
return nil, err
}
return s, nil
}
func (s *NetlinkSocket) Close() {
syscall.Close(s.fd)
}
func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
if err := syscall.Sendto(s.fd, request.ToWireFormat(), 0, &s.lsa); err != nil {
return err
}
return nil
}
func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) {
rb := make([]byte, syscall.Getpagesize())
nr, _, err := syscall.Recvfrom(s.fd, rb, 0)
if err != nil {
return nil, err
}
if nr < syscall.NLMSG_HDRLEN {
return nil, ErrShortResponse
}
rb = rb[:nr]
return syscall.ParseNetlinkMessage(rb)
}
func (s *NetlinkSocket) GetPid() (uint32, error) {
lsa, err := syscall.Getsockname(s.fd)
if err != nil {
return 0, err
}
switch v := lsa.(type) {
case *syscall.SockaddrNetlink:
return v.Pid, nil
}
return 0, ErrWrongSockType
}
func (s *NetlinkSocket) CheckMessage(m syscall.NetlinkMessage, seq, pid uint32) error {
if m.Header.Seq != seq {
return fmt.Errorf("netlink: invalid seq %d, expected %d", m.Header.Seq, seq)
}
if m.Header.Pid != pid {
return fmt.Errorf("netlink: wrong pid %d, expected %d", m.Header.Pid, pid)
}
if m.Header.Type == syscall.NLMSG_DONE {
return io.EOF
}
if m.Header.Type == syscall.NLMSG_ERROR {
e := int32(native.Uint32(m.Data[0:4]))
if e == 0 {
return io.EOF
}
return syscall.Errno(-e)
}
return nil
}
func (s *NetlinkSocket) HandleAck(seq uint32) error {
pid, err := s.GetPid()
if err != nil {
return err
}
outer:
for {
msgs, err := s.Receive()
if err != nil {
return err
}
for _, m := range msgs {
if err := s.CheckMessage(m, seq, pid); err != nil {
if err == io.EOF {
break outer
}
return err
}
}
}
return nil
}
func zeroTerminated(s string) []byte {
return []byte(s + "\000")
}
func nonZeroTerminated(s string) []byte {
return []byte(s)
}
// Add a new network link of a specified type.
// This is identical to running: ip link add $name type $linkType
func NetworkLinkAdd(name string, linkType string) error {
if name == "" || linkType == "" {
return fmt.Errorf("Neither link name nor link type can be empty!")
}
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
wb.AddData(msg)
linkInfo := newRtAttr(syscall.IFLA_LINKINFO, nil)
newRtAttrChild(linkInfo, IFLA_INFO_KIND, nonZeroTerminated(linkType))
wb.AddData(linkInfo)
nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name))
wb.AddData(nameData)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Delete a network link.
// This is identical to running: ip link del $name
func NetworkLinkDel(name string) error {
if name == "" {
return fmt.Errorf("Network link name can not be empty!")
}
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
iface, err := net.InterfaceByName(name)
if err != nil {
return err
}
wb := newNetlinkRequest(syscall.RTM_DELLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(iface.Index)
wb.AddData(msg)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Bring up a particular network interface.
// This is identical to running: ip link set dev $name up
func NetworkLinkUp(iface *net.Interface) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(iface.Index)
msg.Flags = syscall.IFF_UP
msg.Change = syscall.IFF_UP
wb.AddData(msg)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Bring down a particular network interface.
// This is identical to running: ip link set $name down
func NetworkLinkDown(iface *net.Interface) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(iface.Index)
msg.Flags = 0 & ^syscall.IFF_UP
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Set link layer address ie. MAC Address.
// This is identical to running: ip link set dev $name address $macaddress
func NetworkSetMacAddress(iface *net.Interface, macaddr string) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
hwaddr, err := net.ParseMAC(macaddr)
if err != nil {
return err
}
var (
MULTICAST byte = 0x1
)
if hwaddr[0]&0x1 == MULTICAST {
return fmt.Errorf("Multicast MAC Address is not supported: %s", macaddr)
}
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
macdata := make([]byte, 6)
copy(macdata, hwaddr)
data := newRtAttr(IFLA_ADDRESS, macdata)
wb.AddData(data)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Set link Maximum Transmission Unit
// This is identical to running: ip link set dev $name mtu $MTU
// bridge is a bitch here https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=292088
// https://bugzilla.redhat.com/show_bug.cgi?id=697021
// There is a discussion about how to deal with ifcs joining bridge with MTU > 1500
// Regular network nterfaces do seem to work though!
func NetworkSetMTU(iface *net.Interface, mtu int) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Type = syscall.RTM_SETLINK
msg.Flags = syscall.NLM_F_REQUEST
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
wb.AddData(uint32Attr(syscall.IFLA_MTU, uint32(mtu)))
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Set link queue length
// This is identical to running: ip link set dev $name txqueuelen $QLEN
func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Type = syscall.RTM_SETLINK
msg.Flags = syscall.NLM_F_REQUEST
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
wb.AddData(uint32Attr(syscall.IFLA_TXQLEN, uint32(txQueueLen)))
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Type = syscall.RTM_SETLINK
msg.Flags = syscall.NLM_F_REQUEST
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
wb.AddData(rtattr)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Add an interface to bridge.
// This is identical to running: ip link set $name master $master
func NetworkSetMaster(iface, master *net.Interface) error {
data := uint32Attr(syscall.IFLA_MASTER, uint32(master.Index))
return networkMasterAction(iface, data)
}
// Remove an interface from the bridge
// This is is identical to to running: ip link $name set nomaster
func NetworkSetNoMaster(iface *net.Interface) error {
data := uint32Attr(syscall.IFLA_MASTER, 0)
return networkMasterAction(iface, data)
}
func networkSetNsAction(iface *net.Interface, rtattr *RtAttr) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(iface.Index)
wb.AddData(msg)
wb.AddData(rtattr)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Move a particular network interface to a particular network namespace
// specified by PID. This is identical to running: ip link set dev $name netns $pid
func NetworkSetNsPid(iface *net.Interface, nspid int) error {
data := uint32Attr(syscall.IFLA_NET_NS_PID, uint32(nspid))
return networkSetNsAction(iface, data)
}
// Move a particular network interface to a particular mounted
// network namespace specified by file descriptor.
// This is idential to running: ip link set dev $name netns $fd
func NetworkSetNsFd(iface *net.Interface, fd int) error {
data := uint32Attr(IFLA_NET_NS_FD, uint32(fd))
return networkSetNsAction(iface, data)
}
// Rename a particular interface to a different name
// !!! Note that you can't rename an active interface. You need to bring it down before renaming it.
// This is identical to running: ip link set dev ${oldName} name ${newName}
func NetworkChangeName(iface *net.Interface, newName string) error {
if len(newName) >= IFNAMSIZ {
return fmt.Errorf("Interface name %s too long", newName)
}
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(newName))
wb.AddData(nameData)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Add a new VETH pair link on the host
// This is identical to running: ip link add name $name type veth peer name $peername
func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
wb.AddData(msg)
nameData := newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(name1))
wb.AddData(nameData)
txqLen := make([]byte, 4)
native.PutUint32(txqLen, uint32(txQueueLen))
txqData := newRtAttr(syscall.IFLA_TXQLEN, txqLen)
wb.AddData(txqData)
nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
newRtAttrChild(nest1, IFLA_INFO_KIND, zeroTerminated("veth"))
nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
nest3 := newRtAttrChild(nest2, VETH_INFO_PEER, nil)
newIfInfomsgChild(nest3, syscall.AF_UNSPEC)
newRtAttrChild(nest3, syscall.IFLA_IFNAME, zeroTerminated(name2))
txqLen2 := make([]byte, 4)
native.PutUint32(txqLen2, uint32(txQueueLen))
newRtAttrChild(nest3, syscall.IFLA_TXQLEN, txqLen2)
wb.AddData(nest1)
if err := s.Send(wb); err != nil {
return err
}
if err := s.HandleAck(wb.Seq); err != nil {
if os.IsExist(err) {
return ErrInterfaceExists
}
return err
}
return nil
}
// Add a new VLAN interface with masterDev as its upper device
// This is identical to running:
// ip link add name $name link $masterdev type vlan id $id
func NetworkLinkAddVlan(masterDev, vlanDev string, vlanId uint16) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
masterDevIfc, err := net.InterfaceByName(masterDev)
if err != nil {
return err
}
msg := newIfInfomsg(syscall.AF_UNSPEC)
wb.AddData(msg)
nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("vlan"))
nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
vlanData := make([]byte, 2)
native.PutUint16(vlanData, vlanId)
newRtAttrChild(nest2, IFLA_VLAN_ID, vlanData)
wb.AddData(nest1)
wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(vlanDev)))
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// MacVlan link has LowerDev, UpperDev and operates in Mode mode
// This simplifies the code when creating MacVlan or MacVtap interface
type MacVlanLink struct {
MasterDev string
SlaveDev string
mode string
}
func (m MacVlanLink) Mode() uint32 {
modeMap := map[string]uint32{
"private": MACVLAN_MODE_PRIVATE,
"vepa": MACVLAN_MODE_VEPA,
"bridge": MACVLAN_MODE_BRIDGE,
"passthru": MACVLAN_MODE_PASSTHRU,
}
return modeMap[m.mode]
}
// Add MAC VLAN network interface with masterDev as its upper device
// This is identical to running:
// ip link add name $name link $masterdev type macvlan mode $mode
func networkLinkMacVlan(dev_type string, mcvln *MacVlanLink) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
masterDevIfc, err := net.InterfaceByName(mcvln.MasterDev)
if err != nil {
return err
}
msg := newIfInfomsg(syscall.AF_UNSPEC)
wb.AddData(msg)
nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated(dev_type))
nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
macVlanData := make([]byte, 4)
native.PutUint32(macVlanData, mcvln.Mode())
newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData)
wb.AddData(nest1)
wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(mcvln.SlaveDev)))
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
return networkLinkMacVlan("macvlan", &MacVlanLink{
MasterDev: masterDev,
SlaveDev: macVlanDev,
mode: mode,
})
}
func NetworkLinkAddMacVtap(masterDev, macVlanDev string, mode string) error {
return networkLinkMacVlan("macvtap", &MacVlanLink{
MasterDev: masterDev,
SlaveDev: macVlanDev,
mode: mode,
})
}
func networkLinkIpAction(action, flags int, ifa IfAddr) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
family := getIpFamily(ifa.IP)
wb := newNetlinkRequest(action, flags)
msg := newIfAddrmsg(family)
msg.Index = uint32(ifa.Iface.Index)
prefixLen, _ := ifa.IPNet.Mask.Size()
msg.Prefixlen = uint8(prefixLen)
wb.AddData(msg)
var ipData []byte
if family == syscall.AF_INET {
ipData = ifa.IP.To4()
} else {
ipData = ifa.IP.To16()
}
localData := newRtAttr(syscall.IFA_LOCAL, ipData)
wb.AddData(localData)
addrData := newRtAttr(syscall.IFA_ADDRESS, ipData)
wb.AddData(addrData)
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Delete an IP address from an interface. This is identical to:
// ip addr del $ip/$ipNet dev $iface
func NetworkLinkDelIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
return networkLinkIpAction(
syscall.RTM_DELADDR,
syscall.NLM_F_ACK,
IfAddr{iface, ip, ipNet},
)
}
// Add an Ip address to an interface. This is identical to:
// ip addr add $ip/$ipNet dev $iface
func NetworkLinkAddIp(iface *net.Interface, ip net.IP, ipNet *net.IPNet) error {
return networkLinkIpAction(
syscall.RTM_NEWADDR,
syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK,
IfAddr{iface, ip, ipNet},
)
}
// Returns an array of IPNet for all the currently routed subnets on ipv4
// This is similar to the first column of "ip route" output
func NetworkGetRoutes() ([]Route, error) {
s, err := getNetlinkSocket()
if err != nil {
return nil, err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP)
msg := newIfInfomsg(syscall.AF_UNSPEC)
wb.AddData(msg)
if err := s.Send(wb); err != nil {
return nil, err
}
pid, err := s.GetPid()
if err != nil {
return nil, err
}
res := make([]Route, 0)
outer:
for {
msgs, err := s.Receive()
if err != nil {
return nil, err
}
for _, m := range msgs {
if err := s.CheckMessage(m, wb.Seq, pid); err != nil {
if err == io.EOF {
break outer
}
return nil, err
}
if m.Header.Type != syscall.RTM_NEWROUTE {
continue
}
var r Route
msg := (*RtMsg)(unsafe.Pointer(&m.Data[0:syscall.SizeofRtMsg][0]))
if msg.Flags&syscall.RTM_F_CLONED != 0 {
// Ignore cloned routes
continue
}
if msg.Table != syscall.RT_TABLE_MAIN {
// Ignore non-main tables
continue
}
if msg.Family != syscall.AF_INET {
// Ignore non-ipv4 routes
continue
}
if msg.Dst_len == 0 {
// Default routes
r.Default = true
}
attrs, err := syscall.ParseNetlinkRouteAttr(&m)
if err != nil {
return nil, err
}
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.RTA_DST:
ip := attr.Value
r.IPNet = &net.IPNet{
IP: ip,
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(ip)),
}
case syscall.RTA_OIF:
index := int(native.Uint32(attr.Value[0:4]))
r.Iface, _ = net.InterfaceByIndex(index)
}
}
if r.Default || r.IPNet != nil {
res = append(res, r)
}
}
}
return res, nil
}
// Add a new route table entry.
func AddRoute(destination, source, gateway, device string) error {
if destination == "" && source == "" && gateway == "" {
return fmt.Errorf("one of destination, source or gateway must not be blank")
}
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWROUTE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
msg := newRtMsg()
currentFamily := -1
var rtAttrs []*RtAttr
if destination != "" {
destIP, destNet, err := net.ParseCIDR(destination)
if err != nil {
return fmt.Errorf("destination CIDR %s couldn't be parsed", destination)
}
destFamily := getIpFamily(destIP)
currentFamily = destFamily
destLen, bits := destNet.Mask.Size()
if destLen == 0 && bits == 0 {
return fmt.Errorf("destination CIDR %s generated a non-canonical Mask", destination)
}
msg.Family = uint8(destFamily)
msg.Dst_len = uint8(destLen)
var destData []byte
if destFamily == syscall.AF_INET {
destData = destIP.To4()
} else {
destData = destIP.To16()
}
rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_DST, destData))
}
if source != "" {
srcIP := net.ParseIP(source)
if srcIP == nil {
return fmt.Errorf("source IP %s couldn't be parsed", source)
}
srcFamily := getIpFamily(srcIP)
if currentFamily != -1 && currentFamily != srcFamily {
return fmt.Errorf("source and destination ip were not the same IP family")
}
currentFamily = srcFamily
msg.Family = uint8(srcFamily)
var srcData []byte
if srcFamily == syscall.AF_INET {
srcData = srcIP.To4()
} else {
srcData = srcIP.To16()
}
rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_PREFSRC, srcData))
}
if gateway != "" {
gwIP := net.ParseIP(gateway)
if gwIP == nil {
return fmt.Errorf("gateway IP %s couldn't be parsed", gateway)
}
gwFamily := getIpFamily(gwIP)
if currentFamily != -1 && currentFamily != gwFamily {
return fmt.Errorf("gateway, source, and destination ip were not the same IP family")
}
msg.Family = uint8(gwFamily)
var gwData []byte
if gwFamily == syscall.AF_INET {
gwData = gwIP.To4()
} else {
gwData = gwIP.To16()
}
rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_GATEWAY, gwData))
}
wb.AddData(msg)
for _, attr := range rtAttrs {
wb.AddData(attr)
}
iface, err := net.InterfaceByName(device)
if err != nil {
return err
}
wb.AddData(uint32Attr(syscall.RTA_OIF, uint32(iface.Index)))
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
// Add a new default gateway. Identical to:
// ip route add default via $ip
func AddDefaultGw(ip, device string) error {
return AddRoute("", "", ip, device)
}
// THIS CODE DOES NOT COMMUNICATE WITH KERNEL VIA RTNETLINK INTERFACE
// IT IS HERE FOR BACKWARDS COMPATIBILITY WITH OLDER LINUX KERNELS
// WHICH SHIP WITH OLDER NOT ENTIRELY FUNCTIONAL VERSION OF NETLINK
func getIfSocket() (fd int, err error) {
for _, socket := range []int{
syscall.AF_INET,
syscall.AF_PACKET,
syscall.AF_INET6,
} {
if fd, err = syscall.Socket(socket, syscall.SOCK_DGRAM, 0); err == nil {
break
}
}
if err == nil {
return fd, nil
}
return -1, err
}
// Create the actual bridge device. This is more backward-compatible than
// netlink.NetworkLinkAdd and works on RHEL 6.
func CreateBridge(name string, setMacAddr bool) error {
if len(name) >= IFNAMSIZ {
return fmt.Errorf("Interface name %s too long", name)
}
s, err := getIfSocket()
if err != nil {
return err
}
defer syscall.Close(s)
nameBytePtr, err := syscall.BytePtrFromString(name)
if err != nil {
return err
}
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 {
return err
}
if setMacAddr {
return SetMacAddress(name, randMacAddr())
}
return nil
}
// Delete the actual bridge device.
func DeleteBridge(name string) error {
s, err := getIfSocket()
if err != nil {
return err
}
defer syscall.Close(s)
nameBytePtr, err := syscall.BytePtrFromString(name)
if err != nil {
return err
}
var ifr ifreqFlags
copy(ifr.IfrnName[:len(ifr.IfrnName)-1], []byte(name))
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s),
syscall.SIOCSIFFLAGS, uintptr(unsafe.Pointer(&ifr))); err != 0 {
return err
}
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s),
SIOC_BRDELBR, uintptr(unsafe.Pointer(nameBytePtr))); err != 0 {
return err
}
return nil
}
func ifIoctBridge(iface, master *net.Interface, op uintptr) error {
if len(master.Name) >= IFNAMSIZ {
return fmt.Errorf("Interface name %s too long", master.Name)
}
s, err := getIfSocket()
if err != nil {
return err
}
defer syscall.Close(s)
ifr := ifreqIndex{}
copy(ifr.IfrnName[:len(ifr.IfrnName)-1], master.Name)
ifr.IfruIndex = int32(iface.Index)
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), op, uintptr(unsafe.Pointer(&ifr))); err != 0 {
return err
}
return nil
}
// Add a slave to a bridge device. This is more backward-compatible than
// netlink.NetworkSetMaster and works on RHEL 6.
func AddToBridge(iface, master *net.Interface) error {
return ifIoctBridge(iface, master, SIOC_BRADDIF)
}
// Detach a slave from a bridge device. This is more backward-compatible than
// netlink.NetworkSetMaster and works on RHEL 6.
func DelFromBridge(iface, master *net.Interface) error {
return ifIoctBridge(iface, master, SIOC_BRDELIF)
}
func randMacAddr() string {
hw := make(net.HardwareAddr, 6)
for i := 0; i < 6; i++ {
hw[i] = byte(rnd.Intn(255))
}
hw[0] &^= 0x1 // clear multicast bit
hw[0] |= 0x2 // set local assignment bit (IEEE802)
return hw.String()
}
func SetMacAddress(name, addr string) error {
if len(name) >= IFNAMSIZ {
return fmt.Errorf("Interface name %s too long", name)
}
hw, err := net.ParseMAC(addr)
if err != nil {
return err
}
s, err := getIfSocket()
if err != nil {
return err
}
defer syscall.Close(s)
ifr := ifreqHwaddr{}
ifr.IfruHwaddr.Family = syscall.ARPHRD_ETHER
copy(ifr.IfrnName[:len(ifr.IfrnName)-1], name)
for i := 0; i < 6; i++ {
ifr.IfruHwaddr.Data[i] = ifrDataByte(hw[i])
}
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), syscall.SIOCSIFHWADDR, uintptr(unsafe.Pointer(&ifr))); err != 0 {
return err
}
return nil
}
func SetHairpinMode(iface *net.Interface, enabled bool) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
req := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_BRIDGE)
msg.Type = syscall.RTM_SETLINK
msg.Flags = syscall.NLM_F_REQUEST
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
req.AddData(msg)
mode := []byte{0}
if enabled {
mode[0] = byte(1)
}
br := newRtAttr(syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED, nil)
newRtAttrChild(br, IFLA_BRPORT_MODE, mode)
req.AddData(br)
if err := s.Send(req); err != nil {
return err
}
return s.HandleAck(req.Seq)
}
func ChangeName(iface *net.Interface, newName string) error {
if len(newName) >= IFNAMSIZ {
return fmt.Errorf("Interface name %s too long", newName)
}
fd, err := getIfSocket()
if err != nil {
return err
}
defer syscall.Close(fd)
data := [IFNAMSIZ * 2]byte{}
// the "-1"s here are very important for ensuring we get proper null
// termination of our new C strings
copy(data[:IFNAMSIZ-1], iface.Name)
copy(data[IFNAMSIZ:IFNAMSIZ*2-1], newName)
if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 {
return errno
}
return nil
}