465 lines
12 KiB
Go
465 lines
12 KiB
Go
package peermgr
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/Rican7/retry"
|
|
"github.com/Rican7/retry/strategy"
|
|
"github.com/ethereum/go-ethereum/event"
|
|
"github.com/libp2p/go-libp2p-core/peer"
|
|
"github.com/libp2p/go-libp2p-core/protocol"
|
|
"github.com/meshplus/bitxhub-model/pb"
|
|
"github.com/meshplus/bitxhub/internal/ledger"
|
|
"github.com/meshplus/bitxhub/internal/model/events"
|
|
"github.com/meshplus/bitxhub/internal/repo"
|
|
libp2pcert "github.com/meshplus/go-libp2p-cert"
|
|
network "github.com/meshplus/go-lightp2p"
|
|
ma "github.com/multiformats/go-multiaddr"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
const (
|
|
protocolID protocol.ID = "/B1txHu6/1.0.0" // magic protocol
|
|
)
|
|
|
|
type Swarm struct {
|
|
repo *repo.Repo
|
|
localID uint64
|
|
p2p network.Network
|
|
logger logrus.FieldLogger
|
|
|
|
routers map[uint64]*pb.VpInfo // trace the vp nodes
|
|
multiAddrs map[uint64]*peer.AddrInfo
|
|
connectedPeers sync.Map
|
|
notifiee *notifiee
|
|
piers *Piers
|
|
|
|
ledger ledger.Ledger
|
|
orderMessageFeed event.Feed
|
|
enablePing bool
|
|
pingTimeout time.Duration
|
|
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
}
|
|
|
|
func New(repoConfig *repo.Repo, logger logrus.FieldLogger, ledger ledger.Ledger) (*Swarm, error) {
|
|
var protocolIDs = []string{string(protocolID)}
|
|
// init peers with ips and hosts
|
|
routers := repoConfig.NetworkConfig.GetVpInfos()
|
|
bootstrap := make([]string, 0)
|
|
for _, p := range routers {
|
|
if p.Id == repoConfig.NetworkConfig.ID {
|
|
continue
|
|
}
|
|
addr := fmt.Sprintf("%s%s", p.Hosts[0], p.Pid)
|
|
bootstrap = append(bootstrap, addr)
|
|
}
|
|
|
|
multiAddrs := make(map[uint64]*peer.AddrInfo)
|
|
p2pPeers, _ := repoConfig.NetworkConfig.GetNetworkPeers()
|
|
for id, node := range p2pPeers {
|
|
if id == repoConfig.NetworkConfig.ID {
|
|
continue
|
|
}
|
|
multiAddrs[id] = node
|
|
}
|
|
|
|
tpt, err := libp2pcert.New(repoConfig.Key.Libp2pPrivKey, repoConfig.Certs)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("create transport: %w", err)
|
|
}
|
|
|
|
notifiee := newNotifiee(routers, logger)
|
|
|
|
opts := []network.Option{
|
|
network.WithLocalAddr(repoConfig.NetworkConfig.LocalAddr),
|
|
network.WithPrivateKey(repoConfig.Key.Libp2pPrivKey),
|
|
network.WithProtocolIDs(protocolIDs),
|
|
network.WithLogger(logger),
|
|
// enable discovery
|
|
network.WithBootstrap(bootstrap),
|
|
network.WithNotify(notifiee),
|
|
}
|
|
|
|
if repoConfig.Config.Cert.Verify {
|
|
opts = append(opts,
|
|
network.WithTransportId(libp2pcert.ID),
|
|
network.WithTransport(tpt),
|
|
)
|
|
}
|
|
|
|
p2p, err := network.New(opts...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("create p2p: %w", err)
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
return &Swarm{
|
|
repo: repoConfig,
|
|
localID: repoConfig.NetworkConfig.ID,
|
|
p2p: p2p,
|
|
logger: logger,
|
|
ledger: ledger,
|
|
enablePing: repoConfig.Config.Ping.Enable,
|
|
pingTimeout: repoConfig.Config.Ping.Duration,
|
|
routers: routers,
|
|
multiAddrs: multiAddrs,
|
|
piers: newPiers(),
|
|
connectedPeers: sync.Map{},
|
|
notifiee: notifiee,
|
|
ctx: ctx,
|
|
cancel: cancel,
|
|
}, nil
|
|
}
|
|
|
|
func (swarm *Swarm) Start() error {
|
|
swarm.p2p.SetMessageHandler(swarm.handleMessage)
|
|
|
|
if err := swarm.p2p.Start(); err != nil {
|
|
return err
|
|
}
|
|
|
|
for id, addr := range swarm.multiAddrs {
|
|
go func(id uint64, addr *peer.AddrInfo) {
|
|
if err := retry.Retry(func(attempt uint) error {
|
|
// for restart node, after updating the routing table, some nodes may not exist in routing table
|
|
routers := swarm.notifiee.getPeers()
|
|
if _, ok := routers[id]; !ok {
|
|
swarm.logger.Infof("Can't find node %d from routing table, stopping connect", id)
|
|
return nil
|
|
}
|
|
if err := swarm.p2p.Connect(*addr); err != nil {
|
|
swarm.logger.WithFields(logrus.Fields{
|
|
"node": id,
|
|
"error": err,
|
|
}).Error("Connect failed")
|
|
return err
|
|
}
|
|
|
|
swarm.logger.WithFields(logrus.Fields{
|
|
"node": id,
|
|
}).Info("Connect successfully")
|
|
|
|
swarm.connectedPeers.Store(id, addr)
|
|
|
|
return nil
|
|
},
|
|
strategy.Wait(1*time.Second),
|
|
); err != nil {
|
|
swarm.logger.Error(err)
|
|
}
|
|
}(id, addr)
|
|
}
|
|
|
|
if swarm.enablePing {
|
|
go swarm.Ping()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (swarm *Swarm) Stop() error {
|
|
swarm.cancel()
|
|
return swarm.p2p.Stop()
|
|
}
|
|
|
|
func (swarm *Swarm) Ping() {
|
|
ticker := time.NewTicker(swarm.pingTimeout)
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
fields := logrus.Fields{}
|
|
swarm.connectedPeers.Range(func(key, value interface{}) bool {
|
|
info := value.(*peer.AddrInfo)
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
|
|
pingCh, err := swarm.p2p.Ping(ctx, info.ID.String())
|
|
if err != nil {
|
|
return true
|
|
}
|
|
select {
|
|
case res := <-pingCh:
|
|
fields[fmt.Sprintf("%d", key.(uint64))] = res.RTT
|
|
case <-time.After(time.Second * 5):
|
|
swarm.logger.Errorf("ping to node %d timeout", key.(uint64))
|
|
}
|
|
return true
|
|
})
|
|
swarm.logger.WithFields(fields).Info("ping time")
|
|
case <-swarm.ctx.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (swarm *Swarm) AsyncSend(id uint64, msg *pb.Message) error {
|
|
var (
|
|
addr string
|
|
err error
|
|
)
|
|
if addr, err = swarm.findPeer(id); err != nil {
|
|
return fmt.Errorf("p2p send: %w", err)
|
|
}
|
|
|
|
data, err := msg.Marshal()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return swarm.p2p.AsyncSend(addr, data)
|
|
}
|
|
|
|
func (swarm *Swarm) SendWithStream(s network.Stream, msg *pb.Message) error {
|
|
data, err := msg.Marshal()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return s.AsyncSend(data)
|
|
}
|
|
|
|
func (swarm *Swarm) Send(id uint64, msg *pb.Message) (*pb.Message, error) {
|
|
var (
|
|
addr string
|
|
err error
|
|
)
|
|
if addr, err = swarm.findPeer(id); err != nil {
|
|
return nil, fmt.Errorf("check id: %w", err)
|
|
}
|
|
|
|
data, err := msg.Marshal()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ret, err := swarm.p2p.Send(addr, data)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("sync send: %w", err)
|
|
}
|
|
|
|
m := &pb.Message{}
|
|
if err := m.Unmarshal(ret); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return m, nil
|
|
}
|
|
|
|
func (swarm *Swarm) Broadcast(msg *pb.Message) error {
|
|
addrs := make([]string, 0, len(swarm.routers))
|
|
for _, router := range swarm.routers {
|
|
if router.Id == swarm.localID {
|
|
continue
|
|
}
|
|
addrs = append(addrs, router.Pid)
|
|
}
|
|
|
|
// if we are in adding node but hasn't finished updateN, new node hash will be temporarily recorded
|
|
// in swarm.notifiee.newPeer.
|
|
if swarm.notifiee.newPeer != "" {
|
|
swarm.logger.Debugf("Broadcast to new peer %s", swarm.notifiee.newPeer)
|
|
addrs = append(addrs, swarm.notifiee.newPeer)
|
|
}
|
|
|
|
data, err := msg.Marshal()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return swarm.p2p.Broadcast(addrs, data)
|
|
}
|
|
|
|
func (swarm *Swarm) Peers() map[uint64]*pb.VpInfo {
|
|
return swarm.notifiee.getPeers()
|
|
}
|
|
|
|
func (swarm *Swarm) OtherPeers() map[uint64]*peer.AddrInfo {
|
|
addrInfos := make(map[uint64]*peer.AddrInfo)
|
|
for _, node := range swarm.notifiee.getPeers() {
|
|
if node.Id == swarm.localID {
|
|
continue
|
|
}
|
|
addrInfo := &peer.AddrInfo{
|
|
ID: peer.ID(node.Pid),
|
|
}
|
|
addrInfos[node.Id] = addrInfo
|
|
}
|
|
return addrInfos
|
|
}
|
|
|
|
func (swarm *Swarm) SubscribeOrderMessage(ch chan<- events.OrderMessageEvent) event.Subscription {
|
|
return swarm.orderMessageFeed.Subscribe(ch)
|
|
}
|
|
|
|
func (swarm *Swarm) findPeer(id uint64) (string, error) {
|
|
if swarm.routers[id] != nil {
|
|
return swarm.routers[id].Pid, nil
|
|
}
|
|
newPeerAddr := swarm.notifiee.newPeer
|
|
// new node id should be len(swarm.peers)+1
|
|
if uint64(len(swarm.routers)+1) == id && swarm.notifiee.newPeer != "" {
|
|
swarm.logger.Debugf("Unicast to new peer %s", swarm.notifiee.newPeer)
|
|
return newPeerAddr, nil
|
|
}
|
|
return "", fmt.Errorf("wrong id: %d", id)
|
|
}
|
|
|
|
func (swarm *Swarm) AddNode(newNodeID uint64, vpInfo *pb.VpInfo) {
|
|
if _, ok := swarm.routers[newNodeID]; ok {
|
|
swarm.logger.Warningf("VP[ID: %d, Pid: %s] has already exist in routing table", newNodeID, vpInfo.Pid)
|
|
return
|
|
}
|
|
swarm.logger.Infof("Add vp[ID: %d, Pid: %s] into routing table", newNodeID, vpInfo.Pid)
|
|
// 1. update routers and connectedPeers
|
|
swarm.routers[newNodeID] = vpInfo
|
|
addInfo, err := constructMultiaddr(vpInfo)
|
|
if err != nil {
|
|
swarm.logger.Error("Construct AddrInfo failed")
|
|
return
|
|
}
|
|
swarm.connectedPeers.Store(newNodeID, addInfo)
|
|
|
|
// 2. persist routers
|
|
if err := repo.RewriteNetworkConfig(swarm.repo.Config.RepoRoot, swarm.routers, false); err != nil {
|
|
swarm.logger.Errorf("Persist routing table failed, err: %s", err.Error())
|
|
return
|
|
}
|
|
|
|
// 3. update notifiee info
|
|
swarm.notifiee.setPeers(swarm.routers)
|
|
for id, p := range swarm.routers {
|
|
swarm.logger.Debugf("=====ID: %d, Addr: %v=====", id, p)
|
|
}
|
|
if swarm.notifiee.newPeer == vpInfo.Pid {
|
|
swarm.logger.Info("Clear notifiee newPeer info")
|
|
swarm.notifiee.newPeer = ""
|
|
} else if swarm.notifiee.newPeer != "" {
|
|
swarm.logger.Warningf("Received vpInfo %v, but it doesn't equal to notifiee newPeer %s", vpInfo, swarm.notifiee.newPeer)
|
|
}
|
|
}
|
|
|
|
func (swarm *Swarm) DelNode(delID uint64) {
|
|
var (
|
|
delNode *pb.VpInfo
|
|
ok bool
|
|
)
|
|
if delNode, ok = swarm.routers[delID]; !ok {
|
|
swarm.logger.Warningf("Can't find vp node %d from routing table ", delID)
|
|
return
|
|
}
|
|
swarm.logger.Infof("Delete node [ID: %d, peerInfo: %v] ", delID, delNode)
|
|
// 1. update routing table, multiAddrs and connectedPeers
|
|
delete(swarm.routers, delID)
|
|
delete(swarm.multiAddrs, delID)
|
|
swarm.connectedPeers.Delete(delID)
|
|
|
|
// 2. persist routers
|
|
if err := repo.RewriteNetworkConfig(swarm.repo.Config.RepoRoot, swarm.routers, false); err != nil {
|
|
swarm.logger.Errorf("Persist routing table failed, err: %s", err.Error())
|
|
return
|
|
}
|
|
for id, p := range swarm.routers {
|
|
swarm.logger.Debugf("=====ID: %d, Addr: %v=====", id, p)
|
|
}
|
|
// 3. update notifiee info
|
|
swarm.notifiee.setPeers(swarm.routers)
|
|
|
|
// 4. deleted node itself will exit the cluster
|
|
if delID == swarm.localID {
|
|
swarm.reset()
|
|
_ = swarm.p2p.Stop()
|
|
_ = swarm.Stop()
|
|
return
|
|
}
|
|
}
|
|
|
|
func (swarm *Swarm) UpdateRouter(vpInfos map[uint64]*pb.VpInfo, isNew bool) bool {
|
|
swarm.logger.Infof("Update router: %+v", vpInfos)
|
|
// 1. update routing table, multiAddrs and connectedPeers
|
|
oldRouters := swarm.routers
|
|
swarm.routers = vpInfos
|
|
for id, _ := range oldRouters {
|
|
if _, ok := vpInfos[id]; !ok {
|
|
delete(swarm.multiAddrs, id)
|
|
swarm.connectedPeers.Delete(id)
|
|
}
|
|
}
|
|
|
|
// 2. persist routers
|
|
if err := repo.RewriteNetworkConfig(swarm.repo.Config.RepoRoot, swarm.routers, isNew); err != nil {
|
|
swarm.logger.Errorf("Persist routing table failed, err: %s", err.Error())
|
|
return false
|
|
}
|
|
|
|
// 3. update notifiee info
|
|
swarm.notifiee.setPeers(vpInfos)
|
|
|
|
// 4. check if a restart node is exist in the routing table, if not, then exit the cluster
|
|
var isExist bool
|
|
for id, _ := range vpInfos {
|
|
if id == swarm.localID {
|
|
isExist = true
|
|
break
|
|
}
|
|
}
|
|
// deleted node itself will exit the cluster
|
|
if !isExist && !isNew {
|
|
swarm.reset()
|
|
_ = swarm.p2p.Stop()
|
|
_ = swarm.Stop()
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (swarm *Swarm) Disconnect(vpInfos map[uint64]*pb.VpInfo) {
|
|
for id, info := range vpInfos {
|
|
if err := swarm.p2p.Disconnect(info.Pid); err != nil {
|
|
swarm.logger.Errorf("Disconnect peer %s failed, err: %s", err.Error())
|
|
}
|
|
swarm.logger.Infof("Disconnect peer [ID: %d, Pid: %s]", id, info.Pid)
|
|
}
|
|
}
|
|
|
|
func (swarm *Swarm) CountConnectedPeers() uint64 {
|
|
var counter uint64
|
|
swarm.connectedPeers.Range(func(k, v interface{}) bool {
|
|
counter++
|
|
return true
|
|
})
|
|
return counter
|
|
}
|
|
|
|
func (swarm *Swarm) reset() {
|
|
swarm.routers = nil
|
|
swarm.multiAddrs = nil
|
|
swarm.connectedPeers = sync.Map{}
|
|
swarm.notifiee.setPeers(nil)
|
|
}
|
|
|
|
func constructMultiaddr(vpInfo *pb.VpInfo) (*peer.AddrInfo, error) {
|
|
addrs := make([]ma.Multiaddr, 0)
|
|
for _, host := range vpInfo.Hosts {
|
|
addr, err := ma.NewMultiaddr(fmt.Sprintf("%s%s", host, vpInfo.Pid))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("new Multiaddr error:%w", err)
|
|
}
|
|
addrs = append(addrs, addr)
|
|
}
|
|
|
|
addrInfo := &peer.AddrInfo{
|
|
ID: peer.ID(vpInfo.Pid),
|
|
Addrs: addrs,
|
|
}
|
|
return addrInfo, nil
|
|
}
|
|
|
|
func (swarm *Swarm) PierManager() PierManager {
|
|
return swarm
|
|
}
|