Merge pull request #250 from meshplus/fix/fix-bugs

Fix/fix bugs
This commit is contained in:
Sandy Zhou 2020-11-24 16:52:24 +08:00 committed by GitHub
commit d7a4667998
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 134 additions and 72 deletions

View File

@ -21,8 +21,8 @@ solo = false
allowed_origins = ["*"] allowed_origins = ["*"]
[ping] [ping]
enable = false enable = true
duration = "2s" duration = "15s"
[security] [security]
enable_tls = false enable_tls = false

2
go.mod
View File

@ -26,7 +26,7 @@ require (
github.com/meshplus/bitxhub-core v0.1.0-rc1.0.20201118083603-e65956cefda2 github.com/meshplus/bitxhub-core v0.1.0-rc1.0.20201118083603-e65956cefda2
github.com/meshplus/bitxhub-kit v1.1.2-0.20201027090548-41dfc41037af github.com/meshplus/bitxhub-kit v1.1.2-0.20201027090548-41dfc41037af
github.com/meshplus/bitxhub-model v1.1.2-0.20201023091417-b6445e44d535 github.com/meshplus/bitxhub-model v1.1.2-0.20201023091417-b6445e44d535
github.com/meshplus/go-lightp2p v0.0.0-20201102131103-3fa9723c2c7c github.com/meshplus/go-lightp2p v0.0.0-20201119082512-489f5f269aac
github.com/mitchellh/go-homedir v1.1.0 github.com/mitchellh/go-homedir v1.1.0
github.com/multiformats/go-multiaddr v0.2.2 github.com/multiformats/go-multiaddr v0.2.2
github.com/orcaman/concurrent-map v0.0.0-20190826125027-8c72a8bb44f6 github.com/orcaman/concurrent-map v0.0.0-20190826125027-8c72a8bb44f6

View File

@ -232,6 +232,7 @@ func TestInterchainManager_HandleIBTP(t *testing.T) {
from := types.NewAddress([]byte{0}).String() from := types.NewAddress([]byte{0}).String()
to := types.NewAddress([]byte{1}).String() to := types.NewAddress([]byte{1}).String()
mockStub.EXPECT().Set(gomock.Any(), gomock.Any()).AnyTimes() mockStub.EXPECT().Set(gomock.Any(), gomock.Any()).AnyTimes()
mockStub.EXPECT().SetObject(gomock.Any(), gomock.Any()).AnyTimes()
f1 := mockStub.EXPECT().Get(appchainMgr.PREFIX+from).Return(false, nil) f1 := mockStub.EXPECT().Get(appchainMgr.PREFIX+from).Return(false, nil)
interchain := pb.Interchain{ interchain := pb.Interchain{
@ -290,7 +291,7 @@ func TestInterchainManager_HandleIBTP(t *testing.T) {
res = im.HandleIBTP(ibtp) res = im.HandleIBTP(ibtp)
assert.False(t, res.Ok) assert.False(t, res.Ok)
assert.Equal(t, "wrong index, required 2, but 0", string(res.Result)) assert.Equal(t, "index already exists, required 2, but 0", string(res.Result))
ibtp.Index = 2 ibtp.Index = 2
res = im.HandleIBTP(ibtp) res = im.HandleIBTP(ibtp)

View File

@ -186,7 +186,10 @@ func (x *InterchainManager) checkIBTP(ibtp *pb.IBTP, interchain *pb.Interchain)
} }
idx := interchain.InterchainCounter[ibtp.To] idx := interchain.InterchainCounter[ibtp.To]
if idx+1 != ibtp.Index { if ibtp.Index <= idx {
return fmt.Errorf(fmt.Sprintf("index already exists, required %d, but %d", idx+1, ibtp.Index))
}
if ibtp.Index > idx+1 {
return fmt.Errorf(fmt.Sprintf("wrong index, required %d, but %d", idx+1, ibtp.Index)) return fmt.Errorf(fmt.Sprintf("wrong index, required %d, but %d", idx+1, ibtp.Index))
} }
} else { } else {
@ -195,10 +198,12 @@ func (x *InterchainManager) checkIBTP(ibtp *pb.IBTP, interchain *pb.Interchain)
} }
idx := interchain.ReceiptCounter[ibtp.To] idx := interchain.ReceiptCounter[ibtp.To]
if idx+1 != ibtp.Index { if ibtp.Index <= idx {
if interchain.SourceReceiptCounter[ibtp.To]+1 != ibtp.Index { return fmt.Errorf(fmt.Sprintf("receipt index already exists, required %d, but %d", idx+1, ibtp.Index))
return fmt.Errorf("wrong receipt index, required %d, but %d", idx+1, ibtp.Index) }
}
if ibtp.Index > idx+1 {
return fmt.Errorf(fmt.Sprintf("wrong receipt index, required %d, but %d", idx+1, ibtp.Index))
} }
} }
@ -224,6 +229,8 @@ func (x *InterchainManager) ProcessIBTP(ibtp *pb.IBTP, interchain *pb.Interchain
ic, _ := x.getInterchain(ibtp.To) ic, _ := x.getInterchain(ibtp.To)
ic.SourceReceiptCounter[ibtp.From] = ibtp.Index ic.SourceReceiptCounter[ibtp.From] = ibtp.Index
x.setInterchain(ibtp.To, ic) x.setInterchain(ibtp.To, ic)
x.SetObject(x.indexReceiptMapKey(ibtp.ID()), x.GetTxHash())
} }
x.PostInterchainEvent(m) x.PostInterchainEvent(m)
@ -402,3 +409,7 @@ func AppchainKey(id string) string {
func (x *InterchainManager) indexMapKey(id string) string { func (x *InterchainManager) indexMapKey(id string) string {
return fmt.Sprintf("index-tx-%s", id) return fmt.Sprintf("index-tx-%s", id)
} }
// indexReceiptMapKey returns the storage key for the given IBTP id in the
// receipt index, i.e. the id prefixed with "index-receipt-tx-". ProcessIBTP
// stores the current transaction hash under this key when handling a receipt.
func (x *InterchainManager) indexReceiptMapKey(id string) string {
	key := fmt.Sprintf("index-receipt-tx-%s", id)
	return key
}

View File

@ -9,6 +9,9 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb"
"github.com/gogo/protobuf/sortkeys"
"github.com/meshplus/bitxhub-kit/storage" "github.com/meshplus/bitxhub-kit/storage"
"github.com/meshplus/bitxhub-kit/types" "github.com/meshplus/bitxhub-kit/types"
"github.com/meshplus/bitxhub-model/pb" "github.com/meshplus/bitxhub-model/pb"
@ -16,10 +19,6 @@ import (
raftproto "github.com/meshplus/bitxhub/pkg/order/etcdraft/proto" raftproto "github.com/meshplus/bitxhub/pkg/order/etcdraft/proto"
"github.com/meshplus/bitxhub/pkg/order/mempool" "github.com/meshplus/bitxhub/pkg/order/mempool"
"github.com/meshplus/bitxhub/pkg/peermgr" "github.com/meshplus/bitxhub/pkg/peermgr"
"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb"
"github.com/gogo/protobuf/sortkeys"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
) )
@ -52,10 +51,14 @@ type Node struct {
snapshotIndex uint64 // current snapshot apply index in raft log snapshotIndex uint64 // current snapshot apply index in raft log
lastIndex uint64 // last apply index in raft log lastIndex uint64 // last apply index in raft log
readyPool *sync.Pool // ready pool, avoiding memory growth fast readyPool *sync.Pool // ready pool, avoiding memory growth fast
readyCache sync.Map // ready cache readyCache sync.Map // ready cache
ctx context.Context // context
haltC chan struct{} // exit signal justElected bool
isRestart bool
ctx context.Context // context
haltC chan struct{} // exit signal
} }
@ -136,6 +139,7 @@ func (n *Node) Start() error {
} }
if restart { if restart {
n.node = raft.RestartNode(rc) n.node = raft.RestartNode(rc)
n.isRestart = true
} else { } else {
n.node = raft.StartNode(rc, n.peers) n.node = raft.StartNode(rc, n.peers)
} }
@ -175,17 +179,17 @@ func (n *Node) ReportState(height uint64, hash types.Hash) {
} }
appliedIndex, ok := n.blockAppliedIndex.Load(height) appliedIndex, ok := n.blockAppliedIndex.Load(height)
if !ok { if !ok {
n.logger.Errorf("can not found appliedIndex:", height) n.logger.Debugf("can not found appliedIndex:", height)
return return
} }
// block already persisted, record the apply index in db // block already persisted, record the apply index in db
n.writeAppliedIndex(appliedIndex.(uint64)) n.writeAppliedIndex(appliedIndex.(uint64))
n.blockAppliedIndex.Delete(height) n.blockAppliedIndex.Delete(height - 1)
// TODO: delete readyCache // TODO: delete readyCache
readyBytes, ok := n.readyCache.Load(height) readyBytes, ok := n.readyCache.Load(height)
if !ok { if !ok {
n.logger.Errorf("can not found ready:", height) n.logger.Debugf("can not found ready:", height)
return return
} }
ready := readyBytes.(*raftproto.Ready) ready := readyBytes.(*raftproto.Ready)
@ -273,17 +277,13 @@ func (n *Node) run() {
if !ok { if !ok {
n.proposeC = nil n.proposeC = nil
} else { } else {
if !n.IsLeader() {
n.logger.Warn("Follower node can't propose a proposal")
n.mempool.UpdateLeader(n.leader)
continue
}
data, err := ready.Marshal() data, err := ready.Marshal()
if err != nil { if err != nil {
n.logger.Panic(err) n.logger.Panic(err)
} }
n.logger.Debugf("Proposed block %d to raft core consensus", ready.Height)
if err := n.node.Propose(n.ctx, data); err != nil { if err := n.node.Propose(n.ctx, data); err != nil {
n.logger.Panic("Failed to propose block [%d] to raft: %s", ready.Height, err) n.logger.Errorf("Failed to propose block [%d] to raft: %s", ready.Height, err)
} }
} }
case cc, ok := <-n.confChangeC: case cc, ok := <-n.confChangeC:
@ -293,7 +293,7 @@ func (n *Node) run() {
confChangeCount++ confChangeCount++
cc.ID = confChangeCount cc.ID = confChangeCount
if err := n.node.ProposeConfChange(n.ctx, cc); err != nil { if err := n.node.ProposeConfChange(n.ctx, cc); err != nil {
n.logger.Panic("Failed to propose configuration update to Raft node: %s", err) n.logger.Errorf("Failed to propose configuration update to Raft node: %s", err)
} }
} }
case <-n.ctx.Done(): case <-n.ctx.Done():
@ -314,9 +314,30 @@ func (n *Node) run() {
// 1: Write HardState, Entries, and Snapshot to persistent storage if they // 1: Write HardState, Entries, and Snapshot to persistent storage if they
// are not empty. // are not empty.
if err := n.raftStorage.Store(rd.Entries, rd.HardState, rd.Snapshot); err != nil { if err := n.raftStorage.Store(rd.Entries, rd.HardState, rd.Snapshot); err != nil {
n.logger.Fatalf("failed to persist etcd/raft data: %s", err) n.logger.Errorf("failed to persist etcd/raft data: %s", err)
} }
if rd.SoftState != nil {
newLeader := atomic.LoadUint64(&rd.SoftState.Lead)
if newLeader != n.leader {
n.logger.Infof("Raft leader changed: %d -> %d", n.leader, newLeader)
oldLeader := n.leader
n.leader = newLeader
if newLeader == n.id {
// If the cluster is started for the first time, the leader node starts listening requests directly.
if !n.isRestart && n.getBlockAppliedIndex() == uint64(0) {
n.mempool.UpdateLeader(n.leader)
} else {
// new leader should not serve requests
n.justElected = true
}
}
// old leader node stop batch block
if oldLeader == n.id {
n.mempool.UpdateLeader(n.leader)
}
}
}
// 2: Apply Snapshot (if any) and CommittedEntries to the state machine. // 2: Apply Snapshot (if any) and CommittedEntries to the state machine.
if len(rd.CommittedEntries) != 0 { if len(rd.CommittedEntries) != 0 {
if ok := n.publishEntries(n.entriesToApply(rd.CommittedEntries)); !ok { if ok := n.publishEntries(n.entriesToApply(rd.CommittedEntries)); !ok {
@ -324,13 +345,19 @@ func (n *Node) run() {
return return
} }
} }
if rd.SoftState != nil {
newLeader := atomic.LoadUint64(&rd.SoftState.Lead) if n.justElected {
n.leader = newLeader msgInflight := n.ramLastIndex() > n.appliedIndex+1
n.mempool.UpdateLeader(newLeader) if msgInflight {
n.logger.Debugf("There are in flight blocks, new leader should not serve requests")
continue
}
n.justElected = false
n.mempool.UpdateLeader(n.leader)
} }
// 3: AsyncSend all Messages to the nodes named in the To field. // 3: AsyncSend all Messages to the nodes named in the To field.
go n.send(rd.Messages) n.send(rd.Messages)
n.maybeTriggerSnapshot() n.maybeTriggerSnapshot()
@ -342,46 +369,57 @@ func (n *Node) run() {
} }
} }
// ramLastIndex returns the last index reported by n.raftStorage.ram and logs
// it at Info level together with n.appliedIndex. The run loop compares the
// result with appliedIndex+1 to decide whether a newly elected leader still
// has in-flight entries.
//
// The error returned by LastIndex is deliberately discarded.
// NOTE(review): presumably ram is an in-memory raft storage whose LastIndex
// cannot fail; confirm against the raftStorage definition.
func (n *Node) ramLastIndex() uint64 {
	last, _ := n.raftStorage.ram.LastIndex()
	n.logger.Infof("New Leader's last index is %d, appliedIndex is %d", last, n.appliedIndex)
	return last
}
// send raft consensus message // send raft consensus message
func (n *Node) send(messages []raftpb.Message) { func (n *Node) send(messages []raftpb.Message) {
for _, msg := range messages { for _, msg := range messages {
if msg.To == 0 { go func(msg raftpb.Message) {
continue if msg.To == 0 {
} return
status := raft.SnapshotFinish }
status := raft.SnapshotFinish
data, err := (&msg).Marshal() data, err := (&msg).Marshal()
if err != nil { if err != nil {
n.logger.Error(err) n.logger.Error(err)
continue return
} }
rm := &raftproto.RaftMessage{ rm := &raftproto.RaftMessage{
Type: raftproto.RaftMessage_CONSENSUS, Type: raftproto.RaftMessage_CONSENSUS,
Data: data, Data: data,
} }
rmData, err := rm.Marshal() rmData, err := rm.Marshal()
if err != nil { if err != nil {
n.logger.Error(err) n.logger.Error(err)
continue return
} }
p2pMsg := &pb.Message{ p2pMsg := &pb.Message{
Type: pb.Message_CONSENSUS, Type: pb.Message_CONSENSUS,
Data: rmData, Data: rmData,
} }
err = n.peerMgr.AsyncSend(msg.To, p2pMsg) err = n.peerMgr.AsyncSend(msg.To, p2pMsg)
if err != nil { if err != nil {
n.logger.WithFields(logrus.Fields{ n.logger.WithFields(logrus.Fields{
"from": msg.From, "from": n.id,
}).Debug(err) "to": msg.To,
n.node.ReportUnreachable(msg.To) "msg_type": msg.Type,
status = raft.SnapshotFailure "err": err.Error(),
} }).Debugf("async send msg")
n.node.ReportUnreachable(msg.To)
status = raft.SnapshotFailure
}
if msg.Type == raftpb.MsgSnap { if msg.Type == raftpb.MsgSnap {
n.node.ReportSnapshot(msg.To, status) n.node.ReportSnapshot(msg.To, status)
} }
}(msg)
} }
} }
@ -394,11 +432,6 @@ func (n *Node) publishEntries(ents []raftpb.Entry) bool {
break break
} }
ready := n.readyPool.Get().(*raftproto.Ready)
if err := ready.Unmarshal(ents[i].Data); err != nil {
n.logger.Error(err)
continue
}
// This can happen: // This can happen:
// //
// if (1) we crashed after applying this block to the chain, but // if (1) we crashed after applying this block to the chain, but
@ -414,6 +447,12 @@ func (n *Node) publishEntries(ents []raftpb.Entry) bool {
continue continue
} }
ready := n.readyPool.Get().(*raftproto.Ready)
if err := ready.Unmarshal(ents[i].Data); err != nil {
n.logger.Error(err)
continue
}
n.mint(ready) n.mint(ready)
n.blockAppliedIndex.Store(ready.Height, ents[i].Index) n.blockAppliedIndex.Store(ready.Height, ents[i].Index)
case raftpb.EntryConfChange: case raftpb.EntryConfChange:

View File

@ -87,7 +87,7 @@ func (swarm *Swarm) Start() error {
return err return err
} }
if err := swarm.verifyCert(id); err != nil { if err := swarm.verifyCertOrDisconnect(id); err != nil {
if attempt != 0 && attempt%5 == 0 { if attempt != 0 && attempt%5 == 0 {
swarm.logger.WithFields(logrus.Fields{ swarm.logger.WithFields(logrus.Fields{
"node": id, "node": id,
@ -126,6 +126,17 @@ func (swarm *Swarm) Stop() error {
return nil return nil
} }
// verifyCertOrDisconnect verifies the certificate of the peer with the given
// id and, when verification fails, disconnects from that peer.
//
// It returns nil only when verification succeeds. When verification fails it
// returns the Disconnect error if disconnecting also failed, otherwise the
// original verification error.
func (swarm *Swarm) verifyCertOrDisconnect(id uint64) error {
	verifyErr := swarm.verifyCert(id)
	if verifyErr == nil {
		return nil
	}

	// Use a separate variable for the Disconnect result: reusing the
	// verification error variable would overwrite it with nil on a successful
	// disconnect and report the failed verification as success.
	if err := swarm.p2p.Disconnect(swarm.peers[id].ID.String()); err != nil {
		return err
	}
	return verifyErr
}
func (swarm *Swarm) Ping() { func (swarm *Swarm) Ping() {
ticker := time.NewTicker(swarm.pingTimeout) ticker := time.NewTicker(swarm.pingTimeout)
for { for {