創建etcdserver
創建一個etcdserver的實例
etcdserver.NewServer
啓動服務
e.Server.Start()
etcd/embed/etcd.go
// StartEtcd (excerpt) creates the etcd server with etcdserver.NewServer and
// then starts it with e.Server.Start. When NewServer fails, the error is
// returned together with e as it stands at that point.
// Lines consisting of an ellipsis marker stand for code omitted from this
// excerpt.
func StartEtcd(inCfg *Config) (e *Etcd, err error) {
…省略其它代碼
if e.Server, err = etcdserver.NewServer(srvcfg); err != nil {
return e, err
}
…省略其它代碼
e.Server.Start()
…省略其它代碼
}
創建節點,初始化節點信息,初始化http服務
創建節點startNode
初始化http服務Transport
並且添加其它節點
etcd/etcdserver/server.go
// NewServer (excerpt) builds the EtcdServer for cfg. In the part shown here it:
//   1. starts the raft node through startNode,
//   2. assembles the EtcdServer value, including its embedded raftNode,
//   3. creates and starts the rafthttp.Transport,
//   4. registers every other known member with the transport, and
//   5. attaches the transport to the raftNode.
// It returns nil and the error if the transport fails to start.
// Lines consisting of an ellipsis marker stand for code omitted from this
// excerpt.
func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
...省略其它代碼
id, n, s, w = startNode(cfg, cl, nil)
...省略其它代碼
srv = &EtcdServer{
readych: make(chan struct{}),
Cfg: cfg,
lgMu: new(sync.RWMutex),
lg: cfg.Logger,
errorc: make(chan error, 1),
v2store: st,
snapshotter: ss,
// build the raftNode around the raft.Node (n) and storage returned by startNode
r: *newRaftNode(
raftNodeConfig{
lg: cfg.Logger,
isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
Node: n,
heartbeat: heartbeat,
raftStorage: s,
storage: NewStorage(w, ss),
},
),
id: id,
attributes: membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
cluster: cl,
stats: sstats,
lstats: lstats,
SyncTicker: time.NewTicker(500 * time.Millisecond),
peerRt: prt,
reqIDGen: idutil.NewGenerator(uint16(id), time.Now()),
forceVersionC: make(chan struct{}),
AccessController: &AccessController{CORS: cfg.CORS, HostWhitelist: cfg.HostWhitelist},
}
…省略其它代碼
// TODO: move transport initialization near the definition of remote
tr := &rafthttp.Transport{
Logger: cfg.Logger,
TLSInfo: cfg.PeerTLSInfo,
DialTimeout: cfg.peerDialTimeout(),
ID: id, // ID of the local member
URLs: cfg.PeerURLs, // URLs the local member uses when talking to other members
ClusterID: cl.ID(), // ID of the cluster the local member belongs to
Raft: srv, // the server itself acts as the transport's Raft implementation
Snapshotter: ss, // the same snapshotter stored in srv.snapshotter
ServerStats: sstats, // the same value stored in srv.stats
LeaderStats: lstats, // the same value stored in srv.lstats
ErrorC: srv.errorc,
}
if err = tr.Start(); err != nil {
return nil, err
}
// add all remotes into transport
for _, m := range remotes {
if m.ID != id {
tr.AddRemote(m.ID, m.PeerURLs)
}
}
// add every cluster member except the local one as a peer
for _, m := range cl.Members() {
if m.ID != id {
tr.AddPeer(m.ID, m.PeerURLs)
}
}
srv.r.transport = tr
return srv, nil
}
根據配置啓動一個node
etcd/etcdserver/raft.go
// startNode (excerpt) starts the raft node for the local member. With an empty
// peers list it calls raft.RestartNode, otherwise raft.StartNode.
// NOTE(review): the construction of c and peers and the assignment of the
// remaining results (id, s, w) are not part of this excerpt.
func startNode(cfg ServerConfig, cl *membership.RaftCluster, ids []types.ID) (id types.ID, n raft.Node, s *raft.MemoryStorage, w *wal.WAL) {
if len(peers) == 0 {
n = raft.RestartNode(c)
} else {
n = raft.StartNode(c, peers)
}
}
node提供了一個基礎對外接口,並且同時啓一個goroutine 處理各個狀態機之間通信
etcd/raft/node.go
// StartNode creates a node from the given configuration, bootstraps it with
// the supplied peers and launches its run loop in a new goroutine.
//
// It panics when peers is empty (RestartNode must be used in that case) and
// when NewRawNode reports an error.
func StartNode(c *Config, peers []Peer) Node {
	if len(peers) == 0 {
		panic("no peers given; use RestartNode instead")
	}

	rawNode, err := NewRawNode(c)
	if err != nil {
		panic(err)
	}
	rawNode.Bootstrap(peers)

	// The run loop owns the raw node from here on; callers talk to it through
	// the returned Node.
	started := newNode(rawNode)
	go started.run()
	return &started
}
// run is the node's event loop. Every iteration it
//   1. decides whether a Ready should be offered on readyc,
//   2. tracks leader changes, enabling proposal intake only while a leader is
//      known, and
//   3. blocks in a select until exactly one event has been handled.
// Cases whose channel variable is nil (propc, readyc, advancec) are disabled
// for that iteration. The loop ends when n.stop fires, after closing n.done.
func (n *node) run() {
var propc chan msgWithResult
var readyc chan Ready
var advancec chan struct{}
var rd Ready
r := n.rn.raft
lead := None
for {
if advancec != nil {
// the previous Ready has not been advanced yet: do not offer another one
readyc = nil
} else if n.rn.HasReady() {// a Ready is pending
rd = n.rn.readyWithoutAccept()// build it, but do not accept it until a receiver takes it
readyc = n.readyc
}
// react to a change of the known leader
if lead != r.lead {
if r.hasLeader() {
if lead == None {
r.logger.Infof("raft.node: %x elected leader %x at term %d", r.id, r.lead, r.Term)
} else {
r.logger.Infof("raft.node: %x changed leader from %x to %x at term %d", r.id, lead, r.lead, r.Term)
}
// a leader is known: accept proposals
propc = n.propc
} else {
r.logger.Infof("raft.node: %x lost leader %x at term %d", r.id, lead, r.Term)
// no leader: stop accepting proposals
propc = nil
}
lead = r.lead
}
select {
case pm := <-propc:// a proposal arrived: stamp it with the local id and step it through raft
m := pm.m
m.From = r.id
err := r.Step(m)
// report the outcome to the proposer if it asked for one
if pm.result != nil {
pm.result <- err
close(pm.result)
}
case m := <-n.recvc:// a message arrived on recvc; response messages from senders without a Progress entry are dropped
if pr := r.prs.Progress[m.From]; pr != nil || !IsResponseMsg(m.Type) {
r.Step(m)
}
case cc := <-n.confc:// configuration change
_, okBefore := r.prs.Progress[r.id]
cs := r.applyConfChange(cc)
// the local node had a Progress entry before the change but not after it:
// unless it is still listed among the voters, stop accepting proposals
if _, okAfter := r.prs.Progress[r.id]; okBefore && !okAfter {
var found bool
for _, sl := range [][]uint64{cs.Voters, cs.VotersOutgoing} {
for _, id := range sl {
if id == r.id {
found = true
}
}
}
if !found {
propc = nil
}
}
// hand the result of applyConfChange back, unless the node is already done
select {
case n.confstatec <- cs:
case <-n.done:
}
case <-n.tickc:// tick signal: forward it to the raw node
n.rn.Tick()
case readyc <- rd:// a receiver took the Ready: accept it and wait for the matching advance
n.rn.acceptReady(rd)
advancec = n.advancec
case <-advancec:// the receiver is done with the last Ready
n.rn.Advance(rd)
rd = Ready{}
advancec = nil
case c := <-n.status:// status request: reply on the supplied channel
c <- getStatus(r)
case <-n.stop:// stop requested
close(n.done)
return
}
}
}
// Tick triggers a clock (tick) event on the node.
// The body is omitted from this excerpt.
func (n *node) Tick() {
…略
}
//TODO 爲外界提供了日誌提交接口 Propose,客戶端寫請求的消息類型爲 pb.MsgProp
// Propose submits data as a proposal to the node.
// NOTE(review): per the original note it blocks until the request has been
// accepted by the raft state machine; the body is omitted from this excerpt,
// so that cannot be confirmed here.
func (n *node) Propose(ctx context.Context, data []byte) error {
…略
}
狀態傳輸服務
- Transport 結構體,主要是封裝了各個節點之間狀態傳輸
- 創建多路複用器 ServeMux
- 創建pipelineHandler、streamHandler 、snapHandler 三個實例,這三個實例都實現了Handler接口
etcd/etcdserver/api/rafthttp/transport.go
// Transport bundles what the local member needs to exchange raft traffic with
// the other members of the cluster: its identity, the Raft implementation that
// receives messages, statistics sinks, the HTTP round trippers and the
// per-member remote/peer maps.
type Transport struct {
Logger *zap.Logger
DialTimeout time.Duration // maximum duration before timing out dial of the request
DialRetryFrequency rate.Limit
TLSInfo transport.TLSInfo // TLS information used when creating connection
ID types.ID // local member ID
URLs types.URLs // local peer URLs, used when talking to other members of the cluster
ClusterID types.ID // raft cluster ID for request validation
Raft Raft // raft state machine, to which the Transport forwards received messages and reports status
Snapshotter *snap.Snapshotter // presumably manages snapshot files (per the original note)
ServerStats *stats.ServerStats // used to record general transportation statistics
LeaderStats *stats.LeaderStats // source of the per-follower stats handed to each peer in AddPeer
ErrorC chan error
streamRt http.RoundTripper // roundTripper used by streams (long-lived HTTP connections, per the original note)
pipelineRt http.RoundTripper // roundTripper used by pipelines; per the original note the connection is closed right after a transfer, which suits large, infrequent messages such as MsgSnap
mu sync.RWMutex // protect the remote and peer map
// Per the original note, a remote wraps only a pipeline and is mainly used
// to send snapshot data so that a newly joined member can catch up quickly.
remotes map[types.ID]*remote // remotes map that helps newly joined member to catch up
/*
Peer is the local member's abstraction of another member of the cluster:
for every other member there is one Peer instance held locally, and the
peers field maps member IDs to those instances.
*/
peers map[types.ID]Peer // peers map
// probers; per the original note they are used to check whether the message channels are usable
pipelineProber probing.Prober
streamProber probing.Prober
}
// Start prepares the transport for use and reports any failure as an error.
// NOTE(review): the original note describes this as starting the HTTP
// service; the body is omitted from this excerpt.
func (t *Transport) Start() error {
…省略其它代碼
}
// Handler returns the HTTP handler that serves all of the transport's
// endpoints. Four handlers are created and mounted on a fresh ServeMux:
// the pipeline handler on RaftPrefix, the stream handler on
// RaftStreamPrefix+"/", the snapshot handler on RaftSnapshotPrefix and the
// probing handler on ProbingPrefix.
func (t *Transport) Handler() http.Handler {
	// Route table: URL pattern -> handler. The handlers are constructed in the
	// order listed.
	routes := []struct {
		pattern string
		handler http.Handler
	}{
		{RaftPrefix, newPipelineHandler(t, t.Raft, t.ClusterID)},
		{RaftStreamPrefix + "/", newStreamHandler(t, t, t.Raft, t.ID, t.ClusterID)},
		{RaftSnapshotPrefix, newSnapshotHandler(t, t.Raft, t.Snapshotter, t.ClusterID)},
		{ProbingPrefix, probing.NewHandler()},
	}

	// The ServeMux dispatches each request to the handler registered for the
	// matching pattern.
	router := http.NewServeMux()
	for _, route := range routes {
		router.Handle(route.pattern, route.handler)
	}
	return router
}
…省略其它代碼
// AddPeer registers the member id, reachable at the URLs us, as a peer of the
// local member and starts the machinery that talks to it. NewServer calls it
// once for every cluster member other than the local one (two calls in a
// three-member cluster).
//
// Adding an id that is already present is a no-op. The method panics when the
// transport has been stopped (peers map is nil) or when us cannot be parsed.
func (t *Transport) AddPeer(id types.ID, us []string) {
	t.mu.Lock()
	defer t.mu.Unlock()

	if t.peers == nil {
		panic("transport stopped")
	}
	if _, exists := t.peers[id]; exists {
		return
	}

	peerURLs, err := types.NewURLs(us)
	switch {
	case err == nil:
		// URLs parsed fine; carry on.
	case t.Logger != nil:
		t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
	default:
		plog.Panicf("newURLs %+v should never fail: %+v", us, err)
	}

	peerName := id.String()
	followerStats := t.LeaderStats.Follower(peerName)
	t.peers[id] = startPeer(t, peerURLs, id, followerStats) // starting peer

	// Register the new peer with both probers.
	addPeerToProber(t.Logger, t.pipelineProber, peerName, us, RoundTripperNameSnapshot, rttSec)
	addPeerToProber(t.Logger, t.streamProber, peerName, us, RoundTripperNameRaftMessage, rttSec)

	if t.Logger == nil {
		plog.Infof("added peer %s", id)
		return
	}
	t.Logger.Info(
		"added remote peer",
		zap.String("local-member-id", t.ID.String()),
		zap.String("remote-peer-id", peerName),
		zap.Strings("remote-peer-urls", us),
	)
}
在startPeer中,聲明一個Raft狀態機實例,並啓動goroutine通過channel進行通信
etcd/etcdserver/api/rafthttp/peer.go
// peer is the local member's handle on one remote member: it holds the
// writers, readers, pipeline and snapshot sender used to talk to that member,
// plus the channels through which received messages reach the Raft
// implementation (see startPeer).
type peer struct {
lg *zap.Logger
localID types.ID // ID of the local member
// id of the remote raft peer node
id types.ID // ID of the remote member this peer stands for
r Raft
status *peerStatus
/*
A member may expose several URLs. When one of them fails, another one
should be tried; per the original note, switching between those URLs is
what urlPicker is for.
*/
picker *urlPicker
msgAppV2Writer *streamWriter
writer *streamWriter // writes messages to the stream channel (per the original note)
pipeline *pipeline // pipeline message channel
snapSender *snapshotSender // snapshot sender to send v3 snapshot messages
msgAppV2Reader *streamReader
msgAppReader *streamReader // reads messages from the stream channel (per the original note)
recvc chan raftpb.Message // messages read from the stream readers; a goroutine started in startPeer passes each one to Raft.Process
propc chan raftpb.Message // like recvc but, per the original note, reserved for MsgProp messages; drained by its own goroutine in startPeer
mu sync.Mutex
paused bool // whether sending to the remote member is paused (per the original note)
cancel context.CancelFunc // cancel pending works in go routine created by peer.
stopc chan struct{}
}
// startPeer builds the peer that represents the remote member peerID and
// starts everything that belongs to it:
//   - a pipeline (started immediately),
//   - two stream writers and a snapshot sender,
//   - one goroutine draining recvc and one draining propc, both of which hand
//     every message to t.Raft.Process,
//   - two stream readers (msgAppV2 and message) feeding recvc and propc.
//
// urls are the remote member's URLs, fs is the follower-stats record handed to
// the pipeline and stream writers. The returned peer is fully started; its
// goroutines end when p.stopc is closed.
//
// Failures of Process are logged through t.Logger when it is set and through
// plog otherwise, for both goroutines alike.
func startPeer(t *Transport, urls types.URLs, peerID types.ID, fs *stats.FollowerStats) *peer {
	if t.Logger != nil {
		t.Logger.Info("starting remote peer", zap.String("remote-peer-id", peerID.String()))
	} else {
		plog.Infof("starting peer %s...", peerID)
	}
	defer func() {
		if t.Logger != nil {
			t.Logger.Info("started remote peer", zap.String("remote-peer-id", peerID.String()))
		} else {
			plog.Infof("started peer %s", peerID)
		}
	}()

	status := newPeerStatus(t.Logger, t.ID, peerID) // status record for this peer
	picker := newURLPicker(urls)                    // picks among the URLs the remote member offers
	errorc := t.ErrorC
	r := t.Raft // the Raft implementation that receives incoming messages

	pipeline := &pipeline{
		peerID:        peerID,
		tr:            t,
		picker:        picker,
		status:        status,
		followerStats: fs,
		raft:          r,
		errorc:        errorc,
	}
	pipeline.start() // launches the pipeline's worker goroutines

	p := &peer{
		lg:             t.Logger,
		localID:        t.ID,
		id:             peerID,
		r:              r,
		status:         status,
		picker:         picker,
		msgAppV2Writer: startStreamWriter(t.Logger, t.ID, peerID, status, fs, r), // create and start a stream writer
		writer:         startStreamWriter(t.Logger, t.ID, peerID, status, fs, r),
		pipeline:       pipeline,
		snapSender:     newSnapshotSender(t, picker, peerID, status),
		recvc:          make(chan raftpb.Message, recvBufSize),
		propc:          make(chan raftpb.Message, maxPendingProposals),
		stopc:          make(chan struct{}),
	}

	// warnProcessFailure reports a failed Raft.Process call through whichever
	// logger the transport is configured with.
	warnProcessFailure := func(err error) {
		if t.Logger != nil {
			t.Logger.Warn("failed to process Raft message", zap.Error(err))
		} else {
			plog.Warningf("failed to process raft message (%v)", err)
		}
	}

	ctx, cancel := context.WithCancel(context.Background())
	p.cancel = cancel

	// Drain recvc: every message received from the remote member is handed to
	// the Raft implementation.
	go func() {
		for {
			select {
			case mm := <-p.recvc:
				if err := r.Process(ctx, mm); err != nil {
					warnProcessFailure(err)
				}
			case <-p.stopc:
				return
			}
		}
	}()

	// r.Process might block for processing proposal when there is no leader.
	// Thus propc must be put into a separate routine with recvc to avoid blocking
	// processing other raft messages.
	go func() {
		for {
			select {
			case mm := <-p.propc:
				if err := r.Process(ctx, mm); err != nil {
					warnProcessFailure(err)
				}
			case <-p.stopc:
				return
			}
		}
	}()

	// Create and start the two stream readers; both deliver what they read
	// into the peer's recvc and propc channels.
	p.msgAppV2Reader = &streamReader{
		lg:     t.Logger,
		peerID: peerID,
		typ:    streamTypeMsgAppV2,
		tr:     t,
		picker: picker,
		status: status,
		recvc:  p.recvc,
		propc:  p.propc,
		rl:     rate.NewLimiter(t.DialRetryFrequency, 1),
	}
	p.msgAppReader = &streamReader{
		lg:     t.Logger,
		peerID: peerID,
		typ:    streamTypeMessage,
		tr:     t,
		picker: picker,
		status: status,
		recvc:  p.recvc,
		propc:  p.propc,
		rl:     rate.NewLimiter(t.DialRetryFrequency, 1),
	}

	p.msgAppV2Reader.start()
	p.msgAppReader.start()

	return p
}
上面創建了一個pipeline,並調用了它的start方法;pipeline負責把raft狀態機的消息發送給對端節點,並把發送結果報告給raft狀態機
// pipeline sends messages to one remote member over HTTP POST requests. Its
// worker goroutines (see start and handle) take messages from msgc, post them
// and report the outcome to raft.
type pipeline struct {
peerID types.ID // ID of the remote member this pipeline sends to
tr *Transport // the owning rafthttp.Transport
picker *urlPicker // chooses which of the remote member's URLs to use
status *peerStatus // status record of the remote peer
raft Raft
errorc chan error
// deprecate when we deprecate v2 API
followerStats *stats.FollowerStats
msgc chan raftpb.Message // messages waiting to be sent; consumed by the handle goroutines
// wait for the handling routines
wg sync.WaitGroup // tracks the connPerPipeline handle goroutines launched by start (4 by default per the original note), so they can be waited for
stopc chan struct{}
}
// start initialises the pipeline's channels and launches its workers:
// connPerPipeline goroutines, each running handle and each accounted for in
// p.wg. Once they are launched it logs that pipelining has started, through
// the transport's logger when one is available and through plog otherwise.
func (p *pipeline) start() {
	p.stopc = make(chan struct{})
	// Buffered queue of outgoing messages, pipelineBufSize entries deep.
	p.msgc = make(chan raftpb.Message, pipelineBufSize)

	p.wg.Add(connPerPipeline)
	for remaining := connPerPipeline; remaining > 0; remaining-- {
		// Each worker takes messages from msgc and posts them to the remote peer.
		go p.handle()
	}

	if p.tr == nil || p.tr.Logger == nil {
		plog.Infof("started HTTP pipelining with peer %s", p.peerID)
		return
	}
	p.tr.Logger.Info(
		"started HTTP pipelining with remote peer",
		zap.String("local-member-id", p.tr.ID.String()),
		zap.String("remote-peer-id", p.peerID.String()),
	)
}
…省略其它代碼
// The code below is the sending side of the pipeline.

// handle is the body of one pipeline worker. It loops until stopc fires,
// taking messages from msgc, sending each with post and then reporting the
// outcome: peer status, follower statistics (MsgApp only), reachability,
// snapshot result (snapshot messages only) and the sent/failed metrics.
func (p *pipeline) handle() {
	defer p.wg.Done()

	for {
		select {
		case <-p.stopc:
			return

		case msg := <-p.msgc: // next message waiting to be sent
			begin := time.Now()
			// Serialise the message and send it as an HTTP request.
			postErr := p.post(pbutil.MustMarshal(&msg))
			finish := time.Now()

			if postErr == nil {
				// Delivered: the connection to the peer is considered active.
				p.status.activate()
				if msg.Type == raftpb.MsgApp && p.followerStats != nil {
					p.followerStats.Succ(finish.Sub(begin))
				}
				if isMsgSnap(msg) {
					// Tell raft the snapshot went through.
					p.raft.ReportSnapshot(msg.To, raft.SnapshotFinish)
				}
				sentBytes.WithLabelValues(types.ID(msg.To).String()).Add(float64(msg.Size()))
				continue
			}

			// Delivery failed: record it on the peer status.
			p.status.deactivate(failureType{source: pipelineMsg, action: "write"}, postErr.Error())
			if msg.Type == raftpb.MsgApp && p.followerStats != nil {
				p.followerStats.Fail()
			}
			// Tell raft the target could not be reached.
			p.raft.ReportUnreachable(msg.To)
			if isMsgSnap(msg) {
				// For snapshot messages also report the failed transfer.
				p.raft.ReportSnapshot(msg.To, raft.SnapshotFailure)
			}
			sentFailures.WithLabelValues(types.ID(msg.To).String()).Inc()
		}
	}
}
// post POSTs a data payload to a url. Returns nil if the POST succeeds,
// error on any failure.
//
// The URL comes from p.picker; whenever the round trip, reading the body or
// the response check fails, that URL is reported back to the picker as
// unreachable. While the request is in flight a helper goroutine watches
// p.stopc and cancels the request if the pipeline is stopped.
func (p *pipeline) post(data []byte) (err error) {
u := p.picker.pick()// URL of the remote peer to post to
// build the HTTP POST request
req := createPostRequest(u, RaftPrefix, bytes.NewBuffer(data), "application/protobuf", p.tr.URLs, p.tr.ID, p.tr.ClusterID)
done := make(chan struct{}, 1)// tells the goroutine below that the round trip has returned; buffered so the send never blocks
ctx, cancel := context.WithCancel(context.Background())
req = req.WithContext(ctx)
go func() {// watcher: decides whether the request has to be cancelled
select {
case <-done:
case <-p.stopc:// the pipeline was stopped while the request was in flight
waitSchedule()
cancel()// cancel the request
}
}()
resp, err := p.tr.pipelineRt.RoundTrip(req)// send the request and obtain the response
done <- struct{}{}// tell the watcher that the round trip is over
if err != nil {
p.picker.unreachable(u)
return err
}
defer resp.Body.Close()
b, err := ioutil.ReadAll(resp.Body)// read the whole response body
if err != nil {
p.picker.unreachable(u)// mark this URL as unreachable
return err
}
err = checkPostResponse(resp, b, req, p.peerID)// validate the response
if err != nil {
p.picker.unreachable(u)
// errMemberRemoved is a critical error since a removed member should
// always be stopped. So we use reportCriticalError to report it to errorc.
if err == errMemberRemoved {
reportCriticalError(err, p.errorc)
}
return err
}
return nil
}
…省略其它代碼
接收消息流程
- 實現ServeHTTP方法,
// newPipelineHandler returns the http.Handler that Transport.Handler mounts
// on RaftPrefix. It is given the transport, the Raft implementation and the
// cluster ID. The body is omitted from this excerpt.
func newPipelineHandler(t *Transport, r Raft, cid types.ID) http.Handler {
…省略其它代碼
}
// ServeHTTP (excerpt) handles an incoming snapshot request. The part shown
// here passes the message m to the handler's Raft implementation through
// Process. How m is obtained and how a Process error is answered are omitted
// from this excerpt.
func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
…省略其它代碼
// hand the message to the Raft implementation
if err := h.r.Process(context.TODO(), m); err != nil {
…省略其它代碼
}
…省略其它代碼
}
Raft接口
etcd/etcdserver/api/rafthttp/transport.go
// Raft is what the transport requires from the raft side: a sink for received
// messages plus callbacks for reporting membership and delivery status.
// EtcdServer is passed as the implementation in NewServer.
type Raft interface {
Process(ctx context.Context, m raftpb.Message) error // hands the given message over for processing by raft
IsIDRemoved(id uint64) bool // reports whether the member with this id has been removed from the cluster
ReportUnreachable(id uint64) // reports that the member with this id could not be reached
ReportSnapshot(id uint64, status raft.SnapshotStatus) // reports whether sending a snapshot to the member succeeded
}
2.EtcdServer是核心結構體,實現了transport裏面的Raft接口,然後再通過自己實現的Process方法調用底層raft狀態機的Step方法(整體的邏輯都在這個結構體,具體實現再交給其它子模塊完成)
etcd/etcdserver/server.go
// Start (excerpt) starts the server by calling the unexported start; the
// remaining steps are omitted from this excerpt.
func (s *EtcdServer) Start() {
s.start()
…省略其它代碼
}
// start (excerpt) launches the server's run loop in its own goroutine; the
// preceding setup is omitted from this excerpt.
func (s *EtcdServer) start() {
…省略其它代碼
go s.run()
}
// run (excerpt) is the server's main loop. The part shown here starts the
// raftNode with the handler rh; how rh is built and what follows are omitted
// from this excerpt.
func (s *EtcdServer) run() {
…省略其它代碼
// this is where the raftNode is actually started
s.r.start(rh)
…省略其它代碼
}
// Process (excerpt) implements the transport's Raft interface: after checks
// that are omitted from this excerpt, the message is stepped through the
// raftNode and the result of Step is returned.
func (s *EtcdServer) Process(ctx context.Context, m raftpb.Message) error {
…省略其它代碼
return s.r.Step(ctx, m)
}
// IsIDRemoved implements the transport's Raft interface by asking the
// server's cluster whether the member with the given id has been removed.
func (s *EtcdServer) IsIDRemoved(id uint64) bool {
	memberID := types.ID(id)
	return s.cluster.IsIDRemoved(memberID)
}
// ReportUnreachable implements the transport's Raft interface by forwarding
// the report about member id to the server's raftNode.
func (s *EtcdServer) ReportUnreachable(id uint64) {
	s.r.ReportUnreachable(id)
}
節點投票過程
- raft結構體實現的是狀態機的核心邏輯,具體細節部分得再開一篇文章來回顧了
etcd/raft/raft.go
// Step is the entry point through which every message reaches the raft state
// machine; the voting procedure is handled here. It works in two stages:
//   1. compare the message's term with the local term, which may turn the
//      node into a follower or cause the message to be dropped or answered
//      right away;
//   2. dispatch on the message type: MsgHup starts a campaign, MsgVote and
//      MsgPreVote are answered with a grant or a rejection, and everything
//      else goes to the role-specific r.step function.
// It returns the error of r.step, and nil in all other cases.
func (r *raft) Step(m pb.Message) error {
// Handle the message term, which may result in our stepping down to a follower.
switch {
case m.Term == 0:
// local message
case m.Term > r.Term: // the message carries a higher term than ours
// Whatever its current role, a node that gets here with MsgApp,
// MsgHeartbeat or MsgSnap calls becomeFollower(m.Term, m.From) in the
// default branch below; other message types reaching that branch call
// becomeFollower(m.Term, None).
if m.Type == pb.MsgVote || m.Type == pb.MsgPreVote {
// A Context equal to campaignTransfer marks (per the original note) a
// vote request caused by a leadership transfer; such a request
// bypasses the lease check below.
force := bytes.Equal(m.Context, []byte(campaignTransfer))
// inLease: checkQuorum is on, a leader is known and the election
// timer has not yet reached electionTimeout.
inLease := r.checkQuorum && r.lead != None && r.electionElapsed < r.electionTimeout
if !force && inLease { // ignore the request: this node does not take part in this election
return nil
}
}
switch { // decide, by message type, whether the node changes state
case m.Type == pb.MsgPreVote: // a MsgPreVote never changes the state of this node
// Never change our term in response to a PreVote
case m.Type == pb.MsgPreVoteResp && !m.Reject:
default:
r.logger.Infof("%x [term: %d] received a %s message with higher term from %x [term: %d]",
r.id, r.Term, m.Type, m.From, m.Term)
if m.Type == pb.MsgApp || m.Type == pb.MsgHeartbeat || m.Type == pb.MsgSnap {
r.becomeFollower(m.Term, m.From)
} else {
r.becomeFollower(m.Term, None)
}
}
case m.Term < r.Term:
if (r.checkQuorum || r.preVote) && (m.Type == pb.MsgHeartbeat || m.Type == pb.MsgApp) {
r.send(pb.Message{To: m.From, Type: pb.MsgAppResp})
} else if m.Type == pb.MsgPreVote {
r.send(pb.Message{To: m.From, Term: r.Term, Type: pb.MsgPreVoteResp, Reject: true})
} else {
// ignore other cases
r.logger.Infof("%x [term: %d] ignored a %s message with lower term from %x [term: %d]",
r.id, r.Term, m.Type, m.From, m.Term)
}
return nil
}
switch m.Type {
case pb.MsgHup: // request to start an election
if r.state != StateLeader { // only a node that is not the leader acts on MsgHup
// a node that is not promotable may not campaign
if !r.promotable() {
r.logger.Warningf("%x is unpromotable and can not campaign; ignoring MsgHup", r.id)
return nil
}
// fetch the entries that are committed but not yet applied
ents, err := r.raftLog.slice(r.raftLog.applied+1, r.raftLog.committed+1, noLimit)
if err != nil {
r.logger.Panicf("unexpected error getting unapplied entries (%v)", err)
}
// refuse to campaign while a committed configuration change is still unapplied
if n := numOfPendingConf(ents); n != 0 && r.raftLog.committed > r.raftLog.applied {
r.logger.Warningf("%x cannot campaign at term %d since there are still %d pending configuration changes to apply", r.id, r.Term, n)
return nil
}
// start the election
r.logger.Infof("%x is starting a new election at term %d", r.id, r.Term)
if r.preVote {
// PreVote is enabled: campaign with campaignPreElection first
r.campaign(campaignPreElection)
} else {
r.campaign(campaignElection)
}
} else { // already the leader: only a debug line is logged
r.logger.Debugf("%x ignoring MsgHup because already leader", r.id)
}
case pb.MsgVote, pb.MsgPreVote: // vote and pre-vote requests
// We can vote if this is a repeat of a vote we've already cast...
// First check whether a vote may be given at all:
// 1. r.Vote already equals the sender: a repeat of a vote already cast;
// 2. no vote has been cast and no leader is known;
// 3. the request is a MsgPreVote for a term higher than ours.
canVote := r.Vote == m.From ||
(r.Vote == None && r.lead == None) ||
(m.Type == pb.MsgPreVote && m.Term > r.Term)
// ...and grant it only if isUpToDate accepts the sender's (Index, LogTerm)
if canVote && r.raftLog.isUpToDate(m.Index, m.LogTerm) {
// Grant: reply to the sender without Reject. For a real MsgVote the
// vote is also recorded and the election timer is reset.
r.send(pb.Message{To: m.From, Term: m.Term, Type: voteRespMsgType(m.Type)})
if m.Type == pb.MsgVote { // real vote, not a pre-vote
// Only record real votes.
r.electionElapsed = 0
r.Vote = m.From
}
} else {
// otherwise answer with a rejection
r.send(pb.Message{To: m.From, Term: r.Term, Type: voteRespMsgType(m.Type), Reject: true})
}
default:
err := r.step(r, m)
if err != nil {
return err
}
}
return nil
}
參照
https://raft.github.io/
https://blog.csdn.net/skh2015java/category_9284671.html