etcd源碼raft(未整理)

創建etcdserver

創建一個etcdserver的實例
etcdserver.NewServer
啓動服務
e.Server.Start()

etcd/embed/etcd.go

func StartEtcd(inCfg *Config) (e *Etcd, err error) {
	…省略其它代碼
    if e.Server, err = etcdserver.NewServer(srvcfg); err != nil {
        return e, err
    }

    …省略其它代碼
    e.Server.Start()
	…省略其它代碼
}

創建節點,初始化節點信息,初始化http服務

創建節點startNode
初始化http服務Transport
並且添加其它節點
etcd/etcdserver/server.go

func NewServer(cfg ServerConfig) (srv *EtcdServer, err error) {
	...省略其它代碼
	id, n, s, w = startNode(cfg, cl, nil)
	...省略其它代碼
	srv = &EtcdServer{
        readych:     make(chan struct{}),
        Cfg:         cfg,
        lgMu:        new(sync.RWMutex),
        lg:          cfg.Logger,
        errorc:      make(chan error, 1),
        v2store:     st,
        snapshotter: ss,

	//創建raftNode
        r: *newRaftNode(
            raftNodeConfig{
                lg:          cfg.Logger,
                isIDRemoved: func(id uint64) bool { return cl.IsIDRemoved(types.ID(id)) },
                Node:        n,
                heartbeat:   heartbeat,
                raftStorage: s,
                storage:     NewStorage(w, ss),
            },
        ),
        id:               id,
        attributes:       membership.Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
        cluster:          cl,
        stats:            sstats,
        lstats:           lstats,
        SyncTicker:       time.NewTicker(500 * time.Millisecond),
        peerRt:           prt,
        reqIDGen:         idutil.NewGenerator(uint16(id), time.Now()),
        forceVersionC:    make(chan struct{}),
        AccessController: &AccessController{CORS: cfg.CORS, HostWhitelist: cfg.HostWhitelist},
    }

	…省略其它代碼

// TODO: move transport initialization near the definition of remote
    tr := &rafthttp.Transport{
        Logger:      cfg.Logger,
        TLSInfo:     cfg.PeerTLSInfo,
        DialTimeout: cfg.peerDialTimeout(),
        ID:          id,    //當前節點自己的ID
        URLs:        cfg.PeerURLs,  //當前節點與集羣中其他節點交互時使用的URL地址
        ClusterID:   cl.ID(),   //當前節點所在的集羣的ID
        Raft:        srv,   //raft狀態機
        Snapshotter: ss,    //負責管理快照文件
        ServerStats: sstats,    //用於統計一般的transportation統計
        LeaderStats: lstats,    //raft協議中的leader節點統計followers節點的 transportation 狀態
        ErrorC:      srv.errorc,
    }
    if err = tr.Start(); err != nil {
        return nil, err
    }
    // add all remotes into transport
    for _, m := range remotes {
        if m.ID != id {
            tr.AddRemote(m.ID, m.PeerURLs)
        }
    }
    for _, m := range cl.Members() {
        if m.ID != id {
            tr.AddPeer(m.ID, m.PeerURLs)
        }
    }
    srv.r.transport = tr

    return srv, nil


}

根據配置啓動一個node

etcd/etcdserver/raft.go

func startNode(cfg ServerConfig, cl *membership.RaftCluster, ids []types.ID) (id types.ID, n raft.Node, s *raft.MemoryStorage, w *wal.WAL) {
    if len(peers) == 0 {
        n = raft.RestartNode(c)
    } else {
        n = raft.StartNode(c, peers)
    }
}

node提供了一個基礎對外接口,並且同時啓一個goroutine 處理各個狀態機之間通信
etcd/raft/node.go

// StartNode creates a new raft node from config c and the initial peer list,
// starts the node's event loop in its own goroutine, and returns the node.
//
// It panics when peers is empty (the panic message points callers at
// RestartNode for that case) or when NewRawNode returns an error.
func StartNode(c *Config, peers []Peer) Node {
    if len(peers) == 0 {
        panic("no peers given; use RestartNode instead")
    }

    rawNode, err := NewRawNode(c)
    if err != nil {
        panic(err)
    }
    // NOTE(review): any value returned by Bootstrap is discarded here — confirm
    // against the RawNode API whether that is intentional.
    rawNode.Bootstrap(peers)

    nd := newNode(rawNode)

    // The event loop runs for the lifetime of the node; see run().
    go nd.run()
    return &nd
}


// run is the node's event loop. On every iteration it decides which channels
// are currently "armed" (a nil channel never fires in a select), then waits
// for exactly one event: a proposal, a received message, a configuration
// change, a tick, a Ready hand-off, an Advance acknowledgement, a status
// request, or a stop signal. It returns only after n.stop fires, closing
// n.done on the way out.
func (n *node) run() {
    var propc chan msgWithResult
    var readyc chan Ready
    var advancec chan struct{}
    var rd Ready

    r := n.rn.raft

    // lead remembers the leader observed on the previous iteration so that
    // leadership changes can be logged and propc armed/disarmed accordingly.
    lead := None

    for {
        if advancec != nil {
            // A Ready has been handed out and not yet acknowledged: disarm
            // readyc so no further Ready is offered until Advance arrives.
            readyc = nil
        } else if n.rn.HasReady() {//there is pending state to hand to the application
            rd = n.rn.readyWithoutAccept()//build the Ready without marking it accepted yet
            readyc = n.readyc
        }

        if lead != r.lead {
            if r.hasLeader() {
                if lead == None {
                    r.logger.Infof("raft.node: %x elected leader %x at term %d", r.id, r.lead, r.Term)
                } else {
                    r.logger.Infof("raft.node: %x changed leader from %x to %x at term %d", r.id, lead, r.lead, r.Term)
                }
                // A leader is known: start accepting proposals.
                propc = n.propc
            } else {
                r.logger.Infof("raft.node: %x lost leader %x at term %d", r.id, lead, r.Term)
                // No leader: stop accepting proposals until one is known.
                propc = nil
            }
            lead = r.lead
        }

        select {
        case pm := <-propc://a proposal arrived; stamp this node as the sender and step it
            m := pm.m
            m.From = r.id
            err := r.Step(m)
            // Report the outcome to the proposer if it supplied a result channel.
            if pm.result != nil {
                pm.result <- err
                close(pm.result)
            }
        case m := <-n.recvc://a message arrived on recvc
            // Step the message unless it is a response from a node that has no
            // progress entry (i.e. drop responses from unknown senders).
            if pr := r.prs.Progress[m.From]; pr != nil || !IsResponseMsg(m.Type) {
                r.Step(m)
            }
        case cc := <-n.confc://configuration change
            _, okBefore := r.prs.Progress[r.id]
            cs := r.applyConfChange(cc)
            // If this node had a progress entry before the change and has none
            // afterwards, and it is listed in neither Voters nor VotersOutgoing,
            // stop accepting proposals.
            if _, okAfter := r.prs.Progress[r.id]; okBefore && !okAfter {
                var found bool
                for _, sl := range [][]uint64{cs.Voters, cs.VotersOutgoing} {
                    for _, id := range sl {
                        if id == r.id {
                            found = true
                        }
                    }
                }
                if !found {
                    propc = nil
                }
            }
            // Hand the resulting conf state back, unless the node is already done.
            select {
            case n.confstatec <- cs:
            case <-n.done:
            }
        case <-n.tickc://a tick fired (per the original note: drives heartbeat and election timeouts)
            n.rn.Tick()
        case readyc <- rd://the Ready was picked up by the receiver
            n.rn.acceptReady(rd)
            // Arm advancec: the next Ready waits until this one is acknowledged.
            advancec = n.advancec
        case <-advancec://the receiver acknowledged the last Ready; advance and clear it
            n.rn.Advance(rd)
            rd = Ready{}
            advancec = nil
        case c := <-n.status://status request; reply on the supplied channel
            c <- getStatus(r)
        case <-n.stop://stop signal received
            close(n.done)
            return
        }
    }
}
//觸發時鐘事件
func (n *node) Tick() {
	…略
}

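// TODO: Propose is the log-submission entry point exposed to callers; client write requests use message type pb.MsgProp.
// It blocks until the request has been accepted by the raft state machine.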
func (n *node) Propose(ctx context.Context, data []byte) error {
    …略
}


狀態傳輸服務

  1. Transport 結構體,主要是封裝了各個節點之間狀態傳輸
  2. 創建多路複用器 ServeMux
  3. 創建pipelineHandler、streamHandler 、snapHandler 三個實例,這三個實例都實現了Handler接口

etcd/etcdserver/api/rafthttp/transport.go

// Transport bundles everything a member needs to exchange raft traffic with
// the other members of the cluster: its own identity and URLs, the raft state
// machine it forwards received messages to, statistics sinks, the HTTP round
// trippers used for outgoing traffic, and the maps of known remotes and peers.
type Transport struct {
    Logger *zap.Logger
    DialTimeout time.Duration // maximum duration before timing out dial of the request
    DialRetryFrequency rate.Limit
    TLSInfo transport.TLSInfo // TLS information used when creating connection
    ID          types.ID           // local member ID
    URLs        types.URLs         // local peer URLs, used when talking to the other members of the cluster
    ClusterID   types.ID           // raft cluster ID for request validation
    Raft        Raft               // raft state machine, to which the Transport forwards received messages and reports status
    Snapshotter *snap.Snapshotter  // manages snapshot files
    ServerStats *stats.ServerStats // used to record general transportation statistics
    LeaderStats *stats.LeaderStats // used by the raft leader to record transportation statistics about its followers
    ErrorC chan error

    // roundTripper used by streams. Per the original note, stream channels
    // use long-lived HTTP connections.
    streamRt   http.RoundTripper
    // roundTripper used by pipelines. Per the original note, a pipeline
    // connection is closed as soon as the transfer finishes and is meant for
    // large, infrequent messages such as MsgSnap.
    pipelineRt http.RoundTripper

    mu      sync.RWMutex         // protect the remote and peer map
    // Per the original note, a remote wraps only a pipeline and is mainly used
    // to send snapshot data so a newly joined member can catch up quickly.
    remotes map[types.ID]*remote // remotes map that helps newly joined member to catch up
    /*
    Peer is the local node's abstraction of another member of the cluster:
    every other member has a corresponding Peer instance on this node.
    peers maps a member ID to that member's Peer instance.
    */
    peers   map[types.ID]Peer    // peers map
    // Used to probe whether the pipeline channel is usable.
    pipelineProber probing.Prober
    // Presumably the same kind of prober for the stream channel — confirm.
    streamProber   probing.Prober
}


//TODO 啓動HTTP服務
func (t *Transport) Start() error {
    …省略其它代碼
}

// Handler returns the HTTP handler that serves this transport's peer-facing
// endpoints. It builds the pipeline, stream, snapshot and probing handlers and
// registers each one on a fresh ServeMux under its URL prefix; the mux then
// dispatches incoming requests to the handler whose prefix matches.
func (t *Transport) Handler() http.Handler {
    // Handlers are constructed in the same order as they are registered.
    routes := []struct {
        pattern string
        handler http.Handler
    }{
        {RaftPrefix, newPipelineHandler(t, t.Raft, t.ClusterID)},
        {RaftStreamPrefix + "/", newStreamHandler(t, t, t.Raft, t.ID, t.ClusterID)},
        {RaftSnapshotPrefix, newSnapshotHandler(t, t.Raft, t.Snapshotter, t.ClusterID)},
        {ProbingPrefix, probing.NewHandler()},
    }

    router := http.NewServeMux()
    for _, route := range routes {
        router.Handle(route.pattern, route.handler)
    }
    return router
}


…省略其它代碼


// AddPeer registers the member identified by id, reachable at the URLs in us,
// as a peer of this transport: it starts a peer for it, stores it in t.peers,
// and adds it to both the pipeline and the stream prober. (Original author's
// note: in a three-node cluster each node ends up adding two peers.)
//
// The call is a no-op when id is already registered. It panics with
// "transport stopped" when the peers map is nil, and logs at panic level when
// us cannot be converted into URLs.
func (t *Transport) AddPeer(id types.ID, us []string) {
    t.mu.Lock()
    defer t.mu.Unlock()

    if t.peers == nil {
        panic("transport stopped")
    }
    if _, exists := t.peers[id]; exists {
        return
    }

    peerURLs, err := types.NewURLs(us)
    if err != nil {
        if t.Logger == nil {
            plog.Panicf("newURLs %+v should never fail: %+v", us, err)
        } else {
            t.Logger.Panic("failed NewURLs", zap.Strings("urls", us), zap.Error(err))
        }
    }

    // The string form of the ID is needed for the stats lookup, both probers
    // and the log line, so compute it once.
    peerName := id.String()
    followerStats := t.LeaderStats.Follower(peerName)
    t.peers[id] = startPeer(t, peerURLs, id, followerStats) // starting peer
    addPeerToProber(t.Logger, t.pipelineProber, peerName, us, RoundTripperNameSnapshot, rttSec)
    addPeerToProber(t.Logger, t.streamProber, peerName, us, RoundTripperNameRaftMessage, rttSec)

    if t.Logger == nil {
        plog.Infof("added peer %s", id)
        return
    }
    t.Logger.Info(
        "added remote peer",
        zap.String("local-member-id", t.ID.String()),
        zap.String("remote-peer-id", peerName),
        zap.Strings("remote-peer-urls", us),
    )
}


在startPeer中,聲明一個Raft狀態機實例,並啓動goroutine通過channel進行通信
etcd/etcdserver/api/rafthttp/peer.go

// peer is the local node's handle on one remote member: it carries the
// writers, readers, pipeline and snapshot sender used to talk to that member,
// plus the channels through which messages received from it are fed to the
// Raft interface.
type peer struct {
    lg *zap.Logger

    localID types.ID // ID of the local node
    // id of the remote raft peer node
    id types.ID // ID of the member this peer instance represents (the remote end)

    r Raft

    status *peerStatus
    /*
        A member may expose several URLs. When one of them cannot be reached
        another one should be tried; urlPicker's job is to switch between
        those URLs.
    */
    picker *urlPicker

    msgAppV2Writer *streamWriter
    writer         *streamWriter   // writes messages to the stream channel
    pipeline       *pipeline       // the pipeline channel
    snapSender     *snapshotSender // snapshot sender to send v3 snapshot messages
    msgAppV2Reader *streamReader
    msgAppReader   *streamReader // reads messages from the stream channel

    // Messages read from the stream channel are passed through recvc to the
    // Raft interface, which hands them to the underlying etcd-raft module.
    recvc chan raftpb.Message
    // MsgProp messages read from the stream channel are passed through propc
    // to the Raft interface, which hands them to the underlying etcd-raft
    // module. They get a channel of their own so that a blocked proposal does
    // not hold up other messages (see startPeer).
    propc chan raftpb.Message

    mu     sync.Mutex
    paused bool // whether sending messages to the remote member is paused

    cancel context.CancelFunc // cancel pending works in go routine created by peer.
    stopc  chan struct{}
}



// startPeer builds and starts everything needed to talk to the remote member
// peerID: a pipeline, two stream writers, a snapshot sender, two goroutines
// that drain recvc and propc into the Raft interface, and two stream readers.
// It logs before and after the start-up and returns the assembled peer.
//
// Parameters:
//   t      - the owning Transport; supplies logger, local ID, Raft, ErrorC.
//   urls   - URLs at which the remote member can be reached.
//   peerID - ID of the remote member.
//   fs     - follower statistics handed to the pipeline and stream writers.
func startPeer(t *Transport, urls types.URLs, peerID types.ID, fs *stats.FollowerStats) *peer {
    if t.Logger != nil {
        t.Logger.Info("starting remote peer", zap.String("remote-peer-id", peerID.String()))
    } else {
        plog.Infof("starting peer %s...", peerID)
    }
    // Log completion on the way out, whichever return path is taken.
    defer func() {
        if t.Logger != nil {
            t.Logger.Info("started remote peer", zap.String("remote-peer-id", peerID.String()))
        } else {
            plog.Infof("started peer %s", peerID)
        }
    }()

    status := newPeerStatus(t.Logger, t.ID, peerID)//status record for this peer
    picker := newURLPicker(urls)//urlPicker built from the URLs the member exposes
    errorc := t.ErrorC
    r := t.Raft //the underlying Raft state machine
    pipeline := &pipeline{
        peerID:        peerID,
        tr:            t,
        picker:        picker,
        status:        status,
        followerStats: fs,
        raft:          r,
        errorc:        errorc,
    }
    pipeline.start() //starts the pipeline's worker goroutines

    p := &peer{
        lg:             t.Logger,
        localID:        t.ID,
        id:             peerID,
        r:              r,
        status:         status,
        picker:         picker,
        msgAppV2Writer: startStreamWriter(t.Logger, t.ID, peerID, status, fs, r),//create and start a streamWriter
        writer:         startStreamWriter(t.Logger, t.ID, peerID, status, fs, r),
        pipeline:       pipeline,
        snapSender:     newSnapshotSender(t, picker, peerID, status),
        recvc:          make(chan raftpb.Message, recvBufSize),//create the recvc channel
        propc:          make(chan raftpb.Message, maxPendingProposals),//create the propc channel
        stopc:          make(chan struct{}),
    }
    // Start a dedicated goroutine that reads messages from recvc — messages
    // sent by the remote member — and hands each one to the underlying Raft
    // state machine.
    ctx, cancel := context.WithCancel(context.Background())
    p.cancel = cancel
    go func() {
        for {
            select {
            case mm := <-p.recvc://a message read from the connection
                // Hand the message to the underlying Raft state machine via Process.
                if err := r.Process(ctx, mm); err != nil {
                    if t.Logger != nil {
                        t.Logger.Warn("failed to process Raft message", zap.Error(err))
                    } else {
                        plog.Warningf("failed to process raft message (%v)", err)
                    }
                }
            case <-p.stopc:
                return
            }
        }
    }()

    // r.Process might block for processing proposal when there is no leader.
    // Thus propc must be put into a separate routine with recvc to avoid blocking
    // processing other raft messages.
    go func() {
        for {
            select {
            case mm := <-p.propc://a MsgProp message taken from propc
                if err := r.Process(ctx, mm); err != nil {
                    plog.Warningf("failed to process raft message (%v)", err)
                }
            case <-p.stopc:
                return
            }
        }
    }()
    // Create and start the streamReader instances, which read messages from
    // the stream channel and deliver them to recvc/propc.
    p.msgAppV2Reader = &streamReader{
        lg:     t.Logger,
        peerID: peerID,
        typ:    streamTypeMsgAppV2,
        tr:     t,
        picker: picker,
        status: status,
        recvc:  p.recvc,
        propc:  p.propc,
        rl:     rate.NewLimiter(t.DialRetryFrequency, 1),
    }
    p.msgAppReader = &streamReader{
        lg:     t.Logger,
        peerID: peerID,
        typ:    streamTypeMessage,
        tr:     t,
        picker: picker,
        status: status,
        recvc:  p.recvc,
        propc:  p.propc,
        rl:     rate.NewLimiter(t.DialRetryFrequency, 1),
    }

    p.msgAppV2Reader.start()
    p.msgAppReader.start()

    return p
}

上面創建了一個pipeline,並調用了它的start方法;pipeline負責把raft狀態機產生的消息發送給對端節點,並把發送結果回報給raft狀態機

// pipeline sends raft messages to one remote member over HTTP POST requests
// and reports the outcome of each send back to the Raft interface.
type pipeline struct {
    peerID types.ID // ID of the member this pipeline sends to

    tr     *Transport  // the owning rafthttp.Transport
    picker *urlPicker  // picks a usable URL of the remote member
    status *peerStatus // current status of the peer
    raft   Raft
    errorc chan error
    // deprecate when we deprecate v2 API
    followerStats *stats.FollowerStats

    msgc chan raftpb.Message // messages waiting to be sent are taken from this channel
    // wait for the handling routines
    // start() launches connPerPipeline goroutines that drain msgc (4 by
    // default per the original note); wg tracks them so that they can all be
    // waited for before the pipeline is considered closed.
    wg    sync.WaitGroup
    stopc chan struct{}
}

// start initialises the pipeline's stop and message channels, launches
// connPerPipeline handler goroutines that drain msgc and send each message to
// the remote member, and logs that the pipeline is up.
func (p *pipeline) start() {
    p.stopc = make(chan struct{})
    // Buffered so senders do not block on a busy handler; the original note
    // says the default buffer size is 64.
    p.msgc = make(chan raftpb.Message, pipelineBufSize)

    // Register every handler with the WaitGroup before any of them starts.
    // The original note says 4 handlers are started by default.
    p.wg.Add(connPerPipeline)
    for worker := 0; worker < connPerPipeline; worker++ {
        go p.handle()
    }

    if p.tr == nil || p.tr.Logger == nil {
        plog.Infof("started HTTP pipelining with peer %s", p.peerID)
        return
    }
    p.tr.Logger.Info(
        "started HTTP pipelining with remote peer",
        zap.String("local-member-id", p.tr.ID.String()),
        zap.String("remote-peer-id", p.peerID.String()),
    )
}
 …省略其它代碼
// The code below deals with sending messages.
//
// handle is the body of one pipeline worker goroutine. It loops over msgc,
// posts each message to the remote member with p.post, and then reports the
// outcome through the Raft interface and the peer status. It returns when
// stopc fires, marking itself done on the WaitGroup.
func (p *pipeline) handle() {
    defer p.wg.Done()

    for {
        select {
        case m := <-p.msgc://next message waiting to be sent (the original note says MsgSnap; the code handles any type and only special-cases MsgApp and snapshots)
            start := time.Now()
            err := p.post(pbutil.MustMarshal(&m))//serialise the message and send it as an HTTP request
            end := time.Now()

            if err != nil {
                // Mark the peer as inactive/unreachable.
                p.status.deactivate(failureType{source: pipelineMsg, action: "write"}, err.Error())

                if m.Type == raftpb.MsgApp && p.followerStats != nil {
                    p.followerStats.Fail()
                }
                p.raft.ReportUnreachable(m.To)//tell the underlying etcd-raft module the target member cannot be reached
                if isMsgSnap(m) {//for snapshot messages also report the failed send
                    p.raft.ReportSnapshot(m.To, raft.SnapshotFailure)
                }
                sentFailures.WithLabelValues(types.ID(m.To).String()).Inc()
                continue
            }

            p.status.activate()//mark the connection as active
            if m.Type == raftpb.MsgApp && p.followerStats != nil {
                // Record the round-trip duration of the successful send.
                p.followerStats.Succ(end.Sub(start))
            }
            if isMsgSnap(m) {//tell the underlying raft state machine the snapshot was sent successfully
                p.raft.ReportSnapshot(m.To, raft.SnapshotFinish)
            }
            sentBytes.WithLabelValues(types.ID(m.To).String()).Add(float64(m.Size()))
        case <-p.stopc:
            return
        }
    }
}

// post POSTs a data payload to a url. Returns nil if the POST succeeds,
// error on any failure.
//
// On any failure (round trip, reading the body, or a bad response) the chosen
// URL is reported to the picker as unreachable. If the response check yields
// errMemberRemoved, the error is additionally reported on p.errorc.
func (p *pipeline) post(data []byte) (err error) {
    u := p.picker.pick()//URL exposed by the remote member
    // Build the HTTP POST request.
    req := createPostRequest(u, RaftPrefix, bytes.NewBuffer(data), "application/protobuf", p.tr.URLs, p.tr.ID, p.tr.ClusterID)

    // done tells the goroutine below that the request has finished. It has a
    // buffer of one so the send after RoundTrip never blocks, even if the
    // goroutine already left through the stopc branch.
    done := make(chan struct{}, 1)
    ctx, cancel := context.WithCancel(context.Background())
    req = req.WithContext(ctx)
    go func() {//watches for the pipeline being stopped while the request is in flight
        select {
        case <-done:
        case <-p.stopc://the pipeline was stopped mid-request: cancel the request
            // NOTE(review): waitSchedule is defined elsewhere; presumably it
            // yields briefly before cancelling — confirm.
            waitSchedule()
            cancel()//cancel the request
        }
    }()

    resp, err := p.tr.pipelineRt.RoundTrip(req)//send the POST request and obtain the response
    done <- struct{}{}//tell the goroutine above that the request has finished
    if err != nil {
        p.picker.unreachable(u)
        return err
    }
    defer resp.Body.Close()
    b, err := ioutil.ReadAll(resp.Body)//read the whole response body
    if err != nil {
        p.picker.unreachable(u)//on error mark this URL as unusable
        return err
    }

    err = checkPostResponse(resp, b, req, p.peerID)//validate the response
    if err != nil {
        p.picker.unreachable(u)
        // errMemberRemoved is a critical error since a removed member should
        // always be stopped. So we use reportCriticalError to report it to errorc.
        if err == errMemberRemoved {
            reportCriticalError(err, p.errorc)
        }
        return err
    }

    return nil
}

…省略其它代碼

接收消息流程

  1. 實現ServeHTTP方法,
func newPipelineHandler(t *Transport, r Raft, cid types.ID) http.Handler {
	…省略其它代碼
}
func (h *snapshotHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	…省略其它代碼
//TODO 調用Process 同步raft狀態機
    if err := h.r.Process(context.TODO(), m); err != nil {
		…省略其它代碼
	}
…省略其它代碼
	
}

Raft接口
etcd/etcdserver/api/rafthttp/transport.go

// Raft is the interface through which the transport layer talks to the raft
// state machine: it delivers received messages and reports send outcomes.
type Raft interface {
    // Process passes the given message to the etcd-raft module for handling.
    Process(ctx context.Context, m raftpb.Message) error
    // IsIDRemoved reports whether the member with the given ID has been
    // removed from the cluster.
    IsIDRemoved(id uint64) bool
    // ReportUnreachable tells the etcd-raft module that the given member
    // cannot be reached from this node.
    ReportUnreachable(id uint64)
    // ReportSnapshot tells the etcd-raft module whether sending snapshot data
    // to the given member succeeded.
    ReportSnapshot(id uint64, status raft.SnapshotStatus)
}

2.EtcdServer是核心結構體,實現了transport裏面的Raft接口,然後再通過自己實現的Process方法調用底層raft狀態機的Step方法(整體的邏輯都在這個結構體,具體實現再通過其它子模塊來實現)
etcd/etcdserver/server.go

func (s *EtcdServer) Start() {
    s.start()
     …省略其它代碼
}
func (s *EtcdServer) start() {
	…省略其它代碼
	go s.run()

}

func (s *EtcdServer) run() {
	…省略其它代碼
	//真正去啓動raft
	s.r.start(rh)
	…省略其它代碼
}

func (s *EtcdServer) Process(ctx context.Context, m raftpb.Message) error {
    …省略其它代碼
    return s.r.Step(ctx, m)
}

// IsIDRemoved reports whether the member with the given raw ID has been
// removed, by converting the ID to types.ID and asking the server's cluster.
func (s *EtcdServer) IsIDRemoved(id uint64) bool {
    memberID := types.ID(id)
    return s.cluster.IsIDRemoved(memberID)
}

// ReportUnreachable forwards the "member id cannot be reached" report to the
// server's raft node.
func (s *EtcdServer) ReportUnreachable(id uint64) {
    s.r.ReportUnreachable(id)
}



節點投票過程

  1. raft結構體實現的是狀態機的核心邏輯,具體細節部分得再開一篇文章來回顧了

etcd/raft/raft.go

// Step is the entry point through which every message reaches the raft state
// machine; this walkthrough focuses on how it handles voting.
//
// It works in two stages. First it compares m.Term with the local term, which
// may make this node step down to follower or cause the message to be
// answered/dropped immediately. Then it dispatches on m.Type: MsgHup starts a
// campaign, MsgVote/MsgPreVote are answered with a grant or a rejection, and
// everything else is delegated to the role-specific r.step function.
//
// It returns the error from r.step for delegated messages and nil otherwise.
func (r *raft) Step(m pb.Message) error {
    // Handle the message term, which may result in our stepping down to a follower.
    switch {
    case m.Term == 0:
        // local message
    case m.Term > r.Term: // e.g. a node that is campaigning carries a term higher than a node that is not
        if m.Type == pb.MsgVote || m.Type == pb.MsgPreVote {
            // The Context field tells whether this (pre-)vote request was
            // produced by a leadership transfer; if so, this node is forced
            // to take part in the (pre-)election.
            force := bytes.Equal(m.Context, []byte(campaignTransfer))
            // inLease holds when CheckQuorum is enabled, this node knows a
            // leader, and its election timer has not yet expired.
            inLease := r.checkQuorum && r.lead != None && r.electionElapsed < r.electionTimeout
            if !force && inLease { // still within the leader's lease: do not take part in this election
                return nil
            }
        }
        switch { // decide from the message type whether this node changes state
        case m.Type == pb.MsgPreVote: // a MsgPreVote never causes a state change here
            // Never change our term in response to a PreVote
        case m.Type == pb.MsgPreVoteResp && !m.Reject:
            // A granted pre-vote response does not change state here either.
        default:
            // Any other message with a higher term turns this node — whatever
            // its current role — into a follower of that term. For MsgApp,
            // MsgHeartbeat and MsgSnap the sender is recorded as the leader;
            // otherwise the leader is left unknown.
            r.logger.Infof("%x [term: %d] received a %s message with higher term from %x [term: %d]",
                r.id, r.Term, m.Type, m.From, m.Term)
            if m.Type == pb.MsgApp || m.Type == pb.MsgHeartbeat || m.Type == pb.MsgSnap {
                r.becomeFollower(m.Term, m.From)
            } else {
                r.becomeFollower(m.Term, None)
            }
        }

    case m.Term < r.Term:
        if (r.checkQuorum || r.preVote) && (m.Type == pb.MsgHeartbeat || m.Type == pb.MsgApp) {
            // With CheckQuorum or PreVote enabled, answer a stale heartbeat or
            // append with a bare MsgAppResp — presumably so the stale sender
            // learns about the newer term; confirm against upstream docs.
            r.send(pb.Message{To: m.From, Type: pb.MsgAppResp})
        } else if m.Type == pb.MsgPreVote {
            // Explicitly reject a pre-vote request from a lower term.
            r.send(pb.Message{To: m.From, Term: r.Term, Type: pb.MsgPreVoteResp, Reject: true})
        } else {
            // ignore other cases
            r.logger.Infof("%x [term: %d] ignored a %s message with lower term from %x [term: %d]",
                r.id, r.Term, m.Type, m.From, m.Term)
        }
        return nil
    }

    switch m.Type {
    case pb.MsgHup: // kicks off an election (sent when a Follower is about to become a (Pre)Candidate)
        if r.state != StateLeader { // only a node that is not the leader acts on MsgHup
            // A node that is not promotable ignores MsgHup.
            if !r.promotable() {
                r.logger.Warningf("%x is unpromotable and can not campaign; ignoring MsgHup", r.id)
                return nil
            }
            // Fetch the entries that are committed but not yet applied,
            // i.e. indexes applied+1 through committed.
            ents, err := r.raftLog.slice(r.raftLog.applied+1, r.raftLog.committed+1, noLimit)
            if err != nil {
                r.logger.Panicf("unexpected error getting unapplied entries (%v)", err)
            }
            // If any of those entries is a configuration change that has not
            // been applied yet, give up this chance to campaign.
            if n := numOfPendingConf(ents); n != 0 && r.raftLog.committed > r.raftLog.applied {
                r.logger.Warningf("%x cannot campaign at term %d since there are still %d pending configuration changes to apply", r.id, r.Term, n)
                return nil
            }
            // Start the election.
            r.logger.Infof("%x is starting a new election at term %d", r.id, r.Term)
            if r.preVote {
                // PreVote is enabled: campaign in pre-election mode first.
                r.campaign(campaignPreElection)
            } else {
                r.campaign(campaignElection)
            }
        } else { // already the leader: only emit a debug log
            r.logger.Debugf("%x ignoring MsgHup because already leader", r.id)
        }

    case pb.MsgVote, pb.MsgPreVote: // handle vote and pre-vote requests
        // We can vote if this is a repeat of a vote we've already cast...
        // First decide whether voting is allowed at all:
        //  1. the recorded Vote already equals the sender (a repeated request);
        //  2. no vote has been cast yet and no leader is currently known;
        //  3. it is a pre-vote request for a term higher than the local one.
        canVote := r.Vote == m.From ||
            (r.Vote == None && r.lead == None) ||
            (m.Type == pb.MsgPreVote && m.Term > r.Term)
        // Additionally the candidate's log position (m.Index, m.LogTerm) must
        // pass the local log's isUpToDate check.
        if canVote && r.raftLog.isUpToDate(m.Index, m.LogTerm) {
            // Grant: reply with the matching response type, carrying the
            // term of the request.
            r.send(pb.Message{To: m.From, Term: m.Term, Type: voteRespMsgType(m.Type)})
            if m.Type == pb.MsgVote { // real vote (not a pre-vote)
                // Only record real votes.
                // Reset the election timer and remember who was voted for.
                r.electionElapsed = 0
                r.Vote = m.From
            }
        } else {
            // Otherwise reply with a rejection, carrying the local term.
            r.send(pb.Message{To: m.From, Term: r.Term, Type: voteRespMsgType(m.Type), Reject: true})
        }

    default:
        // Every other message type is handled by the role-specific step function.
        err := r.step(r, m)
        if err != nil {
            return err
        }
    }
    return nil
}

參照
https://raft.github.io/
https://blog.csdn.net/skh2015java/category_9284671.html

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章