[kubernetes / k8s source code analysis] calico typha source code analysis

Description

    Typha: when the cluster has a large number of nodes, Felix can exchange data with etcd through Typha instead of going through the kube-apiserver, which reduces the pressure on it. (To be verified.)

                                                    (Architecture diagram borrowed from another article online; very nicely drawn)

  • BGP Client (BIRD): distributes the routing information that Felix writes into the kernel across the Calico network, ensuring communication between workloads
  • BGP Route Reflector (BIRD): used in large-scale deployments; route distribution is centralised through one or more BGP Route Reflectors, and whenever a new rule appears in the backend, the Route Reflector syncs the new record

 

1. TyphaDaemon New function

    NewClientV3 establishes the client connection; at first glance it looks like etcd. Let's look at how it is used: github.com/projectcalico/libcalico-go/lib/backend/client.go defines backend.NewClient, which supports both etcd and kubernetes as backends (a hedged usage sketch follows the New() code below).

    ConfigureEarlyLogging configures logging

func New() *TyphaDaemon {
	return &TyphaDaemon{
		NewClientV3: func(config apiconfig.CalicoAPIConfig) (DatastoreClient, error) {
			client, err := clientv3.New(config)
			if err != nil {
				return nil, err
			}
			return ClientV3Shim{client.(RealClientV3), config}, nil
		},
		ConfigureEarlyLogging: logutils.ConfigureEarlyLogging,
		ConfigureLogging:      logutils.ConfigureLogging,
		CachesBySyncerType:    map[syncproto.SyncerType]syncserver.BreadcrumbProvider{},
	}
}
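
    As a usage sketch only (assuming the libcalico-go clientv3/apiconfig API; the exact struct fields here are an assumption and this is not part of the Typha source), such a client could be built directly against an etcdv3 backend like this:

package main

import (
	"log"

	"github.com/projectcalico/libcalico-go/lib/apiconfig"
	"github.com/projectcalico/libcalico-go/lib/clientv3"
)

func main() {
	// The datastore type selects the backend (etcdv3 or kubernetes), which is
	// exactly the choice Typha's DatastoreType config parameter exposes.
	cfg := apiconfig.CalicoAPIConfig{
		Spec: apiconfig.CalicoAPIConfigSpec{
			DatastoreType: apiconfig.EtcdV3,
			EtcdConfig: apiconfig.EtcdConfig{
				EtcdEndpoints: "http://127.0.0.1:2379",
			},
		},
	}

	client, err := clientv3.New(cfg)
	if err != nil {
		log.Fatalf("failed to create calico client: %v", err)
	}
	_ = client // Typha wraps this client in ClientV3Shim (see New above).
}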


2. InitializeAndServeForever function

    DoEarlyRuntimeSetup mainly sets up the log format and level early on, so that configuration loading itself can be logged

    ParseCommandLineArgs parses the command-line arguments, e.g. --config-file: /etc/calico/typha.cfg

[global]
MetadataAddr = None
LogFilePath = None
LogSeverityFile = None

     LoadConfiguration does quite a lot. pkg/config/config_params.go defines the Config struct; it is initialised with default values using reflection over the struct tags and has a great many fields (an excerpt follows). It loads keys with the typha prefix from environment variables, then reads the config file /etc/calico/typha.cfg (a minimal reflection sketch follows the excerpt below).

DatastoreType string `config:"oneof(kubernetes,etcdv3);etcdv3;non-zero,die-on-fail"`

EtcdAddr      string   `config:"authority;127.0.0.1:2379;local"`
EtcdScheme    string   `config:"oneof(http,https);http;local"`
EtcdKeyFile   string   `config:"file(must-exist);;local"`
EtcdCertFile  string   `config:"file(must-exist);;local"`
EtcdCaFile    string   `config:"file(must-exist);;local"`
EtcdEndpoints []string `config:"endpoint-list;;local"`
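
To make the tag mechanism concrete, here is a minimal, self-contained sketch (not Typha's actual loader) showing how reflection can walk these config:"<kind>;<default>;<flags>" tags to discover the parser kind, the default value and the flags for each field:

package main

import (
	"fmt"
	"reflect"
	"strings"
)

// A couple of fields copied from the excerpt above, with the same tag layout.
type Config struct {
	EtcdAddr   string `config:"authority;127.0.0.1:2379;local"`
	EtcdScheme string `config:"oneof(http,https);http;local"`
}

func main() {
	t := reflect.TypeOf(Config{})
	for i := 0; i < t.NumField(); i++ {
		field := t.Field(i)
		// Assumed layout: "<kind>;<default>;<flags>" (three ';'-separated parts).
		parts := strings.SplitN(field.Tag.Get("config"), ";", 3)
		fmt.Printf("field=%s kind=%s default=%q flags=%s\n",
			field.Name, parts[0], parts[1], parts[2])
	}
}
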
func (t *TyphaDaemon) InitializeAndServeForever(cxt context.Context) error {
	t.DoEarlyRuntimeSetup()
	t.ParseCommandLineArgs(nil)
	err := t.LoadConfiguration(cxt)
	if err != nil { // Should only happen if context is canceled.
		return err
	}
	t.CreateServer()
	t.Start(cxt)
	t.WaitAndShutDown(cxt)
	return nil
}

   2.1 CreateServer function

    Creates and configures the server components, but does not start them

    FelixSyncerByIface calls felixsyncer.New to create the felix syncer, implemented in github.com/projectcalico/libcalico-go/lib/backend/syncersv1/felixsyncer/felixsyncerv1.go. It defines a long list of watchersyncer.ResourceType entries and instantiates a watcherSyncer, implemented in github.com/projectcalico/libcalico-go/lib/backend/watchersyncer/watchersyncer.go; a detailed analysis comes later

    BGPSyncerByIface calls bgpsyncer.New to create the bgp syncer, implemented in github.com/projectcalico/libcalico-go/lib/backend/syncersv1/bgpsyncer/bgpsyncer.go; it likewise instantiates a watcherSyncer

// CreateServer creates and configures (but does not start) the server components.
func (t *TyphaDaemon) CreateServer() {
	// Health monitoring, for liveness and readiness endpoints.
	t.healthAggregator = health.NewHealthAggregator()

	// Now create the Syncer and caching layer (one pipeline for each syncer we support).
	t.addSyncerPipeline(syncproto.SyncerTypeFelix, t.DatastoreClient.FelixSyncerByIface)
	t.addSyncerPipeline(syncproto.SyncerTypeBGP, t.DatastoreClient.BGPSyncerByIface)

}

     2.1.1 addSyncerPipeline function

      calc.NewSyncerCallbacksDecoupler() mainly passes data from the syncer to the validator; implemented in pkg/calc/async_decoupler.go

      The second calc.NewSyncerCallbacksDecoupler mainly passes data from the validator to the cache

      snapcache.New creates the snapshot cache, covered in section 3

func (t *TyphaDaemon) addSyncerPipeline(
	syncerType syncproto.SyncerType,
	newSyncer func(callbacks bapi.SyncerCallbacks) bapi.Syncer,
) {
	// Get a Syncer from the datastore, which will feed the validator layer with updates.
	syncerToValidator := calc.NewSyncerCallbacksDecoupler()
	syncer := newSyncer(syncerToValidator)
	log.Debugf("Created Syncer: %#v", syncer)

	// Create the validator, which sits between the syncer and the cache.
	validatorToCache := calc.NewSyncerCallbacksDecoupler()
	validator := calc.NewValidationFilter(validatorToCache)

	// Create our snapshot cache, which stores point-in-time copies of the datastore contents.
	cache := snapcache.New(snapcache.Config{
		MaxBatchSize:     t.ConfigParams.SnapshotCacheMaxBatchSize,
		HealthAggregator: t.healthAggregator,
	})

	pipeline := &syncerPipeline{
		Type:              syncerType,
		Syncer:            syncer,
		SyncerToValidator: syncerToValidator,
		Validator:         validator,
		ValidatorToCache:  validatorToCache,
		Cache:             cache,
	}
	t.SyncerPipelines = append(t.SyncerPipelines, pipeline)
	t.CachesBySyncerType[syncerType] = cache
}

   2.2 TyphaDaemon Start function

     p.Cache.Start is covered in section 3.1

func (p syncerPipeline) Start(cxt context.Context) {
	logCxt := log.WithField("syncerType", p.Type)
	logCxt.Info("Starting syncer")
	p.Syncer.Start()
	logCxt.Info("Starting syncer-to-validator decoupler")
	go p.SyncerToValidator.SendTo(p.Validator)
	logCxt.Info("Starting validator-to-cache decoupler")
	go p.ValidatorToCache.SendTo(p.Cache)
	logCxt.Info("Starting cache")
	p.Cache.Start(cxt)
	logCxt.Info("Started syncer pipeline")
}

     2.2.1 The channel that carries data from the syncer to the validator

func (a *SyncerCallbacksDecoupler) SendToContext(cxt context.Context, sink api.SyncerCallbacks) {
	for {
		select {
		case obj := <-a.c:
			switch obj := obj.(type) {
			case api.SyncStatus:
				sink.OnStatusUpdated(obj)
			case []api.Update:
				sink.OnUpdates(obj)
			}
		case <-cxt.Done():
			logrus.WithError(cxt.Err()).Info("Context asked us to stop")
			return
		}
	}
}
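
     The consumer side above switches on the payload type; the producer half is not quoted. The sketch below is only an illustration of the same decoupler pattern with stand-in types (not the Typha source): the syncer-side callbacks enqueue onto the channel that SendTo/SendToContext later drains into the real sink.

package main

import (
	"context"
	"fmt"
	"time"
)

// Minimal stand-ins for api.SyncStatus, api.Update and api.SyncerCallbacks.
type SyncStatus int

type Update struct{ Key, Value string }

type SyncerCallbacks interface {
	OnStatusUpdated(status SyncStatus)
	OnUpdates(updates []Update)
}

// Decoupler implements SyncerCallbacks by pushing everything onto a channel,
// decoupling the producer (syncer) from the consumer (validator / cache).
type Decoupler struct{ c chan interface{} }

func NewDecoupler() *Decoupler { return &Decoupler{c: make(chan interface{}, 100)} }

// Producer side: the callbacks simply enqueue the payload.
func (d *Decoupler) OnStatusUpdated(status SyncStatus) { d.c <- status }
func (d *Decoupler) OnUpdates(updates []Update)        { d.c <- updates }

// Consumer side: drain the channel into the downstream sink until the context ends.
func (d *Decoupler) SendTo(ctx context.Context, sink SyncerCallbacks) {
	for {
		select {
		case obj := <-d.c:
			switch obj := obj.(type) {
			case SyncStatus:
				sink.OnStatusUpdated(obj)
			case []Update:
				sink.OnUpdates(obj)
			}
		case <-ctx.Done():
			return
		}
	}
}

type printSink struct{}

func (printSink) OnStatusUpdated(s SyncStatus) { fmt.Println("status:", s) }
func (printSink) OnUpdates(u []Update)         { fmt.Println("updates:", u) }

func main() {
	d := NewDecoupler()
	ctx, cancel := context.WithCancel(context.Background())
	go d.SendTo(ctx, printSink{})

	d.OnUpdates([]Update{{Key: "profile-1", Value: "allow"}})
	d.OnStatusUpdated(1)

	time.Sleep(50 * time.Millisecond) // let the consumer drain before stopping it
	cancel()
}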

   2.3 syncserver Start function

    The serve function is long but straightforward: it accepts requests, caches the connection (conn), and finally calls connection.handle to process it

func (s *Server) Start(cxt context.Context) {
	s.Finished.Add(2)
	go s.serve(cxt)
	go s.governNumberOfConnections(cxt)
}

     2.3.1 connection.handle function

      doHandshake handles the handshake request, message type MsgClientHello

      sendSnapshotAndUpdatesToClient asynchronously sends the snapshot and subsequent updates to the client

      sendPingsToClient sends periodic ping/pong keep-alives (see the sketch below)
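
      No code is excerpted for this part, so the following is a schematic sketch only (stand-in names, not the Typha source) of how a per-connection handler typically coordinates these three tasks: handshake first, then the snapshot/updates sender and the pinger run concurrently until one of them fails or the context is cancelled.

package main

import (
	"context"
	"fmt"
	"time"
)

type connection struct{}

// Accept and validate the client's MsgClientHello (details elided).
func (c *connection) doHandshake(ctx context.Context) error { return nil }

// Stream the current snapshot followed by incremental updates (details elided).
func (c *connection) sendSnapshotAndUpdates(ctx context.Context) error {
	<-ctx.Done()
	return ctx.Err()
}

// Send periodic pings; a cancelled context (or a missing pong) ends the loop.
func (c *connection) sendPings(ctx context.Context, interval time.Duration) error {
	t := time.NewTicker(interval)
	defer t.Stop()
	for {
		select {
		case <-t.C:
			// send a ping, expect a pong within a timeout
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}

func (c *connection) handle(ctx context.Context) error {
	if err := c.doHandshake(ctx); err != nil {
		return err
	}
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	errC := make(chan error, 2)
	go func() { errC <- c.sendSnapshotAndUpdates(ctx) }()
	go func() { errC <- c.sendPings(ctx, 10*time.Second) }()
	return <-errC // the first failure tears the whole connection down
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()
	fmt.Println(new(connection).handle(ctx))
}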

   2.4 When the backend is kubernetes

if t.ConfigParams.ConnectionRebalancingMode == "kubernetes" {
	log.Info("Kubernetes connection rebalancing is enabled, starting k8s poll goroutine.")
	k8sAPI := k8s.NewK8sAPI()
	ticker := jitter.NewTicker(
		t.ConfigParams.K8sServicePollIntervalSecs,
		t.ConfigParams.K8sServicePollIntervalSecs/10)
	go k8s.PollK8sForConnectionLimit(cxt, t.ConfigParams, ticker.C, k8sAPI, t.Server)
}
log.Info("Started the datastore Syncer/cache layer/server.")

 

3. snapcache New function

    Every node in a Ctrie has an indirection node associated with it. When a snapshot is taken, the root node is copied to a new node, and the other nodes in the tree are lazily copied to new nodes as they are accessed (a persistent data structure), so the snapshot operation takes constant time.

    Compared with a synchronized map or a skip list, inserting into a Ctrie is somewhat slower because there is more indirection. The Ctrie's real advantage is memory consumption: unlike most hash tables, it is always just a set of keys stored in a tree. Another performance advantage is that it can take a linearizable snapshot in constant time.
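
    A small usage sketch of that constant-time snapshot behaviour, assuming the Insert/Lookup calls of the ctrie package used by the New code below (New(nil) and ReadOnlySnapshot appear in the quoted source; the other calls are an assumption):

package main

import (
	"fmt"

	"github.com/Workiva/go-datastructures/trie/ctrie"
)

func main() {
	kvs := ctrie.New(nil /*default hash factory*/)
	kvs.Insert([]byte("key1"), "v1")

	// Taking a read-only snapshot is O(1); nodes are lazily copied on later access.
	snap := kvs.ReadOnlySnapshot()

	// Mutations to the live trie do not affect the earlier snapshot.
	kvs.Insert([]byte("key1"), "v1-updated")
	kvs.Insert([]byte("key2"), "v2")

	if v, ok := snap.Lookup([]byte("key1")); ok {
		fmt.Println("snapshot still sees:", v) // prints "v1"
	}
	if _, ok := snap.Lookup([]byte("key2")); !ok {
		fmt.Println("snapshot does not contain key2")
	}
}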

 

func New(config Config) *Cache {
	config.ApplyDefaults()
	kvs := ctrie.New(nil /*default hash factory*/)
	cond := sync.NewCond(&sync.Mutex{})
	snap := &Breadcrumb{
		Timestamp: time.Now(),
		nextCond:  cond,
		KVs:       kvs.ReadOnlySnapshot(),
	}
	c := &Cache{
		config:            config,
		inputC:            make(chan interface{}, config.MaxBatchSize*2),
		breadcrumbCond:    cond,
		kvs:               kvs,
		currentBreadcrumb: (unsafe.Pointer)(snap),
		wakeUpTicker:      jitter.NewTicker(config.WakeUpInterval, config.WakeUpInterval/10),
		healthTicks:       time.NewTicker(healthInterval).C,
	}
	if config.HealthAggregator != nil {
		config.HealthAggregator.RegisterReporter(healthName, &health.HealthReport{Live: true, Ready: true}, healthInterval*2)
	}
	c.reportHealth()
	return c
}

   3.1 Cache Start function

// Start starts the cache's main loop in a background goroutine.
func (c *Cache) Start(ctx context.Context) {
	go c.loop(ctx)
}

func (c *Cache) loop(ctx context.Context) {
	for {
		// First, block, waiting for updates and batch them up in our pendingXXX fields.
		// This will opportunistically slurp up a limited number of pending updates.
		if err := c.fillBatchFromInputQueue(ctx); err != nil {
			log.WithError(err).Error("Snapshot main loop exiting.")
			return
		}
		// Then publish the updates in new Breadcrumb(s).
		c.publishBreadcrumbs()
	}
}

     3.1.1 fillBatchFromInputQueue function

      Reads from the Cache's inputC channel; updates are accumulated in the pending fields, and publishBreadcrumbs then processes the pending updates

// fillBatchFromInputQueue waits for some input on the input channel, then opportunistically
// pulls as much as possible from the channel.  Input is stored in the pendingXXX fields for
// the next stage of processing.
func (c *Cache) fillBatchFromInputQueue(ctx context.Context) error {
	batchSize := 0
	storePendingUpdate := func(obj interface{}) {
		// ... stores obj into the pendingXXX fields and increments batchSize (body elided) ...
	}

	log.Debug("Waiting for next input...")
	select {
	case obj := <-c.inputC:
		log.WithField("update", obj).Debug("Got first update, peeking...")
		storePendingUpdate(obj)
	batchLoop:
		for batchSize < c.config.MaxBatchSize {
			select {
			case obj = <-c.inputC:
				storePendingUpdate(obj)
			case <-ctx.Done():
				log.WithError(ctx.Err()).Info("Context is done. Stopping.")
				return ctx.Err()
			default:
				break batchLoop
			}
		}
		log.WithField("numUpdates", batchSize).Debug("Finished reading batch.")

     3.1.2 publishBreadcrumb

     Updates the master Ctrie and publishes a new Breadcrumb that contains a read-only snapshot
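
     The publish code itself is not quoted here, so the following is only a hedged sketch of the pattern suggested by the New code above (illustrative field and method names; the real Breadcrumb also carries the read-only ctrie snapshot in KVs, omitted here): the new crumb is linked from the old one, the current-breadcrumb pointer is swapped atomically, and readers waiting on the condition variable are woken.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"
	"unsafe"
)

// Breadcrumb is a point-in-time view; next is filled in when a successor is
// published, so readers can walk forward from whichever crumb they hold.
type Breadcrumb struct {
	Timestamp time.Time
	next      unsafe.Pointer // *Breadcrumb
	nextCond  *sync.Cond
}

type Cache struct {
	breadcrumbCond    *sync.Cond
	currentBreadcrumb unsafe.Pointer // *Breadcrumb
}

func (c *Cache) publishBreadcrumb() {
	old := (*Breadcrumb)(atomic.LoadPointer(&c.currentBreadcrumb))
	crumb := &Breadcrumb{Timestamp: time.Now(), nextCond: c.breadcrumbCond}

	// Link old -> new, make the new crumb current, then wake any waiting readers.
	atomic.StorePointer(&old.next, unsafe.Pointer(crumb))
	atomic.StorePointer(&c.currentBreadcrumb, unsafe.Pointer(crumb))
	c.breadcrumbCond.L.Lock()
	c.breadcrumbCond.Broadcast()
	c.breadcrumbCond.L.Unlock()
}

func main() {
	cond := sync.NewCond(&sync.Mutex{})
	first := &Breadcrumb{Timestamp: time.Now(), nextCond: cond}
	c := &Cache{breadcrumbCond: cond, currentBreadcrumb: unsafe.Pointer(first)}
	c.publishBreadcrumb()
	fmt.Println("published crumb at", (*Breadcrumb)(atomic.LoadPointer(&c.currentBreadcrumb)).Timestamp)
}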

     

4. watcherSyncer

    The watcherSyncer struct implements the api.Syncer interface, which has two methods, Start() and Stop()

// watcherSyncer implements the api.Syncer interface.
type watcherSyncer struct {
	status        api.SyncStatus
	watcherCaches []*watcherCache
	results       chan interface{}
	numSynced     int
	callbacks     api.SyncerCallbacks
	wgwc          *sync.WaitGroup
	wgws          *sync.WaitGroup
	cancel        context.CancelFunc
}

   4.1 The run method called by Start()

     The run function implements the main syncer loop: it first sends the wait-for-datastore status and waits for the connection to the datastore. There are currently two watcherSyncers, felix and bgp.

     For every resourceType registered for a syncer, a corresponding watcherCache run is started; see section 5.1

// run implements the main syncer loop that loops forever receiving watch events and translating
// to syncer updates.
func (ws *watcherSyncer) run(ctx context.Context) {
	log.Debug("Sending initial status event and starting watchers")
	ws.wgws.Add(1)
	ws.sendStatusUpdate(api.WaitForDatastore)
	for _, wc := range ws.watcherCaches {
		ws.wgwc.Add(1)
		go func(wc *watcherCache) {
			wc.run(ctx)
			log.Debug("Watcher cache run completed")
			ws.wgwc.Done()
		}(wc)
	}

   4.2 watcherSyncer processes results from the channel

log.Info("Starting main event processing loop")
var updates []api.Update
for result := range ws.results {
	// Process the data - this will append the data in subsequent calls, and action
	// it if we hit a non-update event.
	updates = ws.processResult(updates, result)

	// Append results into the one update until we either flush the channel or we
	// hit our fixed limit per update.
consolidatationloop:
	for ii := 0; ii < maxUpdatesToConsolidate; ii++ {
		select {
		case next := <-ws.results:
			updates = ws.processResult(updates, next)
		default:
			break consolidatationloop
		}
	}

	// Perform final processing (pass in a nil result) before we loop and hit the blocking
	// call again.
	updates = ws.sendUpdates(updates)
}

   4.3 The processResult function handles the result channel

     Updates are not acted on immediately; they are grouped so that a larger single update can be sent to Felix. The result types are []api.Update, error and api.SyncStatus

// Process a result from the result channel.  We don't immediately action updates, but
// instead start grouping them together so that we can send a larger single update to
// Felix.
func (ws *watcherSyncer) processResult(updates []api.Update, result interface{}) []api.Update {

	// Switch on the result type.
	switch r := result.(type) {
	case []api.Update:
		// This is an update.  If we don't have previous updates then also check to see
		// if we need to shift the status into Resync.
		// We append these updates to the previous if there were any.
		if len(updates) == 0 && ws.status == api.WaitForDatastore {
			ws.sendStatusUpdate(api.ResyncInProgress)
		}
		updates = append(updates, r...)

 

5. watcherCache 

    The key piece is the results channel, which is untyped; it only carries error, api.Update and api.SyncStatus values

// The watcherCache provides watcher/syncer support for a single key type in the
// backend.  These results are sent to the main WatcherSyncer on a buffered "results"
// channel.  To ensure the order of events is received correctly by the main WatcherSyncer,
// we send all notification types in this channel.  Note that because of this the results
// channel is untyped - however the watcherSyncer only expects one of the following
// types:
// -  An error
// -  An api.Update
// -  A api.SyncStatus (only for the very first InSync notification)
type watcherCache struct {
	logger               *logrus.Entry
	client               api.Client
	watch                api.WatchInterface
	resources            map[string]cacheEntry
	oldResources         map[string]cacheEntry
	results              chan<- interface{}
	hasSynced            bool
	errors               int
	resourceType         ResourceType
	currentWatchRevision string
}

   5.1 watcherCache run function

// run creates the watcher and loops indefinitely reading from the watcher.
func (wc *watcherCache) run(ctx context.Context) {
	wc.logger.Debug("Watcher cache starting, start initial sync processing")
	wc.resyncAndCreateWatcher(ctx)

     5.1.1 The resyncAndCreateWatcher function loops performing resync until it successfully completes a resync and starts the watcher

// resyncAndCreateWatcher loops performing resync processing until it successfully
// completes a resync and starts a watcher.
func (wc *watcherCache) resyncAndCreateWatcher(ctx context.Context) {
	// The passed in context allows a resync to be stopped mid-resync. The resync should be stopped as quickly as
	// possible, but there should be usable data available in wc.resources so that delete events can be sent.
	// The strategy is to
	// - cancel any long running functions calls made from here, i.e. pass ctx to the client.list() calls
	//    - but if it finishes, then ensure that the listing gets processed.
	// - cancel any sleeps if the context is cancelled

     5.1.1.1 If there is no current watch revision, perform a full resync

// If we don't have a currentWatchRevision then we need to perform a full resync.
performFullResync := wc.currentWatchRevision == ""

     5.1.1.2 Performing the full resync

syncer | resourcetype          | implementation path
felix  | configUpdateProcessor | github.com/projectcalico/libcalico-go/lib/backend/syncersv1/updateprocessors/configurationprocessor.go
felix  |                       |

     UpdateProcessor.OnSyncerStarting mainly acts as a switch, notifying the update processor that a resync is starting

     client.List fetches the resources from the backend; in this article the backend is kubernetes

     handleWatchListEvent calls the UpdateProcessor's Process method for each KV; for matching entries it calls handleAddedOrModifiedUpdate to update the KV, which in effect pushes values onto the channel: wc.results <- []api.Update

if performFullResync {

	// Notify the converter that we are resyncing.
	if wc.resourceType.UpdateProcessor != nil {
		wc.logger.Debug("Trigger converter resync notification")
		wc.resourceType.UpdateProcessor.OnSyncerStarting()
	}

	// Start the sync by Listing the current resources.
	l, err := wc.client.List(ctx, wc.resourceType.ListInterface, "")
	
	// Once this point is reached, it's important not to drop out if the context is cancelled.
	// Move the current resources over to the oldResources
	wc.oldResources = wc.resources
	wc.resources = make(map[string]cacheEntry, 0)

	// Send updates for each of the resources we listed - this will revalidate entries in
	// the oldResources map.
	for _, kvp := range l.KVPairs {
		wc.handleWatchListEvent(kvp)
	}

	// We've listed the current settings.  Complete the sync by notifying the main WatcherSyncer
	// go routine (if we haven't already) and by sending deletes for the old resources that were
	// not acknowledged by the List.  The oldResources will be empty after this call.
	wc.finishResync()

	// Store the current watch revision.  This gets updated on any new add/modified event.
	wc.currentWatchRevision = l.Revision
}

     5.1.1.3 Watching from the current revision

// And now start watching from the revision returned by the List, or from a previous watch event
// (depending on whether we were performing a full resync).
w, err := wc.client.Watch(ctx, wc.resourceType.ListInterface, wc.currentWatchRevision)

   5.1.2 Syncing data from the watch channel

      The event types are added, modified, deleted and error. Most of the work is done by handleWatchListEvent, which is fairly easy to follow: added, modified and deleted KVs are pushed onto the results channel and handled by the watcherSyncer

case event, ok := <-wc.watch.ResultChan():
	if !ok {
		// If the channel is closed then resync/recreate the watch.
		wc.logger.Info("Watch channel closed by remote - recreate watcher")
		wc.resyncAndCreateWatcher(ctx)
		continue
	}
	wc.logger.WithField("RC", wc.watch.ResultChan()).Debug("Reading event from results channel")

	// Handle the specific event type.
	switch event.Type {
	case api.WatchAdded, api.WatchModified:

 

Summary:

    On startup, typha reads its configuration from command-line arguments, environment variables and the config file, and creates the backend connection (kubernetes / etcd)

    It registers two pipelines, creating the felix and bgp syncers; each has its own registered resourceTypes

    The watcherCache syncs each resource via list/watch and pushes events onto the results channel

    The watcherSyncer processes the results channel
